comparison isoformswitchanalyzer.xml @ 0:bb611fa3bc3b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/isoformswitchanalyzer commit 2c61e4c6151000201dd9a8323722a380bc235380
author iuc
date Tue, 24 Jan 2023 18:36:55 +0000
parents
children 5ae218cee629
comparison
equal deleted inserted replaced
-1:000000000000 0:bb611fa3bc3b
1 <tool id="isoformswitchanalyzer" name="IsoformSwitchAnalyzeR" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@">
2 <description>statistical identification of isoform switching</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro='xrefs'/>
7 <expand macro='requirements'/>
8 <stdio>
<!-- Each pattern below is searched in both output streams (source="both"); any match marks the job as failed (level="fatal"). -->
<!-- "Execution halted" gets its own description; "Error in" and "Fatal error" share a generic one. -->
9 <regex match="Execution halted"
10 source="both"
11 level="fatal"
12 description="Execution halted." />
13 <regex match="Error in"
14 source="both"
15 level="fatal"
16 description="An undefined error occurred, please check your input carefully and contact your administrator." />
17 <regex match="Fatal error"
18 source="both"
19 level="fatal"
20 description="An undefined error occurred, please check your input carefully and contact your administrator." />
21 </stdio>
22 <command><![CDATA[
23 #if $functionMode.selector == 'data_import'
24 #if $functionMode.transcriptome.is_of_type("fasta.gz"):
## Link the transcriptome under a name whose extension reflects its compression,
## and remember that name in $transcriptome for the Rscript call further down.
25 ln -s '${functionMode.transcriptome}' './transcriptome.fasta.gz' &&
26 #set $transcriptome = './transcriptome.fasta.gz'
27 #else
## The trailing && is required: without it this command runs into the next
## "ln -s" and the job fails for uncompressed FASTA input.
28 ln -s '${functionMode.transcriptome}' './transcriptome.fasta' &&
29 #set $transcriptome = './transcriptome.fasta'
30 #end if
31 #if $functionMode.genomeAnnotation.is_of_type("gtf.gz"):
32 ln -s '${functionMode.genomeAnnotation}' './annotation.gtf.gz' &&
33 #set $annotation = './annotation.gtf.gz'
34 #else
35 ln -s '${functionMode.genomeAnnotation}' './annotation.gtf' &&
36 #set $annotation = './annotation.gtf'
37 #end if
38 mkdir -p 'input_files' &&
39 #if $functionMode.countFiles != 'disabled':
40 mkdir -p "count_files/factor1/" &&
41 mkdir -p "count_files/factor2/" &&
42 #end if
43 #if $functionMode.tool_source.selector == 'stringtie'
44 #set $filename = 't_data.ctab'
45 #else
46 #set $filename = 'quant.sf'
47 #end if
48
## Stage every quantification file in its own directory, named <factorLevel><index>,
## under ./input_files, using the file name chosen above in $filename.
## The dataset path is quoted like every other path in this template.
## NOTE(review): both factorLevel params default to "FactorLevel"; with identical
## levels the second loop's plain "mkdir" hits an existing directory and fails.
## Confirm whether that is the intended guard.
49 #for $index in range(len($functionMode.first_factor.trans_counts)):
50 mkdir './input_files/${functionMode.first_factor.factorLevel}${index}/' &&
51 ln -s '${functionMode.first_factor.trans_counts[$index]}' './input_files/${functionMode.first_factor.factorLevel}${index}/${filename}' &&
52 #end for
53
54 #for $index in range(len($functionMode.second_factor.trans_counts)):
55 mkdir './input_files/${functionMode.second_factor.factorLevel}${index}/' &&
56 ln -s '${functionMode.second_factor.trans_counts[$index]}' './input_files/${functionMode.second_factor.factorLevel}${index}/${filename}' &&
57 #end for
58
59 Rscript '${__tool_directory__}/IsoformSwitchAnalyzeR.R'
60 --modeSelector $functionMode.selector
61 --parentDir './input_files'
62 --annotation $annotation
63 --transcriptome $transcriptome
64 --toolSource $functionMode.tool_source.selector
65 #if $functionMode.tool_source.selector == 'stringtie'
66 --readLength $functionMode.tool_source.averageSize
67 $functionMode.tool_source.fixStringTieAnnotationProblem
68 #end if
69 --countFiles $functionMode.countFiles
70 #else if $functionMode.selector == 'first_step'
71 Rscript '${__tool_directory__}/IsoformSwitchAnalyzeR.R'
72 --modeSelector $functionMode.selector
73 --rObject $functionMode.robject
74 --alpha $functionMode.alpha
75 --dIFcutoff $functionMode.dIFcutoff
76 $functionMode.onlySigIsoforms
77 $functionMode.filterForConsequences
78 --geneExpressionCutoff $functionMode.prefilter.geneExpressionCutoff
79 --isoformExpressionCutoff $functionMode.prefilter.isoformExpressionCutoff
80 --IFcutoff $functionMode.prefilter.IFcutoff
81 $functionMode.prefilter.removeSingleIsformGenes
82 $functionMode.prefilter.keepIsoformInAllConditions
83 $functionMode.dexseq.correctForConfoundingFactors
84 $functionMode.dexseq.overwriteIFvalues
85 $functionMode.dexseq.reduceToSwitchingGenes
86 $functionMode.dexseq.reduceFurtherToGenesWithConsequencePotential
87 $functionMode.dexseq.keepIsoformInAllConditions
88 --minORFlength $functionMode.novel_isoform.minORFlength
89 --orfMethod $functionMode.novel_isoform.orfMethod
90 --PTCDistance $functionMode.novel_isoform.PTCDistance
91 $functionMode.extract_sequence.removeShortAAseq
92 $functionMode.extract_sequence.removeLongAAseq
93 $functionMode.extract_sequence.removeORFwithStop
94 $functionMode.extract_sequence.onlySwitchingGenes
95 #else
## Each enabled external annotation gets its own directory of dataset<index>.txt links.
## The directory, not the individual files, is what is later passed to the R script.
## Dataset paths are quoted for consistency with the other "ln -s" calls.
96 #if $functionMode.protein_domains.selector == 'enabled'
97 mkdir -p './pfam_files' &&
98 #for $index,$filename in enumerate($functionMode.protein_domains.analyzePFAM)
99 ln -s '$filename' './pfam_files/dataset${index}.txt' &&
100 #end for
101 #end if
102 #if $functionMode.signal_peptides.selector == 'enabled'
103 mkdir -p './signalp_files' &&
104 #for $index,$filename in enumerate($functionMode.signal_peptides.analyzeSignalP)
105 ln -s '$filename' './signalp_files/dataset${index}.txt' &&
106 #end for
107 #end if
108 #if $functionMode.disordered_regions.selector == 'netsurfp'
109 mkdir -p './netsurf_files' &&
110 #for $index,$filename in enumerate($functionMode.disordered_regions.analyzeNetSurfP2)
111 ln -s '$filename' './netsurf_files/dataset${index}.txt' &&
112 #end for
113 #end if
114 Rscript '${__tool_directory__}/IsoformSwitchAnalyzeR.R'
115 --modeSelector $functionMode.selector
116 --rObject $functionMode.robject
117 --analysisMode $functionMode.analysis_mode.selector
118 --alpha $functionMode.analysis_mode.alpha
119 --dIFcutoff $functionMode.analysis_mode.dIFcutoff
## Mode-specific options for analysis part two.
## Each boolean param expands to its flag when checked and to an empty string otherwise.
120 #if $functionMode.analysis_mode.selector == 'top'
121 --genesToPlot $functionMode.analysis_mode.n
122 $functionMode.analysis_mode.advanced_options.filterForConsequences
123 $functionMode.analysis_mode.advanced_options.sortByQvals
124 $functionMode.analysis_mode.advanced_options.onlySigIsoforms
125 $functionMode.analysis_mode.advanced_options.onlySwitchingGenes
126 $functionMode.analysis_mode.advanced_options.countGenes
127 $functionMode.analysis_mode.advanced_options.asFractionTotal
128 $functionMode.analysis_mode.advanced_options.plotGenes
129 $functionMode.analysis_mode.advanced_options.simplifyLocation
130 $functionMode.analysis_mode.advanced_options.removeEmptyConsequences
## Previously removeEmptyConsequences was emitted a second time here, so the
## analysisOppositeConsequence checkbox declared in the inputs was never passed on.
131 $functionMode.analysis_mode.advanced_options.analysisOppositeConsequence
132 #else
133 --gene $functionMode.analysis_mode.gene
134 --IFcutoff $functionMode.analysis_mode.advanced_options.IFcutoff
135 $functionMode.analysis_mode.advanced_options.rescaleTranscripts
136 $functionMode.analysis_mode.advanced_options.reverseMinus
137 $functionMode.analysis_mode.advanced_options.addErrorbars
138 $functionMode.analysis_mode.advanced_options.onlySwitchingGenes
139 #end if
140 #if $functionMode.coding_potential.selector == 'cpat'
141 --pathToCPATresultFile $functionMode.coding_potential.analyzeCPAT
142 --codingCutoff $functionMode.coding_potential.codingCutoff
143 #else if $functionMode.coding_potential.selector == 'cpc2'
144 --pathToCPC2resultFile $functionMode.coding_potential.analyzeCPC2
145 $functionMode.coding_potential.removeNoncodingORFs
146 --codingCutoff $functionMode.coding_potential.codingCutoff
147 #end if
148 #if $functionMode.protein_domains.selector == 'enabled'
149 --pathToPFAMresultFile './pfam_files'
150 #end if
151 #if $functionMode.signal_peptides.selector == 'enabled'
152 --pathToSignalPresultFile './signalp_files'
153 --minSignalPeptideProbability $functionMode.signal_peptides.minSignalPeptideProbability
154 #end if
155 #if $functionMode.disordered_regions.selector == 'netsurfp'
156 --pathToNetSurfP2resultFile './netsurf_files'
157 --smoothingWindowSize $functionMode.disordered_regions.smoothingWindowSize
158 --probabilityCutoff $functionMode.disordered_regions.probabilityCutoff
159 --minIdrSize $functionMode.disordered_regions.minIdrSize
160 #else if $functionMode.disordered_regions.selector == 'iupred2a'
161 --pathToIUPred2AresultFile $functionMode.disordered_regions.AanalyzeIUPred2A
162 --smoothingWindowSize $functionMode.disordered_regions.smoothingWindowSize
163 --probabilityCutoff $functionMode.disordered_regions.probabilityCutoff
164 --minIdrSize $functionMode.disordered_regions.minIdrSize
165 $functionMode.disordered_regions.annotateBindingSites
166 --minIdrBindingSize $functionMode.disordered_regions.minIdrBindingSize
167 --minIdrBindingOverlapFrac $functionMode.disordered_regions.minIdrBindingOverlapFrac
168 #end if
169 --ntCutoff $functionMode.analyzeSwitchConsequences.ntCutoff
## ntFracCutoff is an optional float. Compare its string form against '' so that an
## explicit 0 is still forwarded; a plain truthiness test would treat 0 as "not set".
170 #if str($functionMode.analyzeSwitchConsequences.ntFracCutoff) != ''
171 --ntFracCutoff $functionMode.analyzeSwitchConsequences.ntFracCutoff
172 #end if
173 --ntJCsimCutoff $functionMode.analyzeSwitchConsequences.ntJCsimCutoff
174 --AaCutoff $functionMode.analyzeSwitchConsequences.AaCutoff
175 --AaFracCutoff $functionMode.analyzeSwitchConsequences.AaFracCutoff
176 --AaJCsimCutoff $functionMode.analyzeSwitchConsequences.AaJCsimCutoff
177 $functionMode.analyzeSwitchConsequences.removeNonConseqSwitches
178 #if $functionMode.analysis_mode.selector == 'top'
179 && mkdir -p './pdf_outputs/'
180 && mv *pdf './pdf_outputs/'
181 && mv *_vs_* gene_plots
182 #end if
183 #end if
184 ]]></command>
185 <inputs>
186 <conditional name="functionMode">
187 <param name="selector" type="select" label="Tool function mode"
188 help="The first step of a IsoformSwitchAnalyzeR workflow is to import and integrate the isoform quantification
189 with its basic annotation. Once you have all the relevant data imported into R (IsoformSwitchAnalyzeR will
190 also help you with that), the workflow for identification and analysis of isoform switches with functional
191 consequences can be divided into two parts.">
192 <option value="data_import">Import data</option>
193 <option value="first_step">Analysis part one: Extract isoform switches and their sequences</option>
194 <option value="second_step">Analysis part two: Plot all isoform switches and their annotation</option>
195 </param>
196 <when value="data_import">
197 <section name="first_factor" title="1: Factor level" expanded="true">
198 <param name="factorLevel" type="text" value="FactorLevel" label="Specify a factor level, typical values could be 'tumor' or 'treated'"
199 help="Only letters, numbers and underscores will be retained in this field">
200 <sanitizer>
201 <valid initial="string.letters,string.digits"><add value="_" /></valid>
202 </sanitizer>
203 </param>
204 <param name="trans_counts" type="data" format="tabular" multiple="true" label="Transcript-level expression measurements"/>
205 </section>
206 <section name="second_factor" title="2: Factor level" expanded="true">
207 <param name="factorLevel" type="text" value="FactorLevel" label="Specify a factor level, typical values could be 'tumor' or 'treated'"
208 help="Only letters, numbers and underscores will be retained in this field">
209 <sanitizer>
210 <valid initial="string.letters,string.digits"><add value="_" /></valid>
211 </sanitizer>
212 </param>
213 <param name="trans_counts" type="data" format="tabular" multiple="true" label="Transcript-level expression measurements"/>
214 </section>
215 <conditional name="tool_source">
216 <param name="selector" type="select" label="Quantification data source" help="IsoformSwitchAnalyzeR has different functions for importing data from different sources.">
217 <option value="stringtie">StringTie</option>
218 <option value="salmon">Salmon/Kallisto</option>
219 </param>
220 <when value="salmon"/>
221 <when value="stringtie">
222 <param name="averageSize" type="integer" min="0" value="150" label="Average read length" help="Must be the number of base pairs sequenced. e.g. if the data
223 quantified is 75 bp paired ends the the user should supply readLength=75" />
224 <param argument="fixStringTieAnnotationProblem" type="boolean" truevalue="--fixStringTieAnnotationProblem" falsevalue="" checked="true"
225 label="Fix StringTie annotation problem" help="This option will automatically try and correct some of the annoation problems created when
226 doing transcript assembly (unassigned transcripts and merged genes)" />
227 </when>
228 </conditional>
229 <param name="genomeAnnotation" type="data" format="gtf,gtf.gz" label="Genome annotation (GTF)"
230 help="It is used to integrate the coding sequence (CDS) regions from in the GTF file as the ORF regions used by IsoformSwitchAnalyzeR." />
231 <param name="transcriptome" type="data" format="fasta,fasta.gz" label="Transcriptome"
232 help="Please note this different from a fasta file with the sequences of the entire genome." />
233 <param name="countFiles" type="select" label="Generate count matrix files" help="If IsoformSwitchAnalyzeR is used for fixing Stringtie annotation
234 problem, it can generate count files for analyzing differential expression with DESeq2 (when selecting collection) or CEMiTool (when secting the expression matrix format).">
235 <option value="disabled">Disabled</option>
236 <option value="collection">Collection of count files</option>
237 <option value="matrix">Expression matrix</option>
238 </param>
239 </when>
240
241 <!--WRAPPER FIRST STEP SECTION-->
242
243 <when value="first_step">
<!-- Input of analysis part one: the R object written by the "Import data" mode (the former help text pointed at part 1 itself). -->
244 <param name="robject" type="data" format="rdata" label="IsoformSwitchAnalyzeR R object" help="It is generated when running the import data step." />
245 <expand macro="macro_alpha_difcutoff"/>
246 <expand macro="macro_onlysigisoforms1"/>
247 <param argument="filterForConsequences" type="boolean" truevalue="--filterForConsequences" falsevalue="" checked="false"
248 label="Filter for consquences" help="Filter for genes with functional consequences. The output will then be the number of significant genes
249 and isoforms originating from genes with predicted consequences" />
250 <section name="prefilter" title="Pre-filter parameters" help="SwitchAnalyzeR will remove genes/isoforms with the aim of allowing faster
251 processing time as well as more trustworthy results.">
252 <param argument="geneExpressionCutoff" type="float" min="0" value="1" label="Gene expression cutoff" help="The expression cutoff (most
253 likely in TPM/RPKM/FPKM) which the average expression in BOTH condisions must be higher than." />
254 <param argument="isoformExpressionCutoff" type="float" min="0" value="0" label="Isoform expresion cutoff" help="The expression cutoff (most
255 likely in RPKM/FPKM) which isoforms must be expressed more than, in at least one conditions of a comparison. Default is 0 (which removes
256 completely unused isoforms)." />
257 <expand macro="macro_ifcutoff" value="0.01" help="The cutoff on isoform usage (measured as Isoform Fraction) which isoforms must be used more
258 than in at least one conditions of a comparison" />
259 <param argument="removeSingleIsformGenes" type="boolean" truevalue="--removeSingleIsformGenes" falsevalue="" checked="true" label="Remove single isoform genes"
260 help="Only keep genes containing more than one isoform (in any comparison, after the other filters have been applied)" />
261 <expand macro="macro_keeisoforminall" checked="false"/>
262 </section>
263 <section name="dexseq" title="DEXseq parameters" help="DEXSeq is used to test isoforms (isoform resolution) for differential isoform usage.">
264 <param argument="correctForConfoundingFactors" type="boolean" truevalue="--correctForConfoundingFactors" falsevalue="" checked="true" label="Correct for confounding factors"
265 help="A logic indicating whether IsoformSwitchAnalyzeR to use limma to correct for any confounding effects (e.g. batch effects) as indicated in the
266 design matrix (as additional columns (apart from the two default columns)) " />
267 <param argument="overwriteIFvalues" type="boolean" truevalue="--overwriteIFvalues" falsevalue="" checked="true" label="Overwrite IF values" help="It indicates
268 whether to overwrite the IF and dIF stored in the switchAnalyzeRlist with the corrected IF and dIF values - if no confounding effects are
269 present in the design matrix this will not change anything" />
270 <param argument="reduceToSwitchingGenes" type="boolean" truevalue="--reduceToSwitchingGenes" falsevalue="" checked="true" label="Reduce to switch genes" help="Reduced to the
271 genes which contains at least one isoform significantly differential used (as indicated by the alpha and dIFcutoff parameters" />
272 <param argument="reduceFurtherToGenesWithConsequencePotential" type="boolean" truevalue="--reduceFurtherToGenesWithConsequencePotential" falsevalue="" checked="false"
273 label="Reduce to genes with consequence potential" help="This argument is a more strict version of reduceToSwitchingGenes
274 as it not only requires that at least one isoform is significantly differential used (as indicated by the alpha and dIFcutoff parameters) but also that there is an isoform
275 with the opposite effect size (e.g. used less if the first isoform is used more). The minimum effect size of the opposing isoform usage is also controlled by dIFcutoff.
276 The existence of such an opposing isoform means a switch pair can be formed" />
277 <expand macro="macro_keeisoforminall" checked="true"/>
278 </section>
279 <section name="novel_isoform" title="Novel isoform analysis parameters" help="For the subset of isoforms not already annotated with ORFs this
280 function predicts the most likely Open Reading Frame (ORF) and the NMD sensitivity. This function is made to help annotate
281 isoforms if you have performed (guided) de-novo isoform reconstruction (isoform deconvolution).">
282 <param argument="minORFlength" type="integer" min="0" value="100" label="Minimum ORF length" help="The minimum size (in nucleotides) an
283 ORF must be to be considered (and reported). Default is 100 nucleotides, which around 97.5% of Gencode coding isoforms in both human and mouse have." />
284 <param argument="orfMethod" type="select" label="ORF identification method" help="More information in the help section">
285 <option value="longest.AnnotatedWhenPossible">Longest and annotated when possible</option>
286 <option value="longest">Longest</option>
287 <option value="mostUpstream">Most upstream</option>
288 <option value="longestAnnotated">Longest annotated</option>
289 <option value="mostUpstreamAnnoated">Most upstream annotated</option>
290 </param>
291 <param argument="PTCDistance" type="integer" min="0" value="50" label="Maximal allowed premature termination codon-distance" help="The minimum
292 distance (number of nucleotides) from the STOP codon to the final exon-exon junction. If the distance from the STOP to the final exon-exon
293 junction is larger than this the isoform to be marked as NMD-sensitive. " />
294 </section>
295 <section name="extract_sequence" title="Sequence extraction parameters" help="switchAnalyzeR will extracts the nucleotide (NT) sequence of transcripts by
296 extracting and concatenating the sequences of a reference genome corresponding to the genomic coordinates of the isoforms. ">
297 <expand macro="macro_onlyswitching"/>
298 <param argument="removeShortAAseq" type="boolean" truevalue="--removeShortAAseq" falsevalue="" checked="true" label="Remove short aminoacid sequences" help="This
299 option exist to allows for easier usage of the Pfam and SignalP web servers which both currently have restrictions on allowed sequence lengths. If
300 enabled AA sequences are filtered to be > 5 AA. This will only affect the sequences written to the FASTA file not the sequences added to the switchAnalyzeRlist" />
301 <param argument="removeLongAAseq" type="boolean" truevalue="--removeLongAAseq" falsevalue="" checked="false" label="Remove long aminoacid sequences" help="A
302 logical indicating whether to removesequences based on their length. This option exist to allows for easier usage of the Pfam and SignalP web servers
303 which both currently have restrictions on allowed sequence lengths. If enabled AA sequences are filtered to be smaller 1000 AA. This will only affect the
304 sequences written to the fasta file (if writeToFile=TRUE) not the sequences added to the switchAnalyzeRlist. " />
305 <param argument="removeORFwithStop" type="boolean" truevalue="--removeORFwithStop" falsevalue="" checked="true" label="Remove ORFs containint STOP codons" help="ORFs
306 containing stop codons, defined as * when the ORF nucleotide sequences is translated to the amino acid sequence, should be A) removed from the ORF
307 annotation in the switchAnalyzeRlist and B) removed from the sequences added to the switchAnalyzeRlist and/or written to FASTA files. This is only
308 necessary if you are analyzing quantified known annotated data where you supplied a GTF file to the import function" />
309 </section>
310 <param name="outputs_first" type="select" display="checkboxes" multiple="true" label="Outputs selector">
311 <option value="nt" selected="true">Nucleotide sequences</option>
312 <option value="aa" selected="true">Aminoacid sequences</option>
313 <option value="summary" selected="true">Gene switch summary</option>
314 </param>
315 </when>
316
317 <!-- WRAPPER SECOND STEP SECTION-->
318
319 <when value="second_step">
<!-- Input of analysis part two: the R object written by analysis part one (the former help text said part 2, i.e. this step itself). -->
320 <param name="robject" type="data" format="rdata" label="IsoformSwitchAnalyzeR R object" help="It is generated when running the analysis part 1." />
321 <conditional name="analysis_mode">
322 <param name="selector" type="select" label="Analysis mode" help="This selector allows so specify if you want to analyze a specific gene or
323 the (top) switching genes/isoforms ">
324 <option value="top" selected="true">Full analysis</option>
325 <option value="single">Analyze specific gene</option>
326 </param>
327 <when value="top">
328 <expand macro="macro_alpha_difcutoff"/>
329 <param argument="n" type="integer" min="1" value="10" label="Number of top switching features (genes/isoforms) to plot"
330 help="This parameters allows to specify the number of top genes/isoforms to plot"/>
331 <section name="advanced_options" title="Full analysis advanced options">
332 <param argument="filterForConsequences" type="boolean" truevalue="--filterForConsequences" falsevalue="" checked="false" label="Filter genes with functional consequences"
333 help="The output will then be the number of significant genes and isoforms originating from genes with predicted consequences"/>
334 <param argument="sortByQvals" type="boolean" truevalue="--sortByQvals" falsevalue="" checked="true" label="Sorting mode" help="A logic indicating
335 whether to the top n features are sorted by decreasing significance (increasing q-values) (if enabled) or decreasing switch size
336 (absolute dIF, which are still significant as defined by alpha) (if disabled). The dIF values for genes are considered as the total
337 change within the gene calculated as sum(abs(dIF)) for each gene" />
338 <expand macro="macro_onlysigisoforms2"/>
339 <expand macro="macro_onlyswitching"/>
340 <param argument="countGenes" type="boolean" truevalue="--countGenes" falsevalue="" checked="true" label="Number genes or isoform switches counts" help="This parameter indicates
341 whether it is the number of genes (if enabled) or isoform switches (if disabled) which primary result in gain/loss that are counted" />
342 <param argument="asFractionTotal" type="boolean" truevalue="--asFractionTotal" falsevalue="" checked="false" label="Summary as numbers of as fraction" help="The consequences/splicing events
343 should be summarized calculated as numbers (if disabled) or as a fraction of the total number of switches/genes" />
344 <param argument="plotGenes" type="boolean" truevalue="--plotGenes" falsevalue="" checked="false" label="Plot number/fraction of genes or isoforms" help="Plot the number/fraction of genes with
345 (if enabled) or isoforms (if disabled) involved with isoform switches with functional consequences (both filtered via alpha and dIFcutoff)" />
346 <param argument="simplifyLocation" type="boolean" truevalue="--simplifyLocation" falsevalue="" checked="true" label="Simplify location" help="Simplify the switches involved in changes
347 in subcellular localizations (due the the hundreds of possible combinations)" />
348 <param argument="removeEmptyConsequences" type="boolean" truevalue="--removeEmptyConsequences" falsevalue="" checked="false" label="Remove empty consequences" help="Remove consequenses analyzed
349 but where no differences was found (those showing zero in the plot)" />
350 <param argument="analysisOppositeConsequence" type="boolean" truevalue="--analysisOppositeConsequence" falsevalue="" checked="false"
351 label="Analysis opposite consequences in enrichment analysis" help="Reverse the analysis meaning if 'Domain gains' are analyze will case the analysis to be performed on 'Domain loss'.
352 The main effect is for the visual appearance of plot which will be mirrored (around the 0.5 fraction)" />
353 </section>
354 </when>
355 <when value="single">
356 <param argument="gene" type="text" value="" label="Gene name" help="Either the gene_id or the gene name of the gene to plot">
357 <sanitizer invalid_char="">
358 <valid initial="string.letters,string.digits">
359 <add value="_" />
360 <add value="-" />
361 </valid>
362 </sanitizer>
363 <validator type="regex">[0-9a-zA-Z_-]+</validator>
364 </param>
365 <expand macro="macro_alpha_difcutoff"/>
366 <section name="advanced_options" title="Single gene mode advanced options">
367 <expand macro="macro_ifcutoff" value="0.05" help="The cutoff used for the minimum contribution to gene expression (in at least one
368 condition) for an isoforms must have to be plotted (measured as Isoform Fraction (IF) values)" />
369 <param argument="rescaleTranscripts" type="boolean" truevalue="--rescaleTranscripts" falsevalue="" checked="true" label="Rescale transcripts"
370 help="All the isoforms should be resealed to the square root of their original sizes. This feature is implemented because
371 introns usually are much larger than exons making it difficult to see structural changes. This is very useful for structural
372 visualization but the scaling might distort actual intron and exon sizes" />
373 <param argument="reverseMinus" type="boolean" truevalue="--reverseMinus" falsevalue="" checked="true" label="Isoforms on minus strand
374 should be inverted" help="Isoforms on minus strand should be inverted so they are visualized as going from left to right
375 instead of right to left" />
376 <param argument="addErrorbars" type="boolean" truevalue="--addErrorbars" falsevalue="" checked="true" label="Add error bars" help="Error
377 bars should be added to the expression plots to show uncertainty in estimates" />
378 <expand macro="macro_onlyswitching"/>
379 </section>
380 </when>
381 </conditional>
382 <conditional name="coding_potential">
383 <param name="selector" type="select" label="Include prediction of coding potential information"
384 help="Integrate in the analysis de output from CPAT or CPC2.">
385 <option value="disabled">Disabled</option>
386 <option value="cpat">CPAT</option>
387 <option value="cpc2">CPC2</option>
388 </param>
389 <when value="disabled"/>
390 <when value="cpat">
391 <param argument="analyzeCPAT" type="data" format="txt" label="CPAT result file"
392 help=" Use default parameters and the nucleotide fasta file (_nt.fasta). If the webserver was used, download the tab-delimited
393 result file (available at the bottom of the result page). If a stand-alone version was used, just supply the path to the result file" />
394 <param argument="codingCutoff" type="float" min="0" max="1" value="0.725" label="Coding cutoff" help="cutoff used by CPAT for distinguishing between
395 coding and non-coding transcripts. The cutoff is dependent on species analyzed. IsoformSwitchAnalyzerR developers suggest that the optimal cutoff
396 for overlapping coding and noncoding isoforms are 0.725 for human and 0.721 for mouse. However the suggested cutoffs from the CPAT develpers
397 derived by comparing known genes to random non-coding regions of the genome is 0.364 for human and 0.44 for mouse" />
398 </when>
399 <when value="cpc2">
400 <param argument="analyzeCPC2" type="data" format="txt" label="CPC2 result file"
401 help="Use default parameters and if required select the most similar species. If the webserver (batch submission) was used,
402 download the tab-delimited result file (via the “Download the result” button). If a stand-alone version was just just supply the path to the result file" />
403 <param argument="removeNoncodingORFs" type="boolean" truevalue="--removeNoncodingORFs" falsevalue="" checked="false" label="Remove non-coding ORFs" help="Remove ORF information
404 from the isoforms which the CPC2 analysis classifies as non-coding. This can be particular useful if the isoform (and ORF) was predicted de-novo but is not
405 recommended if ORFs was imported from a GTF file" />
406 <param argument="codingCutoff" type="float" min="0" max="1" value="0.5" label="Coding cutoff" help="Numeric indicating the cutoff used by CPC2 for distinguishing
407 between coding and non-coding transcripts. The cutoff appears to be species independent." />
408 </when>
409 </conditional>
410 <conditional name="protein_domains">
411 <param name="selector" type="select" label="Include Pfam information" help="Pfam is a database of protein families that includes their annotations and multiple sequence
412 alignments generated using hidden Markov models.">
413 <option value="disabled">Disabled</option>
414 <option value="enabled">Enabled</option>
415 </param>
416 <when value="disabled"/>
417 <when value="enabled">
418 <param argument="analyzePFAM" type="data" format="txt" multiple="true" optional="true" label="Include Pfam results (sequence analysis of protein domains)" help="Use default
419 parameters and the amino acid fasta file (_AA.fasta). If the webserver is used you need to copy/paste the result part of the mail you receive into an empty plain text
420 document (notepad, sublimetext, TextEdit or similar (not Word)) and save that to a plain text (txt) file. The path to that file should be supplied. If a stand-alone
421 version was used, just supply the path to the result file" />
422 </when>
423 </conditional>
424 <conditional name="signal_peptides">
425 <param name="selector" type="select" label="Include SignalP results" help="Integration of the result of SignalP (external sequence analysis of signal peptides)">
426 <option value="disabled">Disabled</option>
427 <option value="enabled">Enabled</option>
428 </param>
429 <when value="disabled"/>
430 <when value="enabled">
431 <param argument="analyzeSignalP" type="data" format="txt" multiple="true" optional="true" label="SignalP" help="Use the amino acid fasta file (_AA.fasta). If using the webserver SignalP
432 should be run with the parameter 'Short output (no figures)' under 'Output format' and one should select the appropriate 'Organism group'. When
433 using a stand-alone version SignalP should be run with the '-f summary' option. If using the webserver the results can be downloaded using the
434 'Downloads' button in the top-right corner where the user should select 'Prediction summary' and supply the path to the resulting file to the
435 'pathToSignalPresultFile' argument. If a stand-alone version was just supply the path to the summary result file." />
436 <param argument="minSignalPeptideProbability" type="float" min="0" max="1" value="0.5" label="Minimum probability for calling a signal peptide"/>
437 </when>
438 </conditional>
439
440 <conditional name="disordered_regions">
441 <param name="selector" type="select" label="Include prediction of intrinsically disordered Regions (IDR) information"
442 help="Integrate in the analysis de output from IUPred2A or NetSurfP-2">
443 <option value="disabled">Disabled</option>
444 <option value="iupred2a">IUPred2A</option>
445 <option value="netsurfp">NetSurfP-2</option>
446 </param>
447 <when value="disabled"/>
448 <when value="iupred2a">
449 <param argument="AanalyzeIUPred2A" type="data" format="txt,gz" label="IUPred2A result file" help="Can be gziped. If
450 multiple result files were created (multiple web-server runs) just supply all of them." />
451 <expand macro="macro_disordered_regions"/>
452 <param argument="annotateBindingSites" type="boolean" truevalue="--annotateBindingSites" falsevalue="" checked="true" label="Annotate binding sites"
453 help="Integrate the ANCHOR2 prediction of Intrinsically Disordered Binding Regions (IDBRs)" />
454 <param argument="minIdrBindingSize" type="integer" min="0" value="15" label="Minimum IDBR binding size" help="Minimum length a stretch of
455 binding sites must have to be considered part of an Intrinsically Disordered Binding Region (IDBR)" />
456 <param argument="minIdrBindingOverlapFrac" type="float" min="0" value="0.8" label="Minimum fraction of a predicted IDBR" help="Minimum
457 fraction of a predicted IDBR that must also be within an IDR before the IDR is considered an IDR with a binding region" />
458 </when>
459 <when value="netsurfp">
460 <param argument="analyzeNetSurfP2" type="data" format="txt,gz" multiple="true" label="NetSurfP-2 result file" help="Can be gzipped. If
461 multiple result files were created (multiple web-server runs) just supply all of them." />
462 <expand macro="macro_disordered_regions"/>
463 </when>
464 </conditional>
465 <section name="analyzeSwitchConsequences" title="Analyze switch consequences parameters">
466 <param argument="ntCutoff" type="integer" min="0" value="50" label="Nucleotide length cutoff" help="The length difference (in nucleotides) a
467 comparison must be larger than for reporting differences" />
468 <param argument="ntFracCutoff" type="float" min="0" max="1" optional="true" label="Nucleotide length fraction cutoff" help="The cutoff in length
469 difference, measured as a fraction of the length of the downregulated isoform, a comparison must be larger than for
470 reporting differences. For example, 0.05 means the upregulated isoform must be 5% longer/shorter before it is reported." />
471 <param argument="ntJCsimCutoff" type="float" min="0" max="1" value="0.8" label="Cutoff on Jaccard similarity between the overlap of two nucleotide sequences" help="If the
472 measured JCsim is smaller than this cutoff the sequences are considered different and reported as such" />
473 <param argument="AaCutoff" type="integer" min="0" value="10" label="Aminoacid length cutoff" help="Length difference (in AA) a comparison must be larger
474 than for reporting differences when evaluating ’ORF_seq_similarity’, primarily implemented to avoid differences in very short AA sequences being
475 classified as different" />
476 <param argument="AaFracCutoff" type="float" min="0" max="1" value="0.5" label="Aminoacid length fraction cutoff" help="Cutoff of length difference of
477 the protein domain or IDR. The difference is measured as a fraction of the longest region, a comparison must be larger than before reporting it" />
478 <param argument="AaJCsimCutoff" type="float" min="0" max="1" value="0.9" label="Cutoff between the overlap of two aminoacid sequences" help="If the
479 measured JCsim is smaller than this cutoff the sequences are considered different and reported as such" />
480 <param argument="removeNonConseqSwitches" type="boolean" truevalue="--removeNonConseqSwitches" falsevalue="" checked="true" label="Remove the comparison of isoforms
481 where no consequences were found" />
482 </section>
483 </when>
484 </conditional>
485 </inputs>
486 <outputs>
487 <collection name="collection_counts_factor1" type="list" label="${tool.name} on ${on_string}: gene counts factor1">
488 <discover_datasets pattern="__designation_and_ext__" format="tabular" directory="count_files/factor1" />
489 <filter>functionMode['selector'] == 'data_import'</filter>
490 <filter>functionMode['countFiles'] == 'collection'</filter>
491 </collection>
492 <collection name="collection_counts_factor2" type="list" label="${tool.name} on ${on_string}: gene counts factor2">
493 <discover_datasets pattern="__designation_and_ext__" format="tabular" directory="count_files/factor2" />
494 <filter>functionMode['selector'] == 'data_import'</filter>
495 <filter>functionMode['countFiles'] == 'collection'</filter>
496 </collection>
497 <data name="matrix_counts" format="tabular" from_work_dir="count_files/matrix.tabular" label="${tool.name} on ${on_string}: gene counts matrix">
498 <filter>functionMode['selector'] == 'data_import'</filter>
499 <filter>functionMode['countFiles'] == 'matrix'</filter>
500 </data>
501 <data name="sample_annotation" format="tabular" from_work_dir="count_files/samples.tabular" label="${tool.name} on ${on_string}: samples annotation">
502 <filter>functionMode['selector'] == 'data_import'</filter>
503 <filter>functionMode['countFiles'] == 'matrix'</filter>
504 </data>
505 <data name="switchList" format="rdata" from_work_dir="SwitchList.Rda" label="${tool.name} on ${on_string}: SwitchList (RData)"/>
506 <data name="isoformAA" format="fasta" from_work_dir="isoformSwitchAnalyzeR_isoform_AA.fasta" label="${tool.name} on ${on_string}: aminoacid sequences">
507 <filter>functionMode['selector'] == 'first_step'</filter>
508 <filter>functionMode['outputs_first'] and 'aa' in functionMode['outputs_first']</filter>
509 </data>
510 <data name="isoformNT" format="fasta" from_work_dir="isoformSwitchAnalyzeR_isoform_nt.fasta" label="${tool.name} on ${on_string}: nucleotide sequences">
511 <filter>functionMode['selector'] == 'first_step'</filter>
512 <filter>functionMode['outputs_first'] and 'nt' in functionMode['outputs_first']</filter>
513 </data>
514 <data name="switchSummary" format="tabular" from_work_dir="switchSummary.tsv" label="${tool.name} on ${on_string}: summary">
515 <filter>functionMode['selector'] == 'first_step'</filter>
516 <filter>functionMode['outputs_first'] and 'summary' in functionMode['outputs_first']</filter>
517 </data>
518 <collection name="plots_summary" type="list" label="${tool.name} on ${on_string}: genome wide plots">
519 <discover_datasets pattern="__designation_and_ext__" format="pdf" directory="pdf_outputs" />
520 <filter>functionMode['selector'] == 'second_step'</filter>
521 <filter>functionMode['analysis_mode']['selector'] == 'top'</filter>
522 </collection>
523 <collection name="genes_consequences" type="list" label="${tool.name} on ${on_string}: isoform switches with predicted functional consequences plots">
524 <discover_datasets pattern="__designation_and_ext__" format="pdf" directory="gene_plots/with_consequences" />
525 <filter>functionMode['selector'] == 'second_step'</filter>
526 <filter>functionMode['analysis_mode']['selector'] == 'top'</filter>
527 </collection>
528 <collection name="genes_wo_consequences" type="list" label="${tool.name} on ${on_string}: isoform switches without predicted functional consequences plots">
529 <discover_datasets pattern="__designation_and_ext__" format="pdf" directory="gene_plots/without_consequences" />
530 <filter>functionMode['selector'] == 'second_step'</filter>
531 <filter>functionMode['analysis_mode']['selector'] == 'top'</filter>
532 </collection>
533 <data name="mostSwitching" format="tabular" from_work_dir="mostSwitchingGene.tsv" label="${tool.name} on ${on_string}: switching gene/isoforms">
534 <filter>functionMode['selector'] == 'second_step'</filter>
535 <filter>functionMode['analysis_mode']['selector'] == 'top'</filter>
536 </data>
537 <data name="consequencesSummary" format="tabular" from_work_dir="consequencesSummary.tsv" label="${tool.name} on ${on_string}: consequences summary">
538 <filter>functionMode['selector'] == 'second_step'</filter>
539 <filter>functionMode['analysis_mode']['selector'] == 'top'</filter>
540 </data>
541 <data name="consequencesEnrichment" format="tabular" from_work_dir="consequencesEnrichment.tsv" label="${tool.name} on ${on_string}: consequences enrichment">
542 <filter>functionMode['selector'] == 'second_step'</filter>
543 <filter>functionMode['analysis_mode']['selector'] == 'top'</filter>
544 </data>
545 <data name="splicingSummary" format="tabular" from_work_dir="splicingSummary.tsv" label="${tool.name} on ${on_string}: splicing summary">
546 <filter>functionMode['selector'] == 'second_step'</filter>
547 <filter>functionMode['analysis_mode']['selector'] == 'top'</filter>
548 </data>
549 <data name="splicingEnrichment" format="tabular" from_work_dir="splicingEnrichment.tsv" label="${tool.name} on ${on_string}: splicing enrichment">
550 <filter>functionMode['selector'] == 'second_step'</filter>
551 <filter>functionMode['analysis_mode']['selector'] == 'top'</filter>
552 </data>
553 <data name="single_gene" format="pdf" from_work_dir="single_gene.pdf" label="${tool.name} on ${on_string}: single gene analysis">
554 <filter>functionMode['selector'] == 'second_step'</filter>
555 <filter>functionMode['analysis_mode']['selector'] == 'single'</filter>
556 </data>
557 </outputs>
558 <tests>
559 <!-- Test 01: Data import mode-->
560 <test expect_num_outputs="1">
561 <conditional name="functionMode">
562 <param name="selector" value="data_import"/>
563 <param name="genomeAnnotation" value="annotation_salmon.gtf.gz"/>
564 <param name="transcriptome" value="transcriptome.fasta.gz"/>
565 <param name="countFiles" value="disabled"/>
566 <section name="first_factor">
567 <param name="factorLevel" value="health"/>
568 <param name="trans_counts" value="salmon_cond1_rep1.sf,salmon_cond1_rep2.sf"/>
569 </section>
570 <section name="second_factor">
571 <param name="factorLevel" value="cancer"/>
572 <param name="trans_counts" value="salmon_cond2_rep1.sf,salmon_cond2_rep2.sf"/>
573 </section>
574 <conditional name="tool_source">
575 <param name="selector" value="salmon"/>
576 </conditional>
577 </conditional>
578 <output name="switchList" file="test01.RData" ftype="rdata" compare="sim_size" delta="100"/>
579 </test>
580 <!-- Test 02: Data import mode generate expression matrix-->
581 <test expect_num_outputs="3">
582 <conditional name="functionMode">
583 <param name="selector" value="data_import"/>
584 <param name="genomeAnnotation" value="annotation_salmon.gtf.gz"/>
585 <param name="transcriptome" value="transcriptome.fasta.gz"/>
586 <param name="countFiles" value="matrix"/>
587 <section name="first_factor">
588 <param name="factorLevel" value="health"/>
589 <param name="trans_counts" value="salmon_cond1_rep1.sf,salmon_cond1_rep2.sf"/>
590 </section>
591 <section name="second_factor">
592 <param name="factorLevel" value="cancer"/>
593 <param name="trans_counts" value="salmon_cond2_rep1.sf,salmon_cond2_rep2.sf"/>
594 </section>
595 <conditional name="tool_source">
596 <param name="selector" value="salmon"/>
597 </conditional>
598 </conditional>
599 <output name="switchList" ftype="rdata">
600 <assert_contents>
601 <has_size value="652170" delta="300"/>
602 </assert_contents>
603 </output>
604 <output name="matrix_counts" file="test02_counts.tabular" ftype="tabular" lines_diff="6"/>
605 <output name="sample_annotation" file="test02_samples_annotation.tabular" ftype="tabular"/>
606 </test>
607 <!-- Test 03: Data import mode generate collection count files-->
608 <test expect_num_outputs="3">
609 <conditional name="functionMode">
610 <param name="selector" value="data_import"/>
611 <param name="genomeAnnotation" value="annotation_salmon.gtf.gz"/>
612 <param name="transcriptome" value="transcriptome.fasta.gz"/>
613 <param name="countFiles" value="collection"/>
614 <section name="first_factor">
615 <param name="factorLevel" value="health"/>
616 <param name="trans_counts" value="salmon_cond1_rep1.sf,salmon_cond1_rep2.sf"/>
617 </section>
618 <section name="second_factor">
619 <param name="factorLevel" value="cancer"/>
620 <param name="trans_counts" value="salmon_cond2_rep1.sf,salmon_cond2_rep2.sf"/>
621 </section>
622 <conditional name="tool_source">
623 <param name="selector" value="salmon"/>
624 </conditional>
625 </conditional>
626 <output name="switchList" ftype="rdata">
627 <assert_contents>
628 <has_size value="652170" delta="300"/>
629 </assert_contents>
630 </output>
631 <output_collection name="collection_counts_factor1" type="list" count="2">
632 <element name="cancer0_dataset" file="test03_cancer_counts.tabular" ftype="tabular" lines_diff="6"/>
633 </output_collection>
634 <output_collection name="collection_counts_factor2" type="list" count="2">
635 <element name="health0_dataset" file="test03_health_counts.tabular" ftype="tabular" lines_diff="6"/>
636 </output_collection>
637 </test>
638 <!-- Test 04: Extract isoform switches all outputs-->
639 <test expect_num_outputs="4">
640 <conditional name="functionMode">
641 <param name="selector" value="first_step"/>
642 <param name="robject" value="test01.RData"/>
643 <param name="alpha" value="0.05"/>
644 <param name="dIFcutoff" value="0.1"/>
645 <param name="onlySigIsoforms" value="false"/>
646 <param name="filterForConsequences" value="false"/>
647 <param name="outputs_first" value="nt,aa,summary"/>
648 <section name="prefilter">
649 <param name="geneExpressionCutoff" value="1"/>
650 <param name="isoformExpressionCutoff" value="0"/>
651 <param name="IFcutoff" value="0.01"/>
652 <param name="removeSingleIsformGenes" value="true"/>
653 <param name="keepIsoformInAllConditions" value="false"/>
654 </section>
655 <section name="dexseq">
656 <param name="correctForConfoundingFactors" value="true"/>
657 <param name="overwriteIFvalues" value="true"/>
658 <param name="reduceToSwitchingGenes" value="true"/>
659 <param name="reduceFurtherToGenesWithConsequencePotential" value="false"/>
660 <param name="keepIsoformInAllConditions" value="true"/>
661 </section>
662 <section name="novel_isoform">
663 <param name="minORFlength" value="100"/>
664 <param name="orfMethod" value="longest.AnnotatedWhenPossible"/>
665 <param name="PTCDistance" value="50"/>
666 </section>
667 <section name="extract_sequence">
668 <param name="onlySwitchingGenes" value="true"/>
669 <param name="removeShortAAseq" value="true"/>
670 <param name="removeLongAAseq" value="false"/>
671 <param name="removeORFwithStop" value="true"/>
672 </section>
673 </conditional>
674 <output name="switchList" file="test04.RData" ftype="rdata" compare="sim_size" delta="100"/>
675 <output name="isoformAA" ftype="fasta">
676 <assert_contents>
677 <has_size value="138275" delta="300"/>
678 <has_text text=">TCONS_00000007"/>
679 <has_text text="MLLPPGSLSRPRTFSSQPLQT"/>
680 </assert_contents>
681 </output>
682 <output name="isoformNT" ftype="fasta">
683 <assert_contents>
684 <has_size value="780375" delta="300"/>
685 <has_text text=">TCONS_00000007"/>
686 <has_text text="GGGTCTCCCTCTGTTGTCCAAGGC"/>
687 </assert_contents>
688 </output>
689 <output name="switchSummary" file="test04_summary.tabular" ftype="tabular"/>
690 </test>
691 <!-- Test 05: Extract isoform switches alternative parameters-->
692 <test expect_num_outputs="1">
693 <conditional name="functionMode">
694 <param name="selector" value="first_step"/>
695 <param name="robject" value="test01.RData"/>
696 <param name="outputs_first" value=""/>
697 <section name="dexseq">
698 <param name="correctForConfoundingFactors" value="true"/>
699 <param name="overwriteIFvalues" value="true"/>
700 <param name="reduceToSwitchingGenes" value="true"/>
701 <param name="reduceFurtherToGenesWithConsequencePotential" value="true"/>
702 <param name="keepIsoformInAllConditions" value="true"/>
703 </section>
704 <section name="novel_isoform">
705 <param name="orfMethod" value="mostUpstream"/>
706 </section>
707 </conditional>
708 <output name="switchList" ftype="rdata">
709 <assert_contents>
710 <has_size value="500518" delta="300"/>
711 </assert_contents>
712 </output>
713 </test>
714 <!--Test 06: generate plots and summaries full analysis-->
715 <test expect_num_outputs="9">
716 <conditional name="functionMode">
717 <param name="selector" value="second_step"/>
718 <param name="robject" value="test04.RData"/>
719 <section name="analyzeSwitchConsequences">
720 <param name="ntCutoff" value="50"/>
721 <param name="ntJCsimCutoff" value="0.8"/>
722 <param name="AaCutoff" value="10"/>
723 <param name="AaFracCutoff" value="0.5"/>
724 <param name="AaJCsimCutoff" value="0.9"/>
725 <param name="removeNonConseqSwitches" value="true"/>
726 </section>
727 <conditional name="analysis_mode">
728 <param name="selector" value="top"/>
729 <param name="alpha" value="0.05"/>
730 <param name="dIFcutoff" value="0.1"/>
731 <param name="n" value="2"/>
732 <section name="advanced_options">
733 <param name="filterForConsequences" value="false"/>
734 <param name="sortByQvals" value="true"/>
735 <param name="onlySigIsoforms" value="false"/>
736 <param name="onlySwitchingGenes" value="true"/>
737 <param name="countGenes" value="true"/>
738 <param name="asFractionTotal" value="false"/>
739 <param name="plotGenes" value="false"/>
740 <param name="simplifyLocation" value="true"/>
741 <param name="removeEmptyConsequences" value="false"/>
742 <param name="analysisOppositeConsequence" value="false"/>
743 </section>
744 </conditional>
745 </conditional>
746 <output name="switchList" ftype="rdata">
747 <assert_contents>
748 <has_size value="531580" delta="300"/>
749 </assert_contents>
750 </output>
751 <output_collection name="plots_summary" type="list" count="7">
752 <element name="consequencesEnrichment" ftype="pdf">
753 <assert_contents>
754 <has_size value="5995" delta="300"/>
755 </assert_contents>
756 </element>
757 <element name="extractConsequencesSummary" ftype="pdf">
758 <assert_contents>
759 <has_size value="5681" delta="300"/>
760 </assert_contents>
761 </element>
762 <element name="splicingEnrichment" ftype="pdf">
763 <assert_contents>
764 <has_size value="6361" delta="300"/>
765 </assert_contents>
766 </element>
767 <element name="splicingGenomewide" ftype="pdf">
768 <assert_contents>
769 <has_size value="68069" delta="300"/>
770 </assert_contents>
771 </element>
772 <element name="splicingSummary" ftype="pdf">
773 <assert_contents>
774 <has_size value="5990" delta="300"/>
775 </assert_contents>
776 </element>
777 <element name="switchGene" ftype="pdf">
778 <assert_contents>
779 <has_size value="3611" delta="300"/>
780 </assert_contents>
781 </element>
782 <element name="volcanoPlot" ftype="pdf">
783 <assert_contents>
784 <has_size value="3611" delta="300"/>
785 </assert_contents>
786 </element>
787 </output_collection>
788 <output_collection name="genes_consequences" type="list" count="2">
789 <element name="1_switch_plot_NADK_aka_NADK" ftype="pdf">
790 <assert_contents>
791 <has_size value="8716" delta="300"/>
792 </assert_contents>
793 </element>
794 <element name="2_switch_plot_PRKCZ_aka_PRKCZ" ftype="pdf">
795 <assert_contents>
796 <has_size value="8463" delta="300"/>
797 </assert_contents>
798 </element>
799 </output_collection>
800 <output_collection name="genes_wo_consequences" type="list" count="2">
801 <element name="1_switch_plot_CLSTN1_aka_CLSTN1" ftype="pdf">
802 <assert_contents>
803 <has_size value="8039" delta="300"/>
804 </assert_contents>
805 </element>
806 <element name="2_switch_plot_ZBTB40_aka_ZBTB40" ftype="pdf">
807 <assert_contents>
808 <has_size value="7506" delta="300"/>
809 </assert_contents>
810 </element>
811 </output_collection>
812 <output name="mostSwitching" file="test06_switching.tabular" ftype="tabular" lines_diff="4"/>
813 <output name="consequencesSummary" file="test06_consequences_summary.tabular" ftype="tabular" lines_diff="4"/>
814 <output name="consequencesEnrichment" file="test06_consequences_enrichment.tabular" ftype="tabular" lines_diff="4"/>
815 <output name="splicingSummary" file="test06_splicing_summary.tabular" ftype="tabular" lines_diff="4"/>
816 <output name="splicingEnrichment" file="test06_splicing_enrichment.tabular" ftype="tabular" lines_diff="4"/>
817 </test>
818 <!--Test 07: generate plots and summaries full analysis all inputs-->
819 <test expect_num_outputs="9">
820 <conditional name="functionMode">
821 <param name="selector" value="second_step"/>
822 <param name="robject" value="test04.RData"/>
823 <section name="analyzeSwitchConsequences">
824 <param name="ntCutoff" value="20"/>
825 <param name="ntJCsimCutoff" value="0.5"/>
826 <param name="AaCutoff" value="10"/>
827 <param name="AaFracCutoff" value="0.4"/>
828 <param name="AaJCsimCutoff" value="0.8"/>
829 <param name="removeNonConseqSwitches" value="false"/>
830 </section>
831 <conditional name="analysis_mode">
832 <param name="selector" value="top"/>
833 <param name="alpha" value="0.05"/>
834 <param name="dIFcutoff" value="0.1"/>
835 <param name="n" value="2"/>
836 <section name="advanced_options">
837 <param name="filterForConsequences" value="false"/>
838 <param name="sortByQvals" value="true"/>
839 <param name="onlySigIsoforms" value="false"/>
840 <param name="onlySwitchingGenes" value="true"/>
841 <param name="countGenes" value="true"/>
842 <param name="asFractionTotal" value="false"/>
843 <param name="plotGenes" value="false"/>
844 <param name="simplifyLocation" value="true"/>
845 <param name="removeEmptyConsequences" value="false"/>
846 <param name="analysisOppositeConsequence" value="false"/>
847 </section>
848 </conditional>
849 <conditional name="coding_potential">
850 <param name="selector" value="cpc2"/>
851 <param name="analyzeCPC2" value="cpc2_result.txt"/>
852 <param name="removeNoncodingORFs" value="false"/>
853 <param name="codingCutoff" value="0.5"/>
854 </conditional>
855 <conditional name="protein_domains">
856 <param name="selector" value="enabled"/>
857 <param name="analyzePFAM" value="pfam_results.txt"/>
858 </conditional>
859 <conditional name="signal_peptides">
860 <param name="selector" value="enabled"/>
861 <param name="analyzeSignalP" value="signalP_results.txt"/>
862 <param name="minSignalPeptideProbability" value="0.5"/>
863 </conditional>
864 <conditional name="disordered_regions">
865 <param name="selector" value="iupred2a"/>
866 <param name="AanalyzeIUPred2A" value="iupred2a_result.txt.gz"/>
867 <param name="smoothingWindowSize" value="5"/>
868 <param name="probabilityCutoff" value="0.5"/>
869 <param name="minIdrSize" value="30"/>
870 <param name="annotateBindingSites" value="true"/>
871 <param name="minIdrBindingSize" value="15"/>
872 <param name="minIdrBindingOverlapFrac" value="0.8"/>
873 </conditional>
874 </conditional>
875 <output name="switchList" ftype="rdata">
876 <assert_contents>
877 <has_size value="542120" delta="300"/>
878 </assert_contents>
879 </output>
880 <output_collection name="plots_summary" type="list" count="7">
881 <element name="consequencesEnrichment" ftype="pdf">
882 <assert_contents>
883 <has_size value="5995" delta="300"/>
884 </assert_contents>
885 </element>
886 <element name="extractConsequencesSummary" ftype="pdf">
887 <assert_contents>
888 <has_size value="6617" delta="300"/>
889 </assert_contents>
890 </element>
891 <element name="splicingEnrichment" ftype="pdf">
892 <assert_contents>
893 <has_size value="6361" delta="300"/>
894 </assert_contents>
895 </element>
896 <element name="splicingGenomewide" ftype="pdf">
897 <assert_contents>
898 <has_size value="68069" delta="300"/>
899 </assert_contents>
900 </element>
901 <element name="splicingSummary" ftype="pdf">
902 <assert_contents>
903 <has_size value="5990" delta="300"/>
904 </assert_contents>
905 </element>
906 <element name="switchGene" ftype="pdf">
907 <assert_contents>
908 <has_size value="3611" delta="300"/>
909 </assert_contents>
910 </element>
911 <element name="volcanoPlot" ftype="pdf">
912 <assert_contents>
913 <has_size value="3611" delta="300"/>
914 </assert_contents>
915 </element>
916 </output_collection>
917 <output_collection name="genes_consequences" type="list" count="2">
918 <element name="1_switch_plot_NADK_aka_NADK" ftype="pdf">
919 <assert_contents>
920 <has_size value="8716" delta="300"/>
921 </assert_contents>
922 </element>
923 <element name="2_switch_plot_PRKCZ_aka_PRKCZ" ftype="pdf">
924 <assert_contents>
925 <has_size value="8463" delta="300"/>
926 </assert_contents>
927 </element>
928 </output_collection>
929 <output_collection name="genes_wo_consequences" type="list" count="2">
930 <element name="1_switch_plot_CLSTN1_aka_CLSTN1" ftype="pdf">
931 <assert_contents>
932 <has_size value="8559" delta="300"/>
933 </assert_contents>
934 </element>
935 <element name="2_switch_plot_ZBTB40_aka_ZBTB40" ftype="pdf">
936 <assert_contents>
937 <has_size value="8051" delta="300"/>
938 </assert_contents>
939 </element>
940 </output_collection>
941 <output name="mostSwitching" ftype="tabular">
942 <assert_contents>
943 <has_size value="4062" delta="50"/>
944 <has_text text="RPL11"/>
945 </assert_contents>
946 </output>
947 <output name="consequencesSummary" ftype="tabular">
948 <assert_contents>
949 <has_size value="1192" delta="50"/>
950 <has_text text="nrGenesWithConsequences"/>
951 </assert_contents>
952 </output>
953 <output name="consequencesEnrichment" ftype="tabular">
954 <assert_contents>
955 <has_size value="1432" delta="50"/>
956 <has_text text="NMD insensitive (paired with NMD sensitive"/>
957 </assert_contents>
958 </output>
959 <output name="splicingSummary" ftype="tabular">
960 <assert_contents>
961 <has_size value="892" delta="50"/>
962 <has_text text="MEE in isoform used less"/>
963 </assert_contents>
964 </output>
965 <output name="splicingEnrichment" ftype="tabular">
966 <assert_contents>
967 <has_size value="1157" delta="50"/>
968 <has_text text="A5 gain (paired with A5 loss)"/>
969 </assert_contents>
970 </output>
971 </test>
972 <!-- Test 08: analyze single gene-->
973 <test expect_num_outputs="2">
974 <conditional name="functionMode">
975 <param name="selector" value="second_step"/>
976 <param name="robject" value="test04.RData"/>
977 <conditional name="analysis_mode">
978 <param name="selector" value="single"/>
979 <param name="gene" value="NADK"/>
980 </conditional>
981 </conditional>
982 <output name="single_gene" ftype="pdf" file="test08_single_gene.pdf" compare="sim_size"/>
983 <output name="switchList" ftype="rdata">
984 <assert_contents>
985 <has_size value="531580" delta="300"/>
986 </assert_contents>
987 </output>
988 </test>
989 </tests>
990 <help><![CDATA[
991
992 .. class:: infomark
993
994 **Purpose**
995
996 IsoformSwitchAnalyzeR is an easy-to-use R package that enables statistical identification of isoform switching from RNA-seq derived quantification
997 of novel and/or annotated full-length isoforms. IsoformSwitchAnalyzeR facilitates integration of many sources of (predicted) annotation such as Open
998 Reading Frame (ORF/CDS), protein domains (via Pfam), signal peptides (via SignalP), Intrinsically Disordered Regions (IDR, via NetSurfP-2 or IUPred2A),
999 coding potential (via CPAT or CPC2) and sensitivity to Non-sense Mediated Decay (NMD) and more. The combination of identified isoform switches and
1000 their annotation enables IsoformSwitchAnalyzeR to predict potential functional consequences of the identified isoform switches — such as loss of
1001 protein domains — thereby identifying isoform switches of particular interest. Lastly, IsoformSwitchAnalyzeR provides article-ready visualization
1002 methods for isoform switches for individual genes as well as both summary statistics and visualization of the genome-wide changes/consequences of
1003 isoform switches, their consequences and the associated alternative splicing.
1004
1005 -----
1006
1007 .. class:: infomark
1008
1009 **Differential isoform expression (DIE) and differential isoform usage (DIU)**
1010
1011 Differential isoform expression (DIE) and differential isoform usage (DIU) are related but distinct concepts. DIE assesses the difference in
1012 absolute expression at the isoform level. In contrast, DIU assesses the difference in relative expression at the isoform level. For example, if the
1013 expression levels of two isoforms of one gene are 10 and 20 in control and 50 and 100 in case, then there is DIE but no DIU because the relative
1014 expression of the first isoform is 1/3 in both case and control.
1015
1016 -----
1017
1018 .. class:: infomark
1019
1020 **ORF identification methods (novel isoform analysis)**
1021
1022 - **Longest**: Identifies the longest ORF in the transcript (after filtering via minORFlength). This approach is similar to what the CPAT tool uses in its analysis of coding potential.
1023 - **LongestAnnotated**: Identifies the longest ORF (after filtering via minORFlength) downstream of an annotated translation start site (which are supplied via the cds argument).
1024 - **Longest.AnnotatedWhenPossible**: A merge between "longestAnnotated" and "longest". For all isoforms where CDS start positions from known isoforms overlap, only these CDS starts are considered and the longest ORF is annotated (similar to "longestAnnotated"). All isoforms without any overlapping CDS start sites will be analysed with the "longest" approach.
1025 - **MostUpstream**: Identifies the most upstream ORF in the transcript (after filtering via minORFlength).
1026 - **MostUpstreamAnnoated**: Identifies the ORF (after filtering via minORFlength) downstream of the most upstream overlapping annotated translation start site (supplied via the cds argument).
1027
1028 ]]></help>
1029 <expand macro="citations" />
1030 </tool>