annotate data_manager/rnastar_index_builder.xml @ 4:102bdfdda10b draft

Uploaded
author fubar
date Thu, 08 Jan 2015 17:53:14 -0500
parents 8a2d16bfdae2
children 503097df1670
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
1 <tool id="rnastar_index_builder_data_manager" name="rnastar index" tool_type="manage_data" version="0.0.1">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
2 <description>builder</description>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
3 <requirements>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
4 <requirement type="package" version="2.4.0d">rnastar</requirement>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
5 </requirements>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
6 <command interpreter="python">
4
102bdfdda10b Uploaded
fubar
parents: 2
diff changeset
7 rnastar_index_builder.py "${out_file}" --fasta_filename "${all_fasta_source.fields.path}"
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
8 --fasta_dbkey "${all_fasta_source.fields.dbkey}" --fasta_description "${all_fasta_source.fields.name}"
4
102bdfdda10b Uploaded
fubar
parents: 2
diff changeset
9 --runThreadN 1
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
10 #if $genemodel.modelformat=="gff3":
4
102bdfdda10b Uploaded
fubar
parents: 2
diff changeset
11 --sjdbGTFchrPrefix "${genemodel.sjdbGTFchrPrefix}"
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
12 --sjdbOverhang "${genemodel.sjdbOverhang}"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
13 --sjdbGTFfile "${genemodel.sjdbGTFfile}"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
14 --sjdbGTFtagExonParentTranscript "${genemodel.sjdbGTFtagExonParentTranscript}"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
15 --sjdbGTFfeatureExon "${genemodel.sjdbGTFfeatureExon}"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
16 #end if
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
17 #if $genemodel.modelformat=="bed":
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
18 --sjdbFileChrStartEnd "${genemodel.sjdbFileChrStartEnd}"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
19 --sjdbOverhang "${genemodel.sjdbOverhang}"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
20 #end if
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
21 #if $genemodel.modelformat=="None":
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
22 --sjdbOverhang 0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
23 #end if
4
102bdfdda10b Uploaded
fubar
parents: 2
diff changeset
24 --data_table_name "rnastar_index"
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
25 </command>
4
102bdfdda10b Uploaded
fubar
parents: 2
diff changeset
26
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
27 <stdio>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
28 <regex match=".*" source="both" level="warning" description="stdout/err chatter:"/>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
29 </stdio>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
30 <inputs>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
31 <param name="all_fasta_source" type="select" label="Source FASTA Sequence">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
32 <options from_data_table="all_fasta"/>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
33 </param>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
34 <param type="text" name="sequence_name" value="" label="Informative name for sequence index" />
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
35 <param type="text" name="sequence_id" value="" label="ID for sequence index" />
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
36
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
37
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
38 <conditional name="genemodel">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
39 <param name="modelformat" type="select"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
40 label="Choose the format of gene model data from your history - bed or gff3"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
41 help="This will be the source of splice junction indexing if required">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
42 <option value="gff3" selected="true">gff3,gtf</option>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
43 <option value="bed">BED - tabular chr,start,end,strand</option>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
44 <option value="None" >None - no splice junction index</option>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
45 </param>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
46 <when value="gff3">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
47 <param type="data" format="gff3,gff" name="sjdbGTFfile" value="" label="Gene model - must be gff3 or compatible and must match the input genome"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
48 help="Required if you want to index splice junctions during index generation." />
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
49
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
50 <param type="text" name="sjdbGTFchrPrefix" value="chr" label="String prefix for GTF chromosomes"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
51 help='GTF prefix for chromosome names (e.g. "chr" to use ENSEMBL annotations with UCSC genomes)' >
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
52 <sanitizer invalid_char="">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
53 <valid initial="string.printable"/>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
54 </sanitizer>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
55 </param>
4
102bdfdda10b Uploaded
fubar
parents: 2
diff changeset
56 <param type="text" name="sjdbGTFfeatureExon" value="exon" label="GTF feature to use as exon marker"
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
57 help="GTF feature type in GTF file to be used as exons for building transcripts - use what's in your GTF">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
58 <sanitizer invalid_char="">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
59 <valid initial="string.printable"/>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
60 </sanitizer>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
61 </param>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
62
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
63 <param type="text" name="sjdbGTFtagExonParentTranscript" value="transcript_id" label="GTF feature to define for each exon's parents"
4
102bdfdda10b Uploaded
fubar
parents: 2
diff changeset
64 help="GTF tag name to be used as exons' parents for building transcripts - use what's in your gene model file, e.g. Parent for gff3">
0
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
65 <sanitizer invalid_char="">
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
66 <valid initial="string.printable"/>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
67 </sanitizer>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
68 </param>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
69
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
70 <param type="integer" name="sjdbOverhang" value="100" label="Splice junction overhang. If=0, splice junction database NOT used"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
71 help="int>=0: length of the donor/acceptor sequence on each side, (mate_length - 1)" />
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
72
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
73 </when>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
74 <when value='bed'>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
75 <param type="data" format="bed" name="sjdbFileChrStartEnd" value="" label="Introns as a tabular bed (chr,start,end,strand) file matching the input genome"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
76 help="Required if you want to index splice junctions during index generation." />
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
77 <param type="integer" name="sjdbOverhang" value="100" label="Splice junction overhang. If=0, splice junction database NOT used"
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
78 help="int>=0: length of the donor/acceptor sequence on each side, (mate_length - 1)" />
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
79 </when>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
80 <when value='None'>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
81 </when>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
82 </conditional>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
83 </inputs>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
84 <outputs>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
85 <data name="out_file" format="data_manager_json"/>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
86 </outputs>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
87 <help>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
88
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
89 .. class:: infomark
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
90
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
91 *What it does*
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
92
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
93 This is a Galaxy datamanager for the rna STAR gap-aware RNA aligner.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
94
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
95 Please read the fine manual - that and the google group are the places to learn about the options above.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
96
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
97 *Note on sjdbOverhang*
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
98
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
99 From https://groups.google.com/forum/#!topic/rna-star/h9oh10UlvhI::
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
100
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
101 James is right, using large enough --sjdbOverhang is safer and should not generally cause any problems with reads of varying length. If your reads are very short, &lt;50b, then I would strongly recommend using optimum --sjdbOverhang=mateLength-1
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
102 By mate length I mean the length of one of the ends of the read, i.e. it's 100 for 2x100b PE or 1x100b SE. For longer reads you can simply use generic --sjdbOverhang 100.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
103 It is a bit confusing because of the way I named this parameter. --sjdbOverhang Noverhang is only used at the genome generation step for constructing the reference sequence out of the annotations.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
104 Basically, the Noverhang exonic bases from the donor site and Noverhang exonic bases from the acceptor site are spliced together for each of the junctions, and these spliced sequences are added to the genome sequence.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
105
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
106 At the mapping stage, the reads are aligned to both genomic and splice sequences simultaneously. If a read maps to one of the spliced sequences and crosses the "junction" in the middle of it, the coordinates of two spliced pieces are translated back to genomic space and added to the collection of mapped pieces, which are then all "stitched" together to form the final alignment. Since in the process of "maximal mapped length" search the read is split into pieces of no longer than --seedSearchStartLmax (=50 by default) bases, even if the read (mate) is longer than --sjdbOverhang, it can still be mapped to the spliced reference, as long as --sjdbOverhang > --seedSearchStartLmax.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
107
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
108 Cheers
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
109 Alex
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
110
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
111 *Note on gene model requirements for splice junctions*
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
112
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
113 From https://groups.google.com/forum/#!msg/rna-star/3Y_aaTuzBrE/lUylTB8h5vMJ::
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
114
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
115 When you generate a genome with annotations, you need to specify --sjdbOverhang value, which ideally should be equal to (oneMateLength-1), or you could use a generic value of ~100.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
116
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
117 Your gtf lines look fine to me. STAR needs 3 features from a GTF file:
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
118 1. Chromosome names in col.1 that agree with chromosome names in genome .fasta files. If you have "chr2L" names in the genome .fasta files, and "2L" in the .gtf file, then you need to use --sjdbGTFchrPrefix chr option.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
119 2. 'exon' in col.3 for the exons of all transcripts (this name can be changed with --sjdbGTFfeatureExon)
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
120 3. 'transcript_id' attribute that assigns each exon to a transcript (this name can be changed with --sjdbGTFtagExonParentTranscript)
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
121
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
122 Cheers
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
123 Alex
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
124
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
125 **Notice:** If you leave name, description, or id blank, it will be generated automatically.
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
126
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
127 </help>
84f6e67cbae5 Initial commit - problems testing on my ram starved laptop - need to test on a bigger ram machine
fubar
parents:
diff changeset
128 </tool>