comparison tools/mira4/mira4_mapping.xml @ 4:df86ed992a1b draft

Uploaded preview 4, lots of work on mapping
author peterjc
date Fri, 11 Oct 2013 04:28:45 -0400
parents 32f693f6e741
children ffefb87bd414
comparison
equal deleted inserted replaced
3:c7538ae82a24 4:df86ed992a1b
1 <tool id="mira_4_0_mapping" name="MIRA v4.0 mapping" version="0.0.1"> 1 <tool id="mira_4_0_mapping" name="MIRA v4.0 mapping" version="0.0.1">
2 <description>Takes Sanger, Roche, Illumina, Ion Torrent and PacBio data</description> 2 <description>Maps Sanger, Roche 454, Solexa/Illumina, Ion Torrent and PacBio reads</description>
3 <requirements> 3 <requirements>
4 <requirement type="python-module">Bio</requirement> 4 <requirement type="python-module">Bio</requirement>
5 <requirement type="binary">mira</requirement> 5 <requirement type="binary">mira</requirement>
6 <requirement type="package" version="4.0">MIRA</requirement> 6 <requirement type="package" version="4.0">MIRA</requirement>
7 </requirements> 7 </requirements>
16 </param> 16 </param>
17 <param name="job_quality" type="select" label="Assembly quality grade"> 17 <param name="job_quality" type="select" label="Assembly quality grade">
18 <option value="accurate">Accurate</option> 18 <option value="accurate">Accurate</option>
19 <option value="draft">Draft</option> 19 <option value="draft">Draft</option>
20 </param> 20 </param>
21 <!-- TODO? Allow technology type for references? -->
22 <!-- TODO? Allow strain settings for reference(s) and reads? -->
23 <!-- TODO? Use a repeat to allow for multi-strain references? -->
24 <!-- TODO? Add strain to the mapping read groups? -->
25 <param name="references" type="data" format="fasta,fastq,mira" multiple="true" required="true" label="Backbone reference file(s)"
26 help="Multiple files allowed, for example one FASTA file per chromosome or plasmid." />
27 <param name="strain_setup" type="select" label="Strain configuration (reference vs reads)">
28 <option value="default">Different strains - mapping reads onto a related reference ('StrainX' vs 'ReferenceStrain')</option>
29 <option value="same">Same strain - mapping reads from same reference (all 'StrainX')</option>
30 </param>
21 <repeat name="read_group" title="Read Group" min="1"> 31 <repeat name="read_group" title="Read Group" min="1">
22 <param name="technology" type="select" label="Read technology" help="MIRA has different error models for different technologies"> 32 <param name="technology" type="select" label="Read technology">
23 <option value="solexa">Solexa/Illumina</option> 33 <option value="solexa">Solexa/Illumina</option>
24 <option value="sanger">Sanger capillary sequencing</option> 34 <option value="sanger">Sanger capillary sequencing</option>
25 <option value="454">Roche 454</option> 35 <option value="454">Roche 454</option>
26 <option value="iontor">Ion Torrent</option> 36 <option value="iontor">Ion Torrent</option>
27 <option value="pcbiolq">PacBio low quality (raw)</option> 37 <option value="pcbiolq">PacBio low quality (raw)</option>
28 <option value="pcbiohq">PacBio high quality (corrected)</option> 38 <option value="pcbiohq">PacBio high quality (corrected)</option>
29 <option value="text">Synthetic reads (database entries, consensus sequences, artificial reads, etc)</option> 39 <option value="text">Synthetic reads (database entries, consensus sequences, artificial reads, etc)</option>
30 <!-- TODO reference/backbone as an entry here? -->
31 </param> 40 </param>
32 <repeat name="reads" title="Reads" min="1" help="Paired reads can be combined into one file, or given as two files. MIRA will look at the read names to identify pairs."> 41 <param name="filenames" type="data" format="fastq,mira" multiple="true" required="true" label="Read file(s)"
33 <param name="filename" type="data" format="fastq" label="Reads in FASTQ format" /> 42 help="Multiple files allowed, for example paired reads can be given as two files (MIRA looks at read names to identify pairs)." />
34 </repeat>
35 </repeat> 43 </repeat>
36 </inputs> 44 </inputs>
37 <outputs> 45 <outputs>
38 <data name="out_fasta" format="fasta" label="MIRA contigs (FASTA)" /> 46 <data name="out_fasta" format="fasta" label="MIRA #if str($strain_setup)=='same' then 'same strain' else 'reference' # mapping contigs (FASTA)" />
39 <data name="out_maf" format="mira" label="MIRA Assembly" /> 47 <data name="out_maf" format="mira" label="MIRA #if str($strain_setup)=='same' then 'same strain' else 'reference' # mapping assembly" />
40 <data name="out_log" format="txt" label="MIRA log" /> 48 <data name="out_log" format="txt" label="MIRA #if str($strain_setup)=='same' then 'same strain' else 'reference' # mapping log" />
41 </outputs> 49 </outputs>
42 <configfiles> 50 <configfiles>
43 <configfile name="manifest"> 51 <configfile name="manifest">
44 project = MIRA 52 project = MIRA
45 job = mapping,${job_type},${job_quality} 53 job = mapping,${job_type},${job_quality}
52 ## due to limitations of some downstream tools. 60 ## due to limitations of some downstream tools.
53 ## 61 ##
54 ## -DI:trt is short for -DIRECTORY:tmp_redirected_to and should 62 ## -DI:trt is short for -DIRECTORY:tmp_redirected_to and should
55 ## point to a local hard drive (not something like NFS on network). 63 ## point to a local hard drive (not something like NFS on network).
56 64
65 ##This bar goes into the manifest as a comment line
66 #------------------------------------------------------------------------------
67
68 readgroup
69 is_reference
70 #if str($strain_setup)=="same"
71 strain = StrainX
72 #end if
73 #for $f in $references
74 ##Must now map Galaxy datatypes to MIRA file types...
75 #if $f.ext.startswith("fastq")
76 ##MIRA doesn't like fastqsanger etc, just plain old fastq:
77 data = fastq::$f
78 #elif $f.ext == "mira"
79 ##We're calling *.maf the "mira" format in Galaxy (name space collision)
80 data = maf::$f
81 #elif $f.ext == "fasta"
82 ##We're calling MIRA with the file type as "fna" as otherwise it wants quals
83 data = fna::$f
84 #else
85 ##Currently don't expect anything else...
86 data = ${f.ext}::$f
87 #end if
88 #end for
57 #for $rg in $read_group 89 #for $rg in $read_group
58 #======================================================= 90
91 ##This bar goes into the manifest as a comment line
92 #------------------------------------------------------------------------------
93
59 readgroup 94 readgroup
60 technology = ${rg.technology} 95 technology = ${rg.technology}
96 #if str($strain_setup)=="same"
97 ##This is perhaps redundant as MIRA defaults to StrainX for the reads:
98 strain = StrainX
99 #end if
61 ##MIRA will accept multiple filenames on one data line, or multiple data lines 100 ##MIRA will accept multiple filenames on one data line, or multiple data lines
62 #for f in $rg.reads 101 #for $f in $rg.filenames
63 data = ${f.filename} 102 ##Must now map Galaxy datatypes to MIRA file types...
103 #if $f.ext.startswith("fastq")
104 ##MIRA doesn't like fastqsanger etc, just plain old fastq:
105 data = fastq::$f
106 #elif $f.ext == "mira"
107 ##We're calling *.maf the "mira" format in Galaxy (name space collision)
108 data = maf::$f
109 #else
110 ##Currently don't expect anything else...
111 data = ${f.ext}::$f
112 #end if
64 #end for 113 #end for
65 ### Cheetah doesn't want dollar sign on list comprehension intermediate variables
66 ###set $files = ' '.join([str(f['filename']) for f in rg['reads']])
67 ##data = $files
68 #end for 114 #end for
69 </configfile> 115 </configfile>
70 </configfiles> 116 </configfiles>
71 <tests> 117 <tests>
118 <!-- Deliberately using default read_group.technology value "solexa"
119 as then Galaxy's broken <repeat> handling in tests should work... -->
120 <!-- Tests currently failing,
121 TwillException: more than one form; you must select one (use 'fv') before submitting
122 <test>
123 <param name="job_type" value="genome" />
124 <param name="job_quality" value="accurate" />
125 <param name="references" value="tvc_contigs.fasta" ftype="fasta" />
126 <param name="strain_setup" value="default" />
127 <param name="filenames" value="tvc_mini.fastq" ftype="fastqsanger" />
128 <output name="out_fasta" file="tvc_map_ref_strain.fasta" ftype="fasta" />
129 </test>
130 <test>
131 <param name="job_type" value="genome" />
132 <param name="job_quality" value="accurate" />
133 <param name="references" value="tvc_contigs.fasta" ftype="fasta" />
134 <param name="strain_setup" value="same" />
135 <param name="filenames" value="tvc_mini.fastq" ftype="fastqsanger" />
136 <output name="out_fasta" file="tvc_map_same_strain.fasta" ftype="fasta" />
137 </test>
138 -->
72 </tests> 139 </tests>
73 <help> 140 <help>
74 141
75 **What it does** 142 **What it does**
76 143