diff tools/mira4/mira4_mapping.xml @ 4:df86ed992a1b draft

Uploaded preview 4, lots of work on mapping
author peterjc
date Fri, 11 Oct 2013 04:28:45 -0400
parents 32f693f6e741
children ffefb87bd414
line wrap: on
line diff
--- a/tools/mira4/mira4_mapping.xml	Thu Sep 26 12:30:08 2013 -0400
+++ b/tools/mira4/mira4_mapping.xml	Fri Oct 11 04:28:45 2013 -0400
@@ -1,5 +1,5 @@
 <tool id="mira_4_0_mapping" name="MIRA v4.0 mapping" version="0.0.1">
-    <description>Takes Sanger, Roche, Illumina, Ion Torrent and PacBio data</description>
+    <description>Maps Sanger, Roche 454, Solexa/Illumina, Ion Torrent and PacBio reads</description>
     <requirements>
         <requirement type="python-module">Bio</requirement>
         <requirement type="binary">mira</requirement>
@@ -18,8 +18,18 @@
             <option value="accurate">Accurate</option>
             <option value="draft">Draft</option>
         </param>
+	<!-- TODO? Allow technology type for references? -->
+	<!-- TODO? Allow strain settings for reference(s) and reads? -->
+	<!-- TODO? Use a repeat to allow for multi-strain references? -->
+        <!-- TODO? Add strain to the mapping read groups? -->
+	<param name="references" type="data" format="fasta,fastq,mira" multiple="true" required="true" label="Backbone reference file(s)"
+               help="Multiple files allowed, for example one FASTA file per chromosome or plasmid." />
+        <param name="strain_setup" type="select" label="Strain configuration (reference vs reads)">
+            <option value="default">Different strains - mapping reads onto a related reference ('StrainX' vs 'ReferenceStrain')</option>
+            <option value="same">Same strain - mapping reads from same reference (all 'StrainX')</option>
+        </param>
         <repeat name="read_group" title="Read Group" min="1">
-            <param name="technology" type="select" label="Read technology" help="MIRA has different error models for different technologies">
+            <param name="technology" type="select" label="Read technology">
                 <option value="solexa">Solexa/Illumina</option>
                 <option value="sanger">Sanger cappillary sequencing</option>
                 <option value="454">Roche 454</option>
@@ -27,17 +37,15 @@
                 <option value="pcbiolq">PacBio low quality (raw)</option>
                 <option value="pcbiohq">PacBio high quality (corrected)</option>
                 <option value="text">Synthetic reads (database entries, consensus sequences, artifical reads, etc)</option>
-		<!-- TODO reference/backbone as an entry here? -->
             </param>
-	    <repeat name="reads" title="Reads" min="1" help="Paired reads can be combined into one file, or given as two files. MIRA will look at the read names to identify pairs.">
-                <param name="filename" type="data" format="fastq" label="Reads in FASTQ format" />
-            </repeat>
+            <param name="filenames" type="data" format="fastq,mira" multiple="true" required="true" label="Read file(s)"
+                   help="Multiple files allowed, for example paired reads can be given as two files (MIRA looks at read names to identify pairs)." />
         </repeat>
     </inputs>
     <outputs>
-        <data name="out_fasta" format="fasta" label="MIRA contigs (FASTA)" />
-        <data name="out_maf" format="mira" label="MIRA Assembly" />
-        <data name="out_log" format="txt" label="MIRA log" />
+        <data name="out_fasta" format="fasta" label="MIRA #if str($strain_setup)=='same' then 'same strain' else 'reference' # mapping contigs (FASTA)" />
+        <data name="out_maf" format="mira" label="MIRA #if str($strain_setup)=='same' then 'same strain' else 'reference' # mapping assembly" />
+        <data name="out_log" format="txt" label="MIRA #if str($strain_setup)=='same' then 'same strain' else 'reference' # mapping log" />
     </outputs>
     <configfiles>
         <configfile name="manifest">
@@ -54,21 +62,80 @@
 ## -DI:trt is short for -DIRECTORY:tmp_redirected_to and should
 ## point to a local hard drive (not something like NFS on network).
 
+##This bar goes into the manifest as a comment line
+#------------------------------------------------------------------------------
+
+readgroup
+is_reference
+#if str($strain_setup)=="same"
+strain = StrainX
+#end if
+#for $f in $references
+##Must now map Galaxy datatypes to MIRA file types...
+#if $f.ext.startswith("fastq")
+##MIRA doesn't like fastqsanger etc, just plain old fastq:
+data = fastq::$f
+#elif $f.ext == "mira"
+##We're calling *.maf the "mira" format in Galaxy (namespace collision)
+data = maf::$f
+#elif $f.ext == "fasta"
+##We're giving MIRA the file type "fna" for FASTA, as otherwise it wants quality values too
+data = fna::$f
+#else
+##Currently don't expect anything else...
+data = ${f.ext}::$f
+#end if
+#end for
 #for $rg in $read_group
-#=======================================================
+
+##This bar goes into the manifest as a comment line
+#------------------------------------------------------------------------------
+
 readgroup
 technology = ${rg.technology}
+#if str($strain_setup)=="same"
+##This is perhaps redundant as MIRA defaults to StrainX for the reads:
+strain = StrainX
+#end if
 ##MIRA will accept multiple filenames on one data line, or multiple data lines
-#for f in $rg.reads
-data = ${f.filename}
+#for $f in $rg.filenames
+##Must now map Galaxy datatypes to MIRA file types...
+#if $f.ext.startswith("fastq")
+##MIRA doesn't like fastqsanger etc, just plain old fastq:
+data = fastq::$f
+#elif $f.ext == "mira"
+##We're calling *.maf the "mira" format in Galaxy (namespace collision)
+data = maf::$f
+#else
+##Currently don't expect anything else...
+data = ${f.ext}::$f
+#end if
 #end for
-### Cheetah doesn't want dollar sign on list comprehension intermediate variables
-###set $files = ' '.join([str(f['filename']) for f in rg['reads']])
-##data = $files
 #end for
         </configfile>
     </configfiles>
     <tests>
+        <!-- Deliberately using default read_group.technology value "solexa"
+             as then Galaxy's broken <repeat> handling in tests should work... -->
+        <!-- Tests currently failing,
+             TwillException: more than one form; you must select one (use 'fv') before submitting
+        <test>
+            <param name="job_type" value="genome" />
+            <param name="job_quality" value="accurate" />
+            <param name="references" value="tvc_contigs.fasta" ftype="fasta" />
+            <param name="strain_setup" value="default" />
+            <param name="filenames" value="tvc_mini.fastq" ftype="fastqsanger" />
+            <output name="out_fasta" file="tvc_map_ref_strain.fasta" ftype="fasta" />
+        </test>
+        <test>
+            <param name="job_type" value="genome" />
+            <param name="job_quality" value="accurate" />
+            <param name="references" value="tvc_contigs.fasta" ftype="fasta" />
+            <param name="strain_setup" value="same" />
+            <param name="filenames" value="tvc_mini.fastq" ftype="fastqsanger" />
+            <output name="out_fasta" file="tvc_map_same_strain.fasta" ftype="fasta" />
+        </test>
+        -->
     </tests>
     <help>