Mercurial > repos > iuc > fastq_screen

diff fastq_screen.xml @ 0:8a8adbf98ecc draft
First upload
author: iuc
date: Fri, 16 May 2014 07:57:33 -0400
children: 3480daf4ed27
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_screen.xml	Fri May 16 07:57:33 2014 -0400
@@ -0,0 +1,189 @@
+<tool id="fastq_screen" name="fastq_screen" version="0.4.2">
+    <description>Screen for contamination</description>
+    <!-- Packages this tool depends on: the fastq_screen script and the bowtie2 aligner it is run with. -->
+    <requirements>
+        <requirement version="0.4.2" type="package">fastq_screen</requirement>
+        <requirement version="2.1.0" type="package">bowtie2</requirement>
+    </requirements>
+    <!-- Command template (Cheetah). Runs fastq_screen with bowtie2 in the job working
+         directory, then moves the text and PNG reports onto the Galaxy output datasets.
+         Steps are chained with && so a failed screen is not hidden by the move steps. -->
+    <command>
+    fastq_screen --aligner="bowtie2" --outdir="." --conf="$fastqrunconf"
+    ## Subsample only when a positive read count was requested; 0 or less means use all reads.
+    #if int(str($sampN)) &gt; 0:
+    --subset "$sampN"
+    #end if
+    ## Inputs live inside the singlePaired conditional, so they are referenced through it.
+    ## Paired runs need the paired flag so both files are screened together and a single
+    ## report (named after the first file) is produced for the move steps below.
+    #if str($singlePaired.sPaired) == "paired":
+    --paired "$singlePaired.input1" "$singlePaired.input2"
+    #else
+    "$singlePaired.input1"
+    #end if
+    &amp;&amp; mv *_screen.png "${outpng}" &amp;&amp; mv *_screen.txt "${outtext}"
+    </command>
+
+    <!-- Job outcome rules: a non-zero exit status fails the job; anything written to
+         stdout or stderr is only flagged as a warning. -->
+    <stdio>
+        <exit_code range="1:" level="fatal" description="fastq_screen or the output move step exited with an error"/>
+        <regex match=".*" source="both" level="warning" description="fastq_screen perl script output"/>
+    </stdio>
+
+    <inputs>
+        <!-- Free-text job name; it is reused in the labels of both outputs.
+             The sanitizer drops every character except letters, digits and underscore. -->
+        <param name="jobName"
+               type="text"
+               size="120"
+               value="fastq_screen"
+               label="Job narrative (included in output names as a reminder)"
+               help="Only letters, numbers and underscores _ will be retained in this field">
+            <sanitizer invalid_char="">
+                <valid initial="string.letters,string.digits">
+                    <add value="_"/>
+                </valid>
+            </sanitizer>
+        </param>
+
+        <!-- Number of reads to subsample; the command only passes a subset size when this is above 0. -->
+        <param name="sampN"
+               type="integer"
+               size="20"
+               value="500000"
+               label="Sample this number of reads. Set to 0 or less to use all"
+               help="Time/precision trade off - fewer reads takes a little less time trading off precision of the estimates."/>
+
+        <!-- Library layout: the paired branch adds a second FASTQ input for the reverse reads. -->
+        <conditional name="singlePaired">
+            <param name="sPaired"
+                   type="select"
+                   label="Single ended or mate-pair ended reads in this library?">
+                <option value="single" selected="true">Single-end</option>
+                <option value="paired">Paired-end</option>
+            </param>
+            <when value="single">
+                <param name="input1"
+                       type="data"
+                       format="fastqsanger,fastq"
+                       label="RNA-Seq FASTQ file"
+                       help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>
+            </when>
+            <when value="paired">
+                <param name="input1"
+                       type="data"
+                       format="fastqsanger,fastq"
+                       label="RNA-Seq FASTQ file, forward reads"
+                       help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>
+                <param name="input2"
+                       type="data"
+                       format="fastqsanger,fastq"
+                       label="RNA-Seq FASTQ file, reverse reads"
+                       help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>
+            </when>
+        </conditional>
+
+        <!-- Genome source: one bowtie2 index selection per repeat entry, at least one required.
+             Choices are read from the bowtie2_indexes data table. -->
+        <repeat name="refGenomes"
+                title="Installed organism reference sequences to check for alignment to your fastq"
+                min="1"
+                help="For checking cell culture sequence for contamination, Mycoplasma Genitalium might be a good choice eg">
+            <param name="ref" type="select" label="Bowtie2 reference genome">
+                <options from_data_table="bowtie2_indexes">
+                    <filter type="sort_by" column="3"/>
+                    <validator type="no_options" message="No indexes are available for bowtie2"/>
+                </options>
+            </param>
+        </repeat>
+    </inputs>
+
+    <!-- Result datasets: the tabular summary and the PNG plot, both labelled with the job name. -->
+    <outputs>
+        <data name="outtext" format="tabular" label="${jobName}.xls"/>
+        <data name="outpng" format="png" label="${jobName}.png"/>
+    </outputs>
+    <!-- Per-run fastq_screen configuration (Cheetah template): one DATABASE line is
+         written for every reference genome selected in the refGenomes repeat. -->
+    <configfiles>
+         <configfile name="fastqrunconf">
+## Autogenerated by fastq_screen.xml for this fastq_screen run.
+## No BOWTIE2 path line is written here: a hard-coded site-specific location such as
+## /data/app/bin/bowtie2 only works on one server. The aligner is expected to be found
+## on PATH via the bowtie2 requirement (NOTE(review): confirm against fastq_screen 0.4.2).
+#for $refs in $refGenomes:
+DATABASE    $refs.ref.fields.value $refs.ref.fields.path    BOWTIE2
+#end for
+         </configfile>
+    </configfiles>
+    
+<help>
+
+**What it does**
+This is a Galaxy wrapper exposing fastq_screen_, software from Babraham.
+It is designed to search sequence data in fastq files for matches to contaminants, or to check the likely
+species of origin.
+In QC checking, you can use it to look for (e.g.) sequence from contaminating mycoplasmae in cell cultures - such contamination may be non-differential but it will be pro-inflammatory and, well, less than ideal.
+
+Here's the help from the perl script used by this wrapper:
+
+Fastq Screen - Screen sequences against a panel of databases
+
+Synopsis
+
+  fastq_screen [OPTION]... [FastQ FILE]...
+
+Function
+
+  Fastq Screen is intended to be used as part of a QC pipeline.
+  It allows you to take a sequence dataset and search it
+  against a set of bowtie databases.  It will then generate
+  both a text and a graphical summary of the results to see if
+  the sequence dataset contains the kind of sequences you expect
+  or not.
+
+Options
+
+  --help -h      Print program help and exit
+
+  --subset       Don't use the whole sequence file to search, but
+                 create a temporary dataset of this size. The
+                 dataset created will be approximately (within
+                 a factor of 2) this size. If the real dataset
+                 is smaller than twice the specified size then the
+                 whole dataset will be used. Subsets will be taken
+                 evenly from throughout the whole original dataset
+
+  --paired       Files are paired end. Files must be specified in
+                 the correct order with pairs of files coming
+                 immediately after one another. Results files will
+                 be named after the first file in the pair if the
+                 names differ between the two files.
+
+  --outdir       Specify a directory in which to save output files.
+                 If no directory is specified then output files
+                 are saved into the same directory as the input
+                 file.
+
+  --illumina1_3  Assume that the quality values are encoded in
+                 Illumina v1.3 format. Defaults to Sanger format
+                 if this flag is not specified
+
+  --quiet        Suppress all progress reports on stderr and only
+                 report errors
+
+  --version      Print the program version and exit
+
+  --threads      Specify across how many threads bowtie will be
+                 allowed to run. Overrides the default value set
+                 in the conf file
+
+  --conf         Manually specify a location for the configuration
+                 file to be used for this run. If not specified 
+                 then the file will be taken from the same directory 
+                 as the fastq_screen program
+
+  --color        FastQ files are in colorspace. This requires that 
+                 the libraries configured in the config file are 
+                 colorspace indices.
+
+  --bowtie       Specify extra parameters to be passed to bowtie. 
+                 These parameters should be quoted to clearly 
+                 delimit bowtie parameters from fastq_screen 
+                 parameters. You should not try to use this option 
+                 to override the normal search or reporting options 
+                 for bowtie which are set automatically but it might 
+                 be useful to allow reads to be trimmed before
+                 alignment etc.
+
+  --bowtie2      Specify extra parameters to be passed to bowtie 2. 
+                 These parameters should be quoted to clearly 
+                 delimit bowtie2 parameters from fastq_screen 
+                 parameters. You should not try to use this option 
+                 to override the normal search or reporting options 
+                 for bowtie which are set automatically but it might 
+                 be useful to allow reads to be trimmed before
+                 alignment etc.
+
+  --nohits       Writes to a file the sequences that did not map to 
+                 any of the specified genome libraries. If the 
+                 subset option is also specified, only reads from 
+                 the temporary dataset that failed to align to the
+                 reference genomes will be written to the output file.
+
+  --aligner     Specify the aligner to use for the mapping. Valid 
+                arguments are 'bowtie' or 'bowtie2'.
+  
+    
+**Attributions**
+
+Note that each component has its own license.
+Good luck with figuring out your obligations.
+
+fastq_screen - see the web site at Fastq_screen_
+
+Galaxy_ (that's what you are using right now!) for gluing everything together 
+
+
+The code and documentation comprising this tool were written by Ross Lazarus, and that part is Licensed_ the same way as other rgenetics artefacts.
+
+.. _Fastq_screen: http://www.bioinformatics.babraham.ac.uk/projects/fastq_screen
+
+.. _Galaxy: http://getgalaxy.org
+
+.. _Licensed: https://www.gnu.org/licenses/lgpl.html
+
+</help>
+</tool>
author	iuc
date	Fri, 16 May 2014 07:57:33 -0400
parents
children	3480daf4ed27