comparison fastq_screen.xml @ 0:8a8adbf98ecc draft

First upload
author iuc
date Fri, 16 May 2014 07:57:33 -0400
parents
children 3480daf4ed27
comparison
equal deleted inserted replaced
-1:000000000000 0:8a8adbf98ecc
<tool id="fastq_screen" name="fastq_screen" version="0.4.2">
    <!--
        Galaxy wrapper for the fastq_screen perl script. It aligns a sample of the
        reads in one FASTQ file (or one pair of files) against every bowtie2 index
        chosen in the form and returns the text and PNG summaries the script writes.
    -->
    <description>Screen for contamination</description>
    <requirements>
        <requirement type="package" version="0.4.2">fastq_screen</requirement>
        <requirement type="package" version="2.1.0">bowtie2</requirement>
    </requirements>

    <!--
        Command template (Cheetah). Points of note:
        * the read-count option is only emitted for a positive sampN; the value is
          converted to int explicitly so the test does not depend on how the
          parameter wrapper implements comparison;
        * the FASTQ inputs live inside the singlePaired conditional and are
          addressed through it;
        * paired runs pass fastq_screen's paired-end flag, so the two files are
          screened as one library and exactly one PNG and one text report are
          produced for the mv steps below;
        * steps are chained with a logical AND so a failed screen is not masked by
          the exit status of a later mv.
        The body is CDATA so the comparison and AND operators need no escaping.
    -->
    <command><![CDATA[
        fastq_screen --aligner="bowtie2" --outdir="." --conf="$fastqrunconf"
        #if int(str($sampN)) > 0:
            --subset "$sampN"
        #end if
        #if $singlePaired.sPaired == "paired":
            --paired "$singlePaired.input1" "$singlePaired.input2"
        #else
            "$singlePaired.input1"
        #end if
        && mv *_screen.png "${outpng}"
        && mv *_screen.txt "${outtext}"
    ]]></command>

    <!--
        The script reports progress on stderr, so any output is recorded as a
        warning rather than an error; a non-zero exit status of the command chain
        is what marks the job as failed.
    -->
    <stdio>
        <exit_code range="1:" level="fatal" description="fastq_screen failed or did not produce the expected report files"/>
        <regex match=".*" source="both" level="warning" description="fastq_screen perl script output"/>
    </stdio>

    <inputs>
        <!-- Free-text tag used only to label the two outputs; sanitised to letters, digits and underscore. -->
        <param name="jobName" type="text" size="120" value="fastq_screen"
               label="Job narrative (included in output names as a reminder)"
               help="Only letters, numbers and underscores _ will be retained in this field">
            <sanitizer invalid_char="">
                <valid initial="string.letters,string.digits">
                    <add value="_"/>
                </valid>
            </sanitizer>
        </param>

        <!-- Number of reads to sample; zero or a negative value disables sampling. -->
        <param name="sampN" type="integer" size="20" value="500000"
               label="Sample this number of reads. Set to 0 or less to use all"
               help="Time/precision trade off - fewer reads takes a little less time trading off precision of the estimates."/>

        <!-- Library layout: one FASTQ for single-end, forward and reverse FASTQ for paired-end. -->
        <conditional name="singlePaired">
            <param name="sPaired" type="select" label="Single ended or mate-pair ended reads in this library?">
                <option value="single" selected="true">Single-end</option>
                <option value="paired">Paired-end</option>
            </param>
            <when value="single">
                <param name="input1" type="data" format="fastqsanger,fastq"
                       label="RNA-Seq FASTQ file"
                       help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>
            </when>
            <when value="paired">
                <param name="input1" type="data" format="fastqsanger,fastq"
                       label="RNA-Seq FASTQ file, forward reads"
                       help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>
                <param name="input2" type="data" format="fastqsanger,fastq"
                       label="RNA-Seq FASTQ file, reverse reads"
                       help="Nucleotide-space: Must have Sanger-scaled quality values with ASCII offset 33"/>
            </when>
        </conditional>

        <!-- Genome source: one or more entries from the bowtie2_indexes data table. -->
        <repeat name="refGenomes" min="1"
                title="Installed organism reference sequences to check for alignment to your fastq"
                help="For checking cell culture sequence for contamination, Mycoplasma Genitalium might be a good choice eg">
            <param name="ref" type="select" label="Bowtie2 reference genome">
                <options from_data_table="bowtie2_indexes">
                    <filter type="sort_by" column="3"/>
                    <validator type="no_options" message="No indexes are available for bowtie2"/>
                </options>
            </param>
        </repeat>
    </inputs>

    <outputs>
        <!-- Both reports are produced by fastq_screen in the working directory and moved here by the command. -->
        <data format="tabular" name="outtext" label="${jobName}.xls"/>
        <data format="png" name="outpng" label="${jobName}.png"/>
    </outputs>

    <!--
        Per-run fastq_screen configuration: one DATABASE line per selected index,
        built from the value and path columns of the chosen data table row.
        No BOWTIE2 entry is written. The previous hard-coded /data/app/bin/bowtie2
        only existed on one site; bowtie2 is declared as a requirement above, so
        the aligner is expected to be resolved from PATH.
        NOTE(review): confirm fastq_screen 0.4.2 falls back to PATH when the
        BOWTIE2 entry is absent.
        The first line inside the template starts with the Cheetah comment marker
        and therefore does not reach the generated file.
    -->
    <configfiles>
        <configfile name="fastqrunconf">
###### autogenerated by fastq_screen.xml for fastq_screen run
#for $refs in $refGenomes:
DATABASE $refs.ref.fields.value $refs.ref.fields.path BOWTIE2
#end for
        </configfile>
    </configfiles>

    <help>

**What it does**

This is a Galaxy wrapper exposing software from Babraham - Fastq_screen_

Designed to search sequence data in fastq files for matches to contaminants or to check the likely
species.

In QC checking, you can use it to look for (eg) sequence from contaminating mycoplasmae in cell cultures - it may be non-differential but it will be pro-inflammatory and, well, less than ideal.

Here's the help from the perl script used by this wrapper::

    Fastq Screen - Screen sequences against a panel of databases

    Synopsis

      fastq_screen [OPTION]... [FastQ FILE]...

    Function

      Fastq Screen is intended to be used as part of a QC pipeline.
      It allows you to take a sequence dataset and search it
      against a set of bowtie databases. It will then generate
      both a text and a graphical summary of the results to see if
      the sequence dataset contains the kind of sequences you expect
      or not.

    Options

      --help -h       Print program help and exit

      --subset        Don't use the whole sequence file to search, but
                      create a temporary dataset of this size. The
                      dataset created will be approximately (within
                      a factor of 2) this size. If the real dataset
                      is smaller than twice the specified size then the
                      whole dataset will be used. Subsets will be taken
                      evenly from throughout the whole original dataset

      --paired        Files are paired end. Files must be specified in
                      the correct order with pairs of files coming
                      immediately after one another. Results files will
                      be named after the first file in the pair if the
                      names differ between the two files.

      --outdir        Specify a directory in which to save output files.
                      If no directory is specified then output files
                      are saved into the same directory as the input
                      file.

      --illumina1_3   Assume that the quality values are encoded in
                      Illumina v1.3 format. Defaults to Sanger format
                      if this flag is not specified

      --quiet         Suppress all progress reports on stderr and only
                      report errors

      --version       Print the program version and exit

      --threads       Specify across how many threads bowtie will be
                      allowed to run. Overrides the default value set
                      in the conf file

      --conf          Manually specify a location for the configuration
                      file to be used for this run. If not specified
                      then the file will be taken from the same directory
                      as the fastq_screen program

      --color         FastQ files are in colorspace. This requires that
                      the libraries configured in the config file are
                      colorspace indices.

      --bowtie        Specify extra parameters to be passed to bowtie.
                      These parameters should be quoted to clearly
                      delimit bowtie parameters from fastq_screen
                      parameters. You should not try to use this option
                      to override the normal search or reporting options
                      for bowtie which are set automatically but it might
                      be useful to allow reads to be trimmed before
                      alignment etc.

      --bowtie2       Specify extra parameters to be passed to bowtie 2.
                      These parameters should be quoted to clearly
                      delimit bowtie2 parameters from fastq_screen
                      parameters. You should not try to use this option
                      to override the normal search or reporting options
                      for bowtie which are set automatically but it might
                      be useful to allow reads to be trimmed before
                      alignment etc.

      --nohits        Writes to a file the sequences that did not map to
                      any of the specified genome libraries. If the
                      subset option is also specified, only reads from
                      the temporary dataset that failed to align to the
                      reference genomes will be written to the output file.

      --aligner       Specify the aligner to use for the mapping. Valid
                      arguments are 'bowtie' or 'bowtie2'.


**Attributions**

Note that each component has its own license.
Good luck with figuring out your obligations.

fastq_screen - see the web site at Fastq_screen_

Galaxy_ (that's what you are using right now!) for gluing everything together

Code and documentation comprising this tool was written by Ross Lazarus and that part is Licensed_ the same way as other rgenetics artefacts

.. _Fastq_screen: http://www.bioinformatics.babraham.ac.uk/projects/fastq_screen

.. _Galaxy: http://getgalaxy.org

.. _Licensed: https://www.gnu.org/licenses/lgpl.html

    </help>
</tool>