Mercurial > repos > bgruening > upload_testing
changeset 44:894ba1eba734
Uploaded
| author | bgruening | 
|---|---|
| date | Fri, 02 Aug 2013 12:39:14 -0400 | 
| parents | 975312d6c591 | 
| children | ef436465bf16 | 
| files | bamCompare.xml bamCorrelate.xml bamFingerprint.xml computeGCBias.xml computeMatrix.xml correctGCBias.xml | 
| diffstat | 6 files changed, 972 insertions(+), 0 deletions(-) [+] | 
line wrap: on
 line diff
<!--
  bamCompare.xml: Galaxy tool wrapper for the bamCompare command.

  Takes a treatment and an input BAM file and produces one coverage file
  (bigwig or bedgraph) holding the per-bin log2 ratio, ratio or difference.
  The command section is a Cheetah template; Cheetah directives start with
  a single hash and Cheetah comments with a double hash.
-->
<tool id="bamCompare" name="bamCompare" version="1.0">
  <description>Normalize and compare two BAM files to output ratio, log2ratio or difference.</description>
  <requirements>
    <requirement type="package" version="1.7.1">numpy</requirement>
    <requirement type="python-module">argparse</requirement>
    <requirement type="python-module">pysam</requirement>
    <requirement type="python-module">numpy</requirement>
  </requirements>
  <command>
    bamCompare
    ## NOTE(review): the index options use a single dash; confirm against the bamCompare CLI.
    --bamfile1 '$bamFile1'
    -bai1 '${bamFile1.metadata.bam_index}'
    --bamfile2 '$bamFile2'
    -bai2 '${bamFile2.metadata.bam_index}'

    --outFileName '$outFileName'
    --outFileFormat '$outFileFormat'

    --fragmentLength $fragmentLength
    --binSize $binSize

    ## Exactly one scaling strategy is passed, chosen by the "scaling" conditional.
    #if $scaling.method == 'SES':
      --scaleFactorsMethod SES
      --sampleLength $scaling.sampleLength
    #elif $scaling.method == 'readCount':
      --scaleFactorsMethod readCount
    #elif $scaling.method == 'own':
      --scaleFactors '$scaling.scaleFactor1:$scaling.scaleFactor2'
    #end if

    --ratio $comparison.type

    ## The normalization sub-conditional only exists for the "subtract" comparison.
    #if $comparison.type=='subtract':
      #if $comparison.normalization.type=='rpkm':
        --normalizeUsingRPKM
      #elif $comparison.normalization.type=='1x':
        --normalizeTo1x $comparison.normalization.normalizeTo1x
      #end if
    #end if

    #if $advancedOpt.showAdvancedOpt == "yes":
      #if $advancedOpt.smoothLength:
        --smoothLength '$advancedOpt.smoothLength'
      #end if

      #if str($advancedOpt.region.value) != '':
        --region '$advancedOpt.region'
      #end if
      ## Boolean params expand to their flag (truevalue) or to nothing (falsevalue).
      $advancedOpt.doNotExtendPairedEnds
      $advancedOpt.ignoreDuplicates

      #if $advancedOpt.minMappingQuality:
        --minMappingQuality '$advancedOpt.minMappingQuality'
      #end if

      --missingDataAsZero $advancedOpt.missingDataAsZero

    #end if
    ## Use the slot count granted by the Galaxy job runner; fall back to 4 when it is unset.
    --numberOfProcessors "\${GALAXY_SLOTS:-4}"
  </command>

  <inputs>
    <param name="bamFile1" format="bam" type="data" label="Treatment BAM file"
           help="The BAM file must be sorted and indexed."/>

    <param name="bamFile2" format="bam" type="data" label="Input BAM file"
           help="The BAM file must be sorted and indexed."/>

    <param name="fragmentLength" type="integer" value="300" min="1"
           label="Length of the average fragment size"
           help="Reads will be extended to match this length unless they are paired-end, in which case they will be extended to match the fragment length. If this value is set to the read length or smaller, the read will not be extended. *Warning* the fragment length affects the normalization to 1x (see &quot;normalize coverage to 1x&quot;). The formula to normalize using the sequencing depth is genomeSize/(number of mapped reads * fragment length). *NOTE*: If the BAM files contain mated and unmated paired-end reads, unmated reads will be extended to match the fragment length."/>

    <param name="binSize" type="integer" value="50" min="1"
           label="Bin size in bp"
           help="The genome will be divided in bins (also called tiles) of the specified length. For each bin the overlapping number of fragments (or reads) will be reported. If only half a fragment overlaps, this fraction will be reported. "/>

    <!-- How the two samples are scaled against each other before comparison. -->
    <conditional name="scaling">
      <param name="method" type="select"
             label="Method to use for scaling the largest sample to the smallest">
        <option value="readCount" selected="true">read count</option>
        <option value="SES">signal extraction scaling (SES)</option>
        <option value="own">enter own scaling factors</option>
      </param>
      <when value="SES">
        <param name="sampleLength" type="integer" value="1000" min="10"
               label="Length in base pairs used to sample the genome and compute the size or scaling factors to compare the two BAM files "
               help="The default is fine. Only change it if you know what you are doing" />
      </when>
      <when value="readCount" />
      <when value="own">
        <param name="scaleFactor1" type="float" value="1"
               label="Scale factor for treatment"/>

        <param name="scaleFactor2" type="float" value="1"
               label="Scale factor for input"/>
      </when>
    </conditional>

    <!-- Comparison operation; only "subtract" offers coverage normalization. -->
    <conditional name="comparison">
      <param name="type" type="select"
             label="How to compare the two files">
        <option value="log2" selected="true">compute log2 of the number of reads ratio</option>
        <option value="ratio">compute the ratio of the number of reads</option>
        <option value="subtract">compute difference (subtract input from treatment) of the number of reads</option>
      </param>
      <when value="log2" />
      <when value="ratio" />
      <when value="subtract">
        <conditional name="normalization">
          <param name="type" type="select" label="Normalization method" >
            <option value="1x">Normalize coverage to 1x</option>
            <option value="rpkm">Normalize to fragments (reads) per kilobase per million (RPKM)</option>
            <option value="no">Do not normalize or scale</option>
          </param>
          <when value="rpkm" />
          <when value="no" />
          <when value="1x">
            <param name="normalizeTo1x" type="integer" value="2150570000"
                   label="Report normalized coverage to 1x sequencing depth"
                   help="Sequencing depth is defined as the total number of mapped reads * fragment length / effective genome size. To use this option, the effective genome size has to be given. Common values are: mm9: 2150570000, hg19:2451960000, dm3:121400000 and ce10:93260000."/>
          </when>
        </conditional>
      </when>
    </conditional>

    <param name="outFileFormat" type="select" label="Coverage file format">
      <option value="bigwig" selected="true">bigwig</option>
      <option value="bedgraph">bedgraph</option>
    </param>

    <!-- Advanced options are only added to the command line when switched on. -->
    <conditional name="advancedOpt">
      <param name="showAdvancedOpt" type="select" label="Show advanced options" >
        <option value="no" selected="true">no</option>
        <option value="yes">yes</option>
      </param>
      <when value="no" />
      <when value="yes">

        <param name="smoothLength" type="integer" value="1" optional="true" min="1"
               label="Smooth values using the following length (in bp)"
               help="The smooth length defines a window, larger than the bin size, to average the number of reads. For example, if the bin size is set to 20 bp and the smooth length is set to 60 bp, then, for each bin size the average of it and its left and right neighbors is considered. Any value smaller than the bin size will be ignored and no smoothing will be applied."/>

        <param name="region" type="text" value=""
               label="Region of the genome to limit the operation to"
               help="This is useful when testing parameters to reduce the computing time. The format is chr:start:end, for example &quot;chr10&quot; or &quot;chr10:456700:891000&quot;" />

        <param name="doNotExtendPairedEnds" type="boolean" truevalue="--doNotExtendPairedEnds" falsevalue=""
               label="Do not extend paired ends"
               help="If set, reads are not extended to match the fragment length reported in the BAM file, instead they will be extended to match the fragment length. Default is to extend the reads if paired end information is available."/>

        <param name="ignoreDuplicates" type="boolean" truevalue="--ignoreDuplicates" falsevalue=""
               label="Ignore duplicates"
               help="If set, reads that have the same orientation and start position will be considered only once. If reads are paired, the mate position also has to coincide to ignore a read." />

        <param name="minMappingQuality" type="integer" optional="true" value="1" min="1"
               label="Minimum mapping quality"
               help="If set, only reads that have a mapping quality score higher than the given value are considered"/>

        <param name="missingDataAsZero" type="boolean" truevalue="yes" falsevalue="no" checked="True"
               label="Treat missing data as zero"
               help="This parameter determines if missing data should be treated as zeros. If unchecked, missing data will be ignored and not included in the output file. Missing data is defined as those regions for which both BAM files have 0 reads." />

      </when>
    </conditional>

  </inputs>
  <outputs>
    <!-- The datatype of the single output follows the selected coverage file format. -->
    <data format="bigwig" name="outFileName">
      <change_format>
        <when input="outFileFormat" value="bigwig" format="bigwig" />
        <when input="outFileFormat" value="bedgraph" format="bedgraph" />
      </change_format>
    </data>
  </outputs>
  <help>

**What it does**

This tool compares two BAM files based on the number of mapped reads. To
compare the BAM files the genome is partitioned into bins of equal size, then
the number of reads found in each BAM file are counted for such bins and
finally a summarizing value is reported. This value can be the ratio of the
number of reads per bin, the log2 of the ratio or the difference. This tool
can normalize the number of reads on each BAM file using the SES method
proposed by Diaz et al. (2012). "Normalization, bias correction, and peak
calling for ChIP-seq". Statistical applications in genetics and molecular
biology, 11(3). Normalization based on read counts is also available. The
output is either a bedgraph or a bigwig file containing the bin location and
the resulting comparison values. By default if reads are mated the fragment
length reported in the BAM file is used.

-----

.. class:: infomark

Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please send an email to `Fidel Ramirez`_.

This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.


.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de

  </help>

</tool>
<!--
  bamCorrelate.xml: Galaxy tool wrapper for the bamCorrelate command.

  Takes two or more BAM files and writes a correlation plot (png). When the
  additional output options are enabled it can also write the raw bin counts
  and the correlation matrix as tabular datasets.
  The command section is a Cheetah template; Cheetah directives start with
  a single hash and Cheetah comments with a double hash.
-->
<tool id="bamCorrelate" name="bamCorrelate" version="1.0">
  <description>correlates pairs of bam files</description>

  <command>
    ## Collect the dataset paths and one quoted label per repeat entry;
    ## an empty label falls back to the dataset name.
    #set files=[]
    #set labels=[]
    #for $i in $inputs
      #set $files += [str($i.bamfile)]
      #if str($i.label.value) != "":
        #set $labels += ["\"%s\"" % ($i.label.value)]
      #else
        #set $labels += ["\"%s\"" % ($i.bamfile.name)]
      #end if
    #end for
    bamCorrelate
    --bamfiles #echo " ".join($files)
    --labels #echo " ".join($labels)

    --fragmentLength $fragmentLength
    --corMethod $corMethod

    ## The plot is written to a path ending in .png and moved onto the
    ## Galaxy output dataset by the trailing mv.
    #set newoutFileName=str($outFileName)+".png"
    --plotFile $newoutFileName

    ## The optional outputs are switched by the save* booleans of the outputOpt
    ## conditional; the destination paths are the top-level output datasets.
    #if $outputOpt.showOutputOpt == "yes"
      #if $outputOpt.saveRawCounts:
        --outRawCounts '$outFileRawCounts'
      #end if
      #if $outputOpt.saveCorMatrix:
        --outFileCorMatrix '$outFileCorMatrix'
      #end if
    #end if

    #if $advancedOpt.showAdvancedOpt == "yes":
      #if $advancedOpt.smoothLength:
        --smoothLength '$advancedOpt.smoothLength'
      #end if

      #if str($advancedOpt.region.value) != '':
        --region '$advancedOpt.region'
      #end if

      --binSize '$advancedOpt.binSize'
      --numberOfSamples '$advancedOpt.numberOfSamples'

      ## Boolean params expand to their flag (truevalue) or to nothing (falsevalue).
      $advancedOpt.doNotExtendPairedEnds
      $advancedOpt.ignoreDuplicates
      $advancedOpt.includeZeros

      #if $advancedOpt.minMappingQuality:
        --minMappingQuality '$advancedOpt.minMappingQuality'
      #end if
    #end if

    ## Use the slot count granted by the Galaxy job runner; fall back to 4 when it is unset.
    --numberOfProcessors "\${GALAXY_SLOTS:-4}"; mv $newoutFileName $outFileName
  </command>

  <inputs>

    <repeat name="inputs" title="Input files" min="2">
      <param name="bamfile" type="data" format="bam"
             label="Bam file"
             help="The BAM file must be sorted and indexed."/>
      <param name="label" type="text" size="30" optional="true" value=""
             label="Label"
             help="Label to use in the output. If not given the dataset name will be used instead."/>
    </repeat>

    <param name="fragmentLength" type="integer" value="300" min="1"
           label="Length of the average fragment size"
           help="Reads will be extended to match this length unless they are paired-end, in which case they will be extended to match the fragment length. If this value is set to the read length or smaller, the read will not be extended. *Warning* the fragment length affects the normalization to 1x (see &quot;normalize coverage to 1x&quot;). The formula to normalize using the sequencing depth is genomeSize/(number of mapped reads * fragment length). *NOTE*: If the BAM files contain mated and unmated paired-end reads, unmated reads will be extended to match the fragment length."/>

    <param name="corMethod" type="select" label="Correlation method">
      <option value="pearson">Pearson</option>
      <option value="spearman">Spearman</option>
    </param>

    <!-- Advanced options are only added to the command line when switched on. -->
    <conditional name="advancedOpt">
      <param name="showAdvancedOpt" type="select" label="Show advanced options" >
        <option value="no" selected="true">no</option>
        <option value="yes">yes</option>
      </param>
      <when value="no" />
      <when value="yes">
        <param name="smoothLength" type="integer" value="1" optional="true" min="1"
               label="Smooth values using the following length (in bp)"
               help="The smooth length defines a window, larger than the bin size, to average the number of reads. For example, if the bin size is set to 20 bp and the smooth length is set to 60 bp, then, for each bin size the average of it and its left and right neighbors is considered. Any value smaller than the bin size will be ignored and no smoothing will be applied."/>

        <param name="region" type="text" value=""
               label="Region of the genome to limit the operation to"
               help="This is useful when testing parameters to reduce the computing time. The format is chr:start:end, for example &quot;chr10&quot; or &quot;chr10:456700:891000&quot;" />

        <param name="binSize" type="integer" value="10000" min="1"
               label="Bin size in bp"
               help="Length in base pairs for a window used to sample the genome."/>

        <param name="numberOfSamples" type="integer" value="100000" min="1"
               label="Number of samples"
               help="Number of samples taken from the genome to compute the scaling factors"/>

        <param name="doNotExtendPairedEnds" type="boolean" truevalue="--doNotExtendPairedEnds" falsevalue=""
               label="Do not extend paired ends"
               help="If set, reads are not extended to match the fragment length reported in the BAM file, instead they will be extended to match the fragment length. Default is to extend the reads if paired end information is available."/>

        <param name="ignoreDuplicates" type="boolean" truevalue="--ignoreDuplicates" falsevalue=""
               label="Ignore duplicates"
               help="If set, reads that have the same orientation and start position will be considered only once. If reads are paired, the mate position also has to coincide to ignore a read." />

        <param name="minMappingQuality" type="integer" optional="true" value="1" min="1"
               label="Minimum mapping quality"
               help="If set, only reads that have a mapping quality score higher than the given value are considered"/>

        <param name="includeZeros" type="boolean" truevalue="--includeZeros" falsevalue=""
               label="Include zeros"
               help="If set, then zero counts that happen for *all* bam files given are included. The default behavior is to ignore those cases" />

      </when>
    </conditional>

    <!-- These booleans drive both the command line and the output filters below. -->
    <conditional name="outputOpt">
      <param name="showOutputOpt" type="select" label="Show additional output options" >
        <option value="no" selected="true">no</option>
        <option value="yes">yes</option>
      </param>
      <when value="no" />
      <when value="yes">
        <param name="saveRawCounts" type="boolean" label="Save the bin counts"/>
        <param name="saveCorMatrix" type="boolean" label="Save the correlation matrix"/>
      </when>
    </conditional>

  </inputs>
  <outputs>
    <data format="png" name="outFileName" />
    <data format="tabular" name="outFileRawCounts" label="${tool.name} on ${on_string}: bin counts">
      <filter>(outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveRawCounts'] == True)</filter>
    </data>
    <data format="tabular" name="outFileCorMatrix" label="${tool.name} on ${on_string}: correlation matrix">
      <filter>(outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveCorMatrix'] == True)</filter>
    </data>
  </outputs>
  <help>

**What it does**

Genomes are split into bins of given length. For each bin the number of reads
found for each of the bam files is counted. A correlation is computed for all
pairs of bam files.

-----

.. class:: infomark

Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please send an email to `Fidel Ramirez`_.

This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.


.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de

  </help>

</tool>
<!--
  bamFingerprint.xml: Galaxy tool wrapper for the bamFingerprint command.

  Takes two or more BAM files and writes a profile plot (png); the raw bin
  counts can optionally be saved as a tabular dataset.
  The command section is a Cheetah template; Cheetah directives start with
  a single hash and Cheetah comments with a double hash.
-->
<tool id="bamFingerprint" name="bamFingerprint" version="1.0">
  <description>plots profiles of bam files</description>

  <command>
    ## Collect the dataset paths and one quoted label per repeat entry;
    ## an empty label falls back to the dataset name.
    #set files=[]
    #set labels=[]
    #for $i in $inputs
      #set $files += [str($i.bamfile)]
      #if str($i.label.value) != "":
        #set $labels += ["\"%s\"" % ($i.label.value)]
      #else
        #set $labels += ["\"%s\"" % ($i.bamfile.name)]
      #end if
    #end for
    bamFingerprint
    --bamfiles #echo " ".join($files)
    --labels #echo " ".join($labels)

    --fragmentLength $fragmentLength

    ## The plot is written to a path ending in .png and moved onto the
    ## Galaxy output dataset by the trailing mv.
    #set newoutFileName=str($outFileName)+".png"
    --plotFile $newoutFileName

    #if $outputOpt.showOutputOpt == "yes"
      #if $outputOpt.saveRawCounts:
        --outRawCounts '$outFileRawCounts'
      #end if
    #end if

    #if $advancedOpt.showAdvancedOpt == "yes":
      #if $advancedOpt.smoothLength:
        --smoothLength '$advancedOpt.smoothLength'
      #end if

      #if str($advancedOpt.region.value) != '':
        --region '$advancedOpt.region'
      #end if

      --binSize '$advancedOpt.binSize'
      --numberOfSamples '$advancedOpt.numberOfSamples'

      ## Boolean params expand to their flag (truevalue) or to nothing (falsevalue).
      $advancedOpt.doNotExtendPairedEnds
      $advancedOpt.ignoreDuplicates
      $advancedOpt.skipZeros

      #if $advancedOpt.minMappingQuality:
        --minMappingQuality '$advancedOpt.minMappingQuality'
      #end if
    #end if

    ## Use the slot count granted by the Galaxy job runner; fall back to 4 when it is unset.
    --numberOfProcessors "\${GALAXY_SLOTS:-4}"; mv $newoutFileName $outFileName
  </command>

  <inputs>

    <repeat name="inputs" title="Input files" min="2">
      <param name="bamfile" type="data" format="bam"
             label="Bam file"
             help="The BAM file must be sorted and indexed."/>
      <param name="label" type="text" size="30" optional="true" value=""
             label="Label"
             help="Label to use in the output. If not given the dataset name will be used instead."/>
    </repeat>

    <param name="fragmentLength" type="integer" value="200" min="1"
           label="Length of the average fragment size"/>

    <!-- Advanced options are only added to the command line when switched on. -->
    <conditional name="advancedOpt">
      <param name="showAdvancedOpt" type="select" label="Show advanced options" >
        <option value="no" selected="true">no</option>
        <option value="yes">yes</option>
      </param>
      <when value="no" />
      <when value="yes">
        <param name="smoothLength" type="integer" value="1" optional="true" min="1"
               label="Smooth values using the following length (in bp)"
               help="The smooth length defines a window, larger than the bin size, to average the number of reads. For example, if the bin size is set to 20 bp and the smooth length is set to 60 bp, then, for each bin size the average of it and its left and right neighbors is considered. Any value smaller than the bin size will be ignored and no smoothing will be applied."/>

        <param name="region" type="text" value=""
               label="Region of the genome to limit the operation to"
               help="This is useful when testing parameters to reduce the computing time. The format is chr:start:end, for example &quot;chr10&quot; or &quot;chr10:456700:891000&quot;" />

        <param name="binSize" type="integer" value="10000" min="1"
               label="Bin size in bp"
               help="Length in base pairs for a window used to sample the genome."/>

        <param name="numberOfSamples" type="integer" value="100000" min="1"
               label="Number of samples"
               help="Number of samples taken from the genome to compute the scaling factors"/>

        <param name="doNotExtendPairedEnds" type="boolean" truevalue="--doNotExtendPairedEnds" falsevalue=""
               label="Do not extend paired ends"
               help="If set, reads are not extended to match the fragment length reported in the BAM file, instead they will be extended to match the fragment length. Default is to extend the reads if paired end information is available."/>

        <param name="ignoreDuplicates" type="boolean" truevalue="--ignoreDuplicates" falsevalue=""
               label="Ignore duplicates"
               help="If set, reads that have the same orientation and start position will be considered only once. If reads are paired, the mate position also has to coincide to ignore a read." />

        <param name="minMappingQuality" type="integer" optional="true" value="1" min="1"
               label="Minimum mapping quality"
               help="If set, only reads that have a mapping quality score higher than the given value are considered"/>

        <!-- Checking this box emits the skipZeros flag, so the label names that action. -->
        <param name="skipZeros" type="boolean" truevalue="--skipZeros" falsevalue=""
               label="Skip zeros"
               help="If set, then zero counts that happen for *all* bam files given are ignored. This will result in a reduced number of read counts than the specified in number of samples" />

      </when>
    </conditional>

    <!-- This boolean drives both the command line and the output filter below. -->
    <conditional name="outputOpt">
      <param name="showOutputOpt" type="select" label="Show additional output options" >
        <option value="no" selected="true">no</option>
        <option value="yes">yes</option>
      </param>
      <when value="no" />
      <when value="yes">
        <param name="saveRawCounts" type="boolean" label="Save the bin counts"/>
      </when>
    </conditional>

  </inputs>
  <outputs>
    <data format="png" name="outFileName" />
    <data format="tabular" name="outFileRawCounts" label="${tool.name} on ${on_string}: bin counts">
      <filter>(outputOpt['showOutputOpt'] == 'yes' and outputOpt['saveRawCounts'] == True)</filter>
    </data>
  </outputs>
  <help>

**What it does**

Samples indexed bam files and plots a profile for each bam file. At each
sample position all reads overlapping a window (bin) of specified length are
counted. These counts are then sorted and the cumulative sum plotted

-----

.. class:: infomark

Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please send an email to `Fidel Ramirez`_.

This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.


.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de

  </help>

</tool>
<!--
  computeGCBias.xml: Galaxy tool wrapper for the computeGCBias command.

  Takes a BAM file and a reference genome (a locally cached entry or a
  dataset from the history) and writes a tabular GC bias frequencies file;
  a diagnostic bias plot (png) can optionally be saved.
  The command section is a Cheetah template; Cheetah directives start with
  a single hash and Cheetah comments with a double hash.
-->
<tool id="computeGCBias" name="computeGCBias" version="1.0">
  <description></description>
  <!-- NOTE(review): this maps exit code 0 to a warning level; confirm that is intended. -->
  <stdio>
    <exit_code range="0" level="warning" description="Warning" />
  </stdio>
  <command>
    computeGCBias
    --bamfile '$bamInput'
    --species '$species'
    --GCbiasFrequenciesFile $outFileName
    --fragmentLength $fragmentLength

    ## The genome comes either from the history dataset or from the path
    ## column of the selected data table entry.
    #if $source.ref_source=="history":
      --genome $source.input1
    #else:
      --genome "${source.input1_2bit.fields.path}"
    #end if

    #if $advancedOpt.showAdvancedOpt == "yes":
      #if str($advancedOpt.region.value) != '':
        --region '$advancedOpt.region'
      #end if

      --binSize '$advancedOpt.binSize'
      --sampleSize '$advancedOpt.sampleSize'
      --regionSize '$advancedOpt.regionSize'

      #if $advancedOpt.filterOut:
        --filterOut $advancedOpt.filterOut
      #end if

      #if $advancedOpt.extraSampling:
        --extraSampling $advancedOpt.extraSampling
      #end if

    #end if

    ## The plot is written to a fixed file name in the working directory and
    ## then moved onto the Galaxy output dataset; move stays empty when no
    ## plot was requested.
    #set move=""
    #if $output.showOutputSettings == "yes"
      #if $output.saveBiasPlot:
        --biasPlot biasPlot.png
        #set move="mv biasPlot.png $biasPlot"
      #end if
    #end if
    ; $move

  </command>

  <inputs>

    <param name="bamInput" format="bam" type="data" label="Input BAM file"
           help="The BAM file must be sorted and indexed."/>

    <param name="species" type="text" value="" label="Species name abbreviation" />

    <!-- Reference genome: a cached data table entry or a 2bit dataset from the history. -->
    <conditional name="source">
      <param name="ref_source" type="select" label="Reference genome">
        <option value="cached">locally cached</option>
        <option value="history">in your history</option>
      </param>
      <when value="cached">
        <param name="input1_2bit" type="select" label="Using reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
          <options from_data_table="lastz_seqs" />
        </param>
      </when>
      <when value="history">
        <!-- The dataset is passed as the genome, so it is restricted to Galaxy's twobit datatype. -->
        <param name="input1" type="data" format="twobit" label="Select a reference dataset in 2bit format" />
      </when>
    </conditional>
    <param name="fragmentLength" type="integer" value="300" min="1"
           label="Fragment length used for the sequencing"
           help="If paired-end reads are used the fragment length is computed based from the bam file."/>

    <!-- Advanced options are only added to the command line when switched on. -->
    <conditional name="advancedOpt">
      <param name="showAdvancedOpt" type="select" label="Show advanced options" >
        <option value="no" selected="true">no</option>
        <option value="yes">yes</option>
      </param>
      <when value="no" />
      <when value="yes">
        <param name="region" type="text" value=""
               label="Region of the genome to limit the operation to"
               help="This is useful when testing parameters to reduce the computing time. The format is chr:start:end, for example &quot;chr10&quot; or &quot;chr10:456700:891000&quot;" />

        <param name="binSize" type="integer" value="50" min="1"
               label="Bin size in bp"
               help="Size of the bins in bp for the output of the bigwig/bedgraph file."/>

        <param name="sampleSize" type="integer" value="50000000" min="1"
               label="Number of sampling points to be considered" />

        <param name="regionSize" type="integer" value="300" min="1"
               label="Region size"
               help="To plot the reads per GC over a region the size of the region is required. By default, the bin size is set to 300bp, which is close to the standard fragment size for Illumina machines. However, if the depth of sequencing is low a larger bin size will be required, otherwise many bins will not overlap with any read."/>

        <param name="filterOut" type="data" format="bed" optional="true"
               label="BED file containing genomic regions to be excluded from the estimation of the correction"
               help="Such regions usually contain repetitive regions and peaks that if included will bias the correction. It is recommended to filter out known repetitive regions if multi-reads (reads that map to more than one genomic position) were excluded. In the case of ChIP-seq data, it is recommended to first use a peak caller to identify and filter out the identified peaks." />
        <param name="extraSampling" type="data" format="bed" optional="true"
               label="BED file containing genomic regions for which extra sampling is required because they are underrepresented in the genome"
               help="" />
      </when>
    </conditional>

    <!-- This boolean drives both the command line and the bias plot output filter below. -->
    <conditional name="output" >
      <param name="showOutputSettings" type="select" label="Show additional output options" >
        <option value="no" selected="true">no</option>
        <option value="yes">yes</option>
      </param>
      <when value="no" />
      <when value="yes">
        <param name="saveBiasPlot" type="boolean" label="Save a diagnostic image summarizing the GC bias found on the sample"/>
      </when>
    </conditional>
  </inputs>
  <outputs>
    <data format="tabular" name="outFileName" />
    <data format="png" name="biasPlot" label="${tool.name} on ${on_string}: bias plot">
      <filter>(output['showOutputSettings'] == 'yes' and output['saveBiasPlot'] == True)</filter>
    </data>
  </outputs>
  <help>

**What it does**

Computes the GC bias using Benjamini's method [citation]. The resulting GC
bias can later be used to plot the bias or to correct the bias.

-----

.. class:: infomark

Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please send an email to `Fidel Ramirez`_.

This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.


.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de

  </help>

</tool>
<!-- computeMatrix.xml: tool wrapper that builds the computeMatrix command line
     from the user's form input. The wrapper has three conditional sections:
     "mode" (scale-regions vs. reference-point), "output" (optional extra
     output files) and "advancedOpt" (binning, sorting and filtering). -->
<tool id="computeMatrix" name="computeMatrix" version="1.0">
    <description>summarizes and prepares an intermediary file containing scores associated with genomic regions that can be used afterwards to plot a heatmap or a profile</description>
    <command>
        ## Mandatory part: the selected mode is passed as the first argument,
        ## followed by the two input files and the main output file.
        computeMatrix
            $mode.mode_select
            --regionsFileName '$regionsFile'
            --scoreFileName '$scoreFile'
            --outFileName '$outFileName'

        ## Extra output files are only requested when the user ticked them;
        ## the matching output datasets are filtered the same way below.
        #if $output.showOutputSettings == "yes":
            #if $output.saveData:
                --outFileNameData '$outFileNameData'
            #end if
            #if $output.saveMatrix:
                --outFileNameMatrix '$outFileNameMatrix'
            #end if
            #if $output.saveSortedRegions:
                --outFileSortedRegions '$outFileSortedRegions'
            #end if
        #end if

        #if $mode.mode_select == "reference-point":
            --referencePoint $mode.referencePoint
            $mode.nanAfterEnd
            --beforeRegionStartLength $mode.beforeRegionStartLength
            --afterRegionStartLength $mode.afterRegionStartLength
        #else:
            --regionBodyLength $mode.regionBodyLength
            --startLabel $mode.startLabel
            --endLabel $mode.endLabel
            #if $mode.regionStartLength.regionStartLength_select == "yes":
                ## Both distances are optional parameters: emit a flag only
                ## when a value was entered, otherwise the flag would be
                ## written without an argument.
                #if str($mode.regionStartLength.beforeRegionStartLength).strip() != '':
                    --beforeRegionStartLength $mode.regionStartLength.beforeRegionStartLength
                #end if
                #if str($mode.regionStartLength.afterRegionStartLength).strip() != '':
                    --afterRegionStartLength $mode.regionStartLength.afterRegionStartLength
                #end if
            #end if
        #end if

        #if $advancedOpt.showAdvancedOpt == "yes":
            ## binSize is declared in the advanced options and must be
            ## forwarded, otherwise the user's choice has no effect.
            #if str($advancedOpt.binSize).strip() != '':
                --binSize $advancedOpt.binSize
            #end if

            --sortRegions '$advancedOpt.sortRegions'
            --sortUsing '$advancedOpt.sortUsing'
            --averageTypeBins '$advancedOpt.averageTypeBins'
            $advancedOpt.missingDataAsZero
            $advancedOpt.skipZeros

            ## The optional floats are compared as strings so that an
            ## explicit value of 0 is still passed on; a plain truth test
            ## would drop it.
            #if str($advancedOpt.minThreshold).strip() != '':
                --minThreshold $advancedOpt.minThreshold
            #end if
            #if str($advancedOpt.maxThreshold).strip() != '':
                --maxThreshold $advancedOpt.maxThreshold
            #end if
            #if str($advancedOpt.scale).strip() != '':
                --scale $advancedOpt.scale
            #end if
        #end if

        ## Use the number of slots granted to the job when the environment
        ## provides it; fall back to the previous fixed value of 4.
        --numberOfProcessors "\${GALAXY_SLOTS:-4}"
    </command>

    <inputs>
        <param name="regionsFile" format="bed,gff" type="data" label="Regions to plot"
               help="File, in BED or GFF format, containing the regions to plot."/>
        <param name="scoreFile" format="bigwig,bam" type="data" label="Score file"
               help="Either a bigWig file (containing a score, usually covering the whole genome) or a BAM file. For this last case, coverage counts will be used for the heatmap."/>

        <conditional name="mode">
            <param name="mode_select" type="select" label="computeMatrix has two main output options"
                   help="In the scale-regions mode, all regions in the BED/GFF file are stretched or shrunk to the same length (bp) that is indicated by the user. Reference-point refers to a position within the BED/GFF regions (e.g start of region). In the reference-point mode only those genomic positions before (upstream) and/or after (downstream) the reference point will be plotted.">
                <option value="scale-regions" selected="true">scale-regions</option>
                <option value="reference-point">reference-point</option>
            </param>

            <when value="scale-regions">
                <param name="regionBodyLength" type="integer" value="500" label="Distance in bp to which all regions are going to be fitted"/>
                <param name="startLabel" type="text" value="TSS" size="10" label="Label for the region start"
                       help="Label shown in the plot for the start of the region. Default is TSS (transcription start site), but could be changed to anything, e.g. &quot;peak start&quot;."/>
                <param name="endLabel" type="text" value="TES" size="10" label="Label for the region end"
                       help="Label shown in the plot for the region end. Default is TES (transcription end site)."/>
                <conditional name="regionStartLength">
                    <param name="regionStartLength_select" type="select" label="Set distance up- and downstream of the given regions">
                        <option value="no" selected="true">no</option>
                        <option value="yes">yes</option>
                    </param>
                    <when value="no"/>
                    <when value="yes">
                        <param name="beforeRegionStartLength" type="integer" value="1000" min="1" optional="true"
                               label="Distance upstream of the start site of the regions defined in the region file"
                               help="If the regions are genes, this would be the distance upstream of the transcription start site."/>
                        <param name="afterRegionStartLength" type="integer" value="1000" min="1" optional="true"
                               label="Distance downstream of the end site of the given regions"
                               help="If the regions are genes, this would be the distance downstream of the transcription end site."/>
                    </when>
                </conditional>
            </when>

            <when value="reference-point">
                <!-- Exactly one option may be pre-selected; TSS is the first entry. -->
                <param name="referencePoint" type="select" label="The reference point for the plotting">
                    <option value="TSS" selected="true">region start (TSS)</option>
                    <option value="TES">region end (TES)</option>
                    <option value="center">center of the region</option>
                </param>
                <param name="nanAfterEnd" type="boolean" truevalue="--nanAfterEnd" falsevalue=""
                       label="Discard any values after the region end"
                       help="This is useful to visualize the region end when not using the scale-regions mode and when the reference-point is set to the TSS."/>
                <param name="beforeRegionStartLength" type="integer" value="1000" min="1"
                       label="Distance upstream of the start site of the regions defined in the region file"
                       help="If the regions are genes, this would be the distance upstream of the transcription start site."/>
                <param name="afterRegionStartLength" type="integer" value="1000" min="1"
                       label="Distance downstream of the end site of the given regions"
                       help="If the regions are genes, this would be the distance downstream of the transcription end site."/>
            </when>
        </conditional>

        <conditional name="output">
            <param name="showOutputSettings" type="select" label="Show additional output options">
                <option value="no" selected="true">no</option>
                <option value="yes">yes</option>
            </param>
            <when value="no"/>
            <when value="yes">
                <param name="saveData" type="boolean" label="Save the averages per matrix column into a text file"
                       help="This corresponds to the underlying data used to plot a summary profile."/>
                <param name="saveMatrix" type="boolean" label="Save the matrix of values underlying the heatmap"
                       help="This matrix can easily be loaded into R or other programs."/>
                <param name="saveSortedRegions" type="boolean" label="Save the regions after skipping zeros or min/max threshold values"
                       help="The order of the regions in the file follows the sorting order selected. This is useful, for example, to generate other heatmaps keeping the sorting of the first heatmap."/>
            </when>
        </conditional>

        <conditional name="advancedOpt">
            <param name="showAdvancedOpt" type="select" label="Show advanced options">
                <option value="no" selected="true">no</option>
                <option value="yes">yes</option>
            </param>
            <when value="no"/>
            <when value="yes">
                <param name="binSize" type="integer" value="100" min="1" optional="true"
                       label="Length, in base pairs, of the non-overlapping bin for averaging the score over the regions length"/>

                <param name="sortRegions" type="select" label="Sort regions"
                       help="Whether the output file should present the regions sorted.">
                    <option value="no" selected="true">no ordering</option>
                    <option value="descend">descending order</option>
                    <option value="ascend">ascending order</option>
                </param>

                <param name="sortUsing" type="select" label="Method used for sorting."
                       help="The value is computed for each row.">
                    <option value="mean" selected="true">mean</option>
                    <option value="median">median</option>
                    <option value="min">min</option>
                    <option value="max">max</option>
                    <option value="sum">sum</option>
                    <option value="region_length">region length</option>
                </param>

                <param name="averageTypeBins" type="select" label="Define the type of statistic that should be used over the bin size range">
                    <option value="mean" selected="true">mean</option>
                    <option value="median">median</option>
                    <option value="min">min</option>
                    <option value="max">max</option>
                    <option value="sum">sum</option>
                    <option value="std">std</option>
                </param>

                <param name="missingDataAsZero" type="boolean" truevalue="--missingDataAsZero" falsevalue=""
                       label="Indicate missing data as zero"
                       help="Only for bigwig input! Set to &quot;yes&quot;, if missing data should be indicated as zeros. Default is to ignore such cases which will be depicted as black areas in the heatmap. (see &quot;Missing data color&quot; options of the heatmapper for additional options)."/>

                <param name="skipZeros" type="boolean" truevalue="--skipZeros" falsevalue=""
                       label="Skip zeros"
                       help="Whether regions with only scores of zero should be included or not. Default is to include them."/>

                <param name="minThreshold" type="float" optional="true" label="Minimum threshold"
                       help="Any region containing a value that is equal or less than this numeric value will be skipped. This is useful to skip, for example, genes where the read count is zero for any of the bins. This could be the result of unmappable areas and can bias the overall results."/>
                <param name="maxThreshold" type="float" optional="true" label="Maximum threshold"
                       help="Any region containing a value that is equal or higher than this numeric value will be skipped. The max threshold is useful to skip those few regions with very high read counts (e.g. major satellites) that may bias the average values."/>
                <param name="scale" type="float" optional="true" label="Scale"
                       help="If set, all values are multiplied by this number."/>
            </when>
        </conditional>
    </inputs>

    <outputs>
        <data format="bgzip" name="outFileName" label="${tool.name} on ${on_string}: matrix">
        </data>
        <!-- Each optional dataset is created only when its checkbox was ticked. -->
        <data format="tabular" name="outFileNameData" label="${tool.name} on ${on_string}: raw data">
            <filter>(output['showOutputSettings'] == 'yes' and output['saveData'] == True)</filter>
        </data>
        <data format="tabular" name="outFileNameMatrix" label="${tool.name} on ${on_string}: matrix of values">
            <filter>(output['showOutputSettings'] == 'yes' and output['saveMatrix'] == True)</filter>
        </data>
        <data format="bed" name="outFileSortedRegions" label="${tool.name} on ${on_string}: sorted/filtered regions">
            <filter>(output['showOutputSettings'] == 'yes' and output['saveSortedRegions'] == True)</filter>
        </data>
    </outputs>

    <help>
**What it does**

This tool summarizes and prepares an intermediary file containing scores associated with genomic regions that can be used afterwards to plot a heatmap or a profile. Typically, these genomic regions are genes, but any other regions defined in a BED or GFF format can be used. This tool can also be used to filter and sort regions according to their score.

-----

.. class:: infomark

Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please send an email to `Fidel Ramirez`_.

This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.


.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de
    </help>

</tool>
<!-- correctGCBias.xml: tool wrapper that builds the correctGCBias command line.
     Inputs are a BAM file, the frequencies file produced by computeGCBias and
     a reference genome (locally cached or taken from the history). The single
     output dataset changes its datatype to match the selected output format. -->
<tool id="correctGCBias" name="correctGCBias" version="1.0">
    <description>corrects the GC bias of a BAM file using the output of computeGCBias</description>
    <command>
        correctGCBias
            --bamfile '$bamInput'
            --species '$species'
            --GCbiasFrequenciesFile '$GCbiasFrequenciesFile'

        ## The reference genome comes either from the history or from the
        ## path column of the locally cached data table.
        #if $source.ref_source == "history":
            --genome '$source.input1'
        #else:
            --genome '${source.input1_2bit.fields.path}'
        #end if

        #if $advancedOpt.showAdvancedOpt == "yes":
            #if str($advancedOpt.region.value) != '':
                --region '$advancedOpt.region'
            #end if

            --binSize '$advancedOpt.binSize'
        #end if

        ## The result is first written to a file whose extension is the
        ## selected format and then moved onto the output dataset.
        ## NOTE(review): presumably correctGCBias derives the output type
        ## from the file extension; confirm against the program's docs.
        #set $newoutFileName = "corrected." + str($outFileFormat)

        --correctedFile '$newoutFileName'

        ## Only move the file when correctGCBias itself succeeded.
        &amp;&amp; mv '$newoutFileName' '$outFileName'
    </command>

    <inputs>
        <param name="GCbiasFrequenciesFile" type="data" format="tabular" label="Output of computeGCBias"/>

        <param name="bamInput" format="bam" type="data" label="Input BAM file"
               help="The BAM file must be sorted and indexed."/>

        <param name="species" type="text" value="" label="Species name abbreviation"/>

        <conditional name="source">
            <param name="ref_source" type="select" label="Reference genome">
                <option value="cached">locally cached</option>
                <option value="history">in your history</option>
            </param>
            <when value="cached">
                <param name="input1_2bit" type="select" label="Using reference genome"
                       help="If your genome of interest is not listed, contact your Galaxy team">
                    <options from_data_table="lastz_seqs"/>
                </param>
            </when>
            <when value="history">
                <!-- The label asks for a 2bit dataset, so the accepted format must be twobit, not bam. -->
                <param name="input1" type="data" format="twobit" label="Select a reference dataset in 2bit format"/>
            </when>
        </conditional>

        <param name="outFileFormat" type="select" label="File format of the output">
            <option value="bam">bam</option>
            <option value="bw">bigwig</option>
            <option value="bg">bedgraph</option>
        </param>

        <conditional name="advancedOpt">
            <param name="showAdvancedOpt" type="select" label="Show advanced options">
                <option value="no" selected="true">no</option>
                <option value="yes">yes</option>
            </param>
            <when value="no"/>
            <when value="yes">
                <param name="region" type="text" value=""
                       label="Region of the genome to limit the operation to"
                       help="This is useful when testing parameters to reduce the computing time. The format is chr:start:end, for example &quot;chr10&quot; or &quot;chr10:456700:891000&quot;"/>

                <param name="binSize" type="integer" value="50" min="1"
                       label="Bin size in bp"
                       help="Size of the bins in bp for the output of the bigwig/bedgraph file."/>
            </when>
        </conditional>
    </inputs>

    <outputs>
        <!-- The datatype follows the outFileFormat selection. -->
        <data format="bam" name="outFileName">
            <change_format>
                <when input="outFileFormat" value="bw" format="bigwig"/>
                <when input="outFileFormat" value="bam" format="bam"/>
                <when input="outFileFormat" value="bg" format="bedgraph"/>
            </change_format>
        </data>
    </outputs>

    <help>

**What it does**

Corrects the GC bias of a BAM file using Benjamini's method [citation]. It
requires the GC bias frequencies file produced by computeGCBias. The
corrected data can be written as a BAM, bigwig or bedgraph file.

-----

.. class:: infomark

Please acknowledge that this tool **is still in development** and we will be very happy to receive feedback from the users. If you run into any trouble please send an email to `Fidel Ramirez`_.

This tool is developed by the `Bioinformatics and Deep-Sequencing Unit`_ at the `Max Planck Institute for Immunobiology and Epigenetics`_.


.. _Bioinformatics and Deep-Sequencing Unit: http://www3.ie-freiburg.mpg.de/facilities/research-facilities/bioinformatics-and-deep-sequencing-unit/
.. _Max Planck Institute for Immunobiology and Epigenetics: http://www3.ie-freiburg.mpg.de
.. _Fidel Ramirez: ramirez@ie-freiburg.mpg.de

    </help>

</tool>
