changeset 0:48d424adfaef draft

Uploaded
author greg
date Thu, 03 Mar 2016 21:16:32 -0500
parents
children 063d97ffda39
files fimo_gff_to_gff.pl fimo_gff_to_gff.xml test-data/input.tabular test-data/motif1.gff tool_dependencies.xml
diffstat 5 files changed, 153 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fimo_gff_to_gff.pl	Thu Mar 03 21:16:32 2016 -0500
@@ -0,0 +1,74 @@
+#! /usr/bin/perl
+#
+# fimo_gff_to_gff.pl
+#
+# Usage: fimo_gff_to_gff.pl FIMO_GFF_File Output_Path
+#
+# Reads the GFF-like file written by FIMO.  In that file column 1 is a FASTA
+# header of the form chrom:start-stop(strand) (or chrom:start-stop, or any
+# name without a colon) and columns 4/5 are positions inside that sequence.
+# For every motif (first attribute of column 9, e.g. "Name=1") one file
+# Output_Path/MOTIF<name>.gff is written.  Each output record uses the text
+# before the first colon as sequence name and region start + position as
+# coordinates.  Records are written in order of descending score, and a
+# record is written only once per motif.
+#
+# Strand handling: a header strand containing "-" flips the strand reported
+# in column 7 ("+" becomes "-", anything else becomes "+"); headers without a
+# strand are treated as "+".
+#
+# Example input (tab separated in the real file):
+#   ##gff-version 3
+#   chr10:265210-265270(-)  fimo  nucleotide_motif  25  36  40.2  +  .  Name=1;ID=1-1-chr10:265210-265270(-);pvalue=9.48e-05;qvalue=0.00885;sequence=ACTTACCCTCAT;
+#   chr10:295039-295099(+)  fimo  nucleotide_motif  25  36  55.3  +  .  Name=1;ID=1-1-chr10:295039-295099(+);pvalue=2.97e-06;qvalue=0.00107;sequence=TGTTACCCGTTC;
+#   chr10:576747-576807(-)  fimo  nucleotide_motif  25  36  56.2  +  .  Name=1;ID=1-1-chr10:576747-576807(-);pvalue=2.37e-06;qvalue=0.00107;sequence=CGTTACCCGACC;
+#
+# For comparison, GFF records with genomic coordinates as found in genetrack
+# output (kept from the original notes):
+#   chr1  genetrack  .  123950  123970  22  +  .  stddev=0.0
+#   chr1  genetrack  .  565745  565765  12  +  .  stddev=0.0
+#   chr1  genetrack  .  565793  565813  44  +  .  stddev=0.298065387468
+#
+# The script is written so that well-formed input never triggers a Perl
+# warning: anything printed to stderr may be treated as a job failure by the
+# workflow system that runs this script.
+
+use strict;
+use warnings;
+use Scalar::Util qw(looks_like_number);
+
+# Split a FASTA header "chrom:start-stop(strand)" into its parts.
+#
+# Parameters: the header text (column 1 of a FIMO record).
+# Returns:    (chrom, offset, strand) where chrom is the text before the
+#             first colon, offset is the integer at the beginning of the
+#             region (0 when the header carries no region) and strand is
+#             "-" when the parenthesised part contains "-", otherwise "+".
+sub parse_sequence_header {
+    my ($header) = @_;
+
+    my @name_parts  = split(/:/, $header);
+    my $chrom       = defined $name_parts[0] ? $name_parts[0] : '';
+    my $region      = defined $name_parts[1] ? $name_parts[1] : '';
+
+    my @region_parts = split(/\(/, $region);
+    my $coords       = defined $region_parts[0] ? $region_parts[0] : '';
+    my $strand_text  = defined $region_parts[1] ? $region_parts[1] : '';
+
+    my $offset = $coords =~ /\A\s*(\d+)/ ? $1 : 0;
+    my $strand = index($strand_text, '-') >= 0 ? '-' : '+';
+
+    return ($chrom, $offset, $strand);
+}
+
+# Derive the output file name for a motif attribute such as "Name=1".
+#
+# Parameters: the output directory and the motif attribute.
+# Returns:    "<directory>/MOTIF<value>.gff", where <value> is the text
+#             between the first and second "=" of the attribute.
+# Dies:       when the value contains a path separator, because the file
+#             must be created inside the output directory.
+sub motif_file_path {
+    my ($directory, $motif_id) = @_;
+
+    my @id_parts   = split(/=/, $motif_id);
+    my $motif_name = defined $id_parts[1] ? $id_parts[1] : '';
+    die "Motif name '$motif_name' must not contain a path separator\n"
+        if $motif_name =~ m{[/\\]};
+
+    return "$directory/MOTIF$motif_name.gff";
+}
+
+die "FIMO_GFF_File\tOutput_Path\n" unless @ARGV == 2;
+my ($input, $output) = @ARGV;
+
+# Three-argument open: the file name is never interpreted as an open mode.
+open(my $in, '<', $input) or die "Can't open $input for reading!\n";
+
+my @motif_ids;    # motif attributes in order of first appearance
+my %hits_for;     # motif attribute => array of { sort_key, line }
+my %seen;         # motif attribute => { output line => count }
+
+while (my $line = <$in>) {
+    chomp($line);
+    next if $line =~ /gff-version/;
+    next if $line =~ /\A\s*(?:#.*)?\z/;    # blank lines and comments carry no record
+
+    my @fields = split(/\t/, $line);
+    die "Malformed record at $input line $.: expected 9 tab-separated columns\n"
+        unless @fields >= 9;
+
+    my ($hit_start, $hit_stop, $score, $strand) = @fields[3 .. 6];
+    die "Malformed record at $input line $.: start and end must be integers\n"
+        unless $hit_start =~ /\A\d+\z/ and $hit_stop =~ /\A\d+\z/;
+
+    my ($chrom, $offset, $region_strand) = parse_sequence_header($fields[0]);
+
+    # The motif is identified by the first attribute, e.g. "Name=1".
+    my @attributes = split(/;/, $fields[8]);
+    my $motif_id   = defined $attributes[0] ? $attributes[0] : '';
+    if (!exists $hits_for{$motif_id}) {
+        push(@motif_ids, $motif_id);
+        $hits_for{$motif_id} = [];
+    }
+
+    # NOTE(review): the positions are added to the region start for both
+    # header strands.  If the FASTA sequence of a "-" region was
+    # reverse-complemented, the hit would lie at stop - position + 1 instead;
+    # confirm against the FASTA extraction step before changing this.
+    my $start = $offset + $hit_start;
+    my $stop  = $offset + $hit_stop;
+
+    if ($region_strand eq '-') {
+        $strand = $strand eq '+' ? '-' : '+';
+    }
+
+    my $gff_line = join("\t",
+        $chrom, 'fimo', 'motif', $start, $stop, $score, $strand, '.',
+        "${chrom}_${start}_${stop}_${strand}");
+
+    # Duplicates are tracked per motif, so an identical hit reported for two
+    # different motifs is kept in both output files.
+    next if $seen{$motif_id}{$gff_line}++;
+
+    # A score that is not a number (e.g. ".") sorts as 0 without a warning.
+    my $sort_key = looks_like_number($score) ? $score : 0;
+    push(@{ $hits_for{$motif_id} }, { sort_key => $sort_key, line => $gff_line });
+}
+close($in);
+
+for my $motif_id (@motif_ids) {
+    my $path = motif_file_path($output, $motif_id);
+    open(my $out, '>', $path) or die "Can't open $path for writing!\n";
+
+    my @by_score = sort { $b->{sort_key} <=> $a->{sort_key} } @{ $hits_for{$motif_id} };
+    for my $hit (@by_score) {
+        print {$out} $hit->{line}, "\n";
+    }
+
+    # Buffered write errors only surface when the handle is closed.
+    close($out) or die "Can't close $path: $!\n";
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fimo_gff_to_gff.xml	Thu Mar 03 21:16:32 2016 -0500
@@ -0,0 +1,52 @@
+<!-- Galaxy tool wrapper for fimo_gff_to_gff.pl: runs the script on one tabular
+     input and collects every file it writes to the "output" directory into a
+     list collection. -->
+<tool id="fimo_gff_to_gff" name="Fimo Gff to Gff" version="1.0.0">
+    <description></description>
+    <requirements>
+        <requirement type="package" version="5.18.1">perl</requirement>
+    </requirements>
+    <!-- The script path is quoted so that a tool directory containing
+         whitespace or shell metacharacters is passed as a single argument. -->
+    <command>
+        <![CDATA[
+            mkdir -p output &&
+            perl "$__tool_directory__/fimo_gff_to_gff.pl" "$input" output
+        ]]>
+    </command>
+    <inputs>
+        <param name="input" type="data" format="tabular" label="FIMO (almost) Gff input" />
+    </inputs>
+    <!-- Each file found in "output" becomes one gff element of the collection;
+         the whole file name is used as the element designation. -->
+    <outputs>
+        <collection name="motifs" type="list" label="Motifs: ${tool.name} on ${on_string}">
+            <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="output" ext="gff" visible="false" />
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="input.tabular" ftype="tabular" />
+            <output_collection name="motifs" type="list">
+                <element name="MOTIF1" file="motif1.gff" ftype="gff"/>
+            </output_collection>
+        </test>
+    </tests>
+    <help>
+
+.. class:: warningmark
+
+This tool requires FASTA headers that use the default bedtools getfasta header format of
+**chrom:start-stop(strand)** or **chrom:start-stop** in which case strand is set as '+'
+per the bedtools standard.
+
+**What it does**
+
+Converts FIMO tabular (almost GFF) files to true genomic coordinates in valid GFF format.  A
+collection of datasets is produced consisting of one dataset per motif discovered in the input.
+
+    </help>
+    <citations>
+        <citation type="bibtex">
+            @unpublished{None,
+            author = {Lai, William},
+            title = {None},
+            year = {None},
+            eprint = {None},
+            url = {http://www.huck.psu.edu/content/research/independent-centers-excellence/center-for-eukaryotic-gene-regulation}
+        }</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.tabular	Thu Mar 03 21:16:32 2016 -0500
@@ -0,0 +1,11 @@
+##gff-version 3
+sacCer3_cegr;chr14;359337;359397;.	fimo	nucleotide_motif	10	50	217	+	.	Name=1;ID=1-1-sacCer3_cegr_chr14_359337_359397_.;pvalue=2.06e-22;sequence=TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT;
+sacCer3_cegr;chr14;359337;359397;.	fimo	nucleotide_motif	11	51	217	+	.	Name=1;ID=1-2-sacCer3_cegr_chr14_359337_359397_.;pvalue=2.06e-22;sequence=TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT;
+sacCer3_cegr;chr14;359337;359397;.	fimo	nucleotide_motif	8	48	209	+	.	Name=1;ID=1-3-sacCer3_cegr_chr14_359337_359397_.;pvalue=1.36e-21;sequence=TCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT;
+sacCer3_cegr;chr14;359337;359397;.	fimo	nucleotide_motif	6	46	197	+	.	Name=1;ID=1-4-sacCer3_cegr_chr14_359337_359397_.;pvalue=1.92e-20;sequence=TTTCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT;
+sacCer3_cegr;chr14;359337;359397;.	fimo	nucleotide_motif	5	45	194	+	.	Name=1;ID=1-5-sacCer3_cegr_chr14_359337_359397_.;pvalue=3.84e-20;sequence=TTTTCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT;
+sacCer3_cegr;chr14;359337;359397;.	fimo	nucleotide_motif	13	53	190	+	.	Name=1;ID=1-6-sacCer3_cegr_chr14_359337_359397_.;pvalue=9.63e-20;sequence=TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGA;
+sacCer3_cegr;chr14;359337;359397;.	fimo	nucleotide_motif	7	47	190	+	.	Name=1;ID=1-7-sacCer3_cegr_chr14_359337_359397_.;pvalue=1.05e-19;sequence=TTCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT;
+sacCer3_cegr;chr14;359337;359397;.	fimo	nucleotide_motif	12	52	178	+	.	Name=1;ID=1-8-sacCer3_cegr_chr14_359337_359397_.;pvalue=1.4e-18;sequence=TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTG;
+sacCer3_cegr;chr14;359337;359397;.	fimo	nucleotide_motif	17	57	176	+	.	Name=1;ID=1-9-sacCer3_cegr_chr14_359337_359397_.;pvalue=2.62e-18;sequence=TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAATCT;
+sacCer3_cegr;chr14;359337;359397;.	fimo	nucleotide_motif	9	49	170	+	.	Name=1;ID=1-10-sacCer3_cegr_chr14_359337_359397_.;pvalue=9.64e-18;sequence=CTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/motif1.gff	Thu Mar 03 21:16:32 2016 -0500
@@ -0,0 +1,10 @@
+sacCer3_cegr;chr14;359337;359397;.	fimo	motif	10	50	217	+	.	sacCer3_cegr;chr14;359337;359397;._10_50_+
+sacCer3_cegr;chr14;359337;359397;.	fimo	motif	11	51	217	+	.	sacCer3_cegr;chr14;359337;359397;._11_51_+
+sacCer3_cegr;chr14;359337;359397;.	fimo	motif	8	48	209	+	.	sacCer3_cegr;chr14;359337;359397;._8_48_+
+sacCer3_cegr;chr14;359337;359397;.	fimo	motif	6	46	197	+	.	sacCer3_cegr;chr14;359337;359397;._6_46_+
+sacCer3_cegr;chr14;359337;359397;.	fimo	motif	5	45	194	+	.	sacCer3_cegr;chr14;359337;359397;._5_45_+
+sacCer3_cegr;chr14;359337;359397;.	fimo	motif	13	53	190	+	.	sacCer3_cegr;chr14;359337;359397;._13_53_+
+sacCer3_cegr;chr14;359337;359397;.	fimo	motif	7	47	190	+	.	sacCer3_cegr;chr14;359337;359397;._7_47_+
+sacCer3_cegr;chr14;359337;359397;.	fimo	motif	12	52	178	+	.	sacCer3_cegr;chr14;359337;359397;._12_52_+
+sacCer3_cegr;chr14;359337;359397;.	fimo	motif	17	57	176	+	.	sacCer3_cegr;chr14;359337;359397;._17_57_+
+sacCer3_cegr;chr14;359337;359397;.	fimo	motif	9	49	170	+	.	sacCer3_cegr;chr14;359337;359397;._9_49_+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Thu Mar 03 21:16:32 2016 -0500
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<!-- Declares the perl 5.18.1 package required by the tool; it is resolved from
+     the repository package_perl_5_18 (owner iuc, changeset 95dad0955d7e) on the
+     tool shed named in the toolshed attribute. -->
+<tool_dependency>
+    <package name="perl" version="5.18.1">
+        <repository changeset_revision="95dad0955d7e" name="package_perl_5_18" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>