#! /usr/bin/perl
#
# HG changeset patch
# User greg
# Date 1457057792 18000
# Node ID 48d424adfaef09e804f7d80c6fa5dd3b491297e6
# Uploaded
# diff -r 000000000000 -r 48d424adfaef fimo_gff_to_gff.pl
#
# Usage: fimo_gff_to_gff.pl FIMO_GFF_File Output_Path
#
# Reads a tab-separated FIMO GFF file whose sequence names have the form
# "chrom:start-stop(strand)" or "chrom:start-stop", shifts every hit by the
# region start taken from the sequence name, and writes one file per motif,
# "Output_Path/MOTIF<name>.gff", where <name> is the text after '=' in the
# first attribute of column 9 (e.g. "Name=1" -> MOTIF1.gff).  Within each
# file the hits are ordered by descending score; ties keep input order.
#
# Sequence names without a ":start-stop" part are passed through unchanged
# with an offset of 0 and a '+' region strand.
#
# Everything after the __END__ marker is leftover changeset text (the start
# of the tool XML diff); perl ignores it.
#
# Example input:
##gff-version 3
#chr10:265210-265270(-) fimo nucleotide_motif 25 36 40.2 + . Name=1;ID=1-1-chr10:265210-265270(-);pvalue=9.48e-05;qvalue=0.00885;sequence=ACTTACCCTCAT;
#chr10:295039-295099(+) fimo nucleotide_motif 25 36 55.3 + . Name=1;ID=1-1-chr10:295039-295099(+);pvalue=2.97e-06;qvalue=0.00107;sequence=TGTTACCCGTTC;
#chr10:576747-576807(-) fimo nucleotide_motif 25 36 56.2 + . Name=1;ID=1-1-chr10:576747-576807(-);pvalue=2.37e-06;qvalue=0.00107;sequence=CGTTACCCGACC;
#
# Example of the target layout:
#chr1 genetrack . 123950 123970 22 + . stddev=0.0
#chr1 genetrack . 565745 565765 12 + . stddev=0.0
#chr1 genetrack . 565793 565813 44 + . stddev=0.298065387468

use strict;
use warnings;

die "FIMO_GFF_File\tOutput_Path\n" unless @ARGV == 2;
my ($input, $output) = @ARGV;

open(my $in, '<', $input) or die "Can't open $input for reading! ($!)\n";

my @motif_ids;    # first attribute of column 9, in first-seen order
my %hits_for;     # motif id => [ { line, score, order }, ... ]
my %seen;         # "motif id\0output line" => already stored
my $order = 0;    # running counter used to keep score ties in input order

while (my $line = <$in>) {
    $line =~ s/[\r\n]+\z//;
    next if $line =~ /gff-version/;
    next if $line =~ /^\s*(?:#|$)/;    # blank lines and comment lines

    my @field = split /\t/, $line;
    if (@field < 9) {
        # Without all nine columns there is no motif id to file the hit under.
        warn "Skipping line $. of $input: expected 9 tab-separated columns\n";
        next;
    }
    my ($seq_name, $first, $last, $score, $strand, $attributes)
        = @field[0, 3, 4, 5, 6, 8];

    my ($motif_id) = split /;/, $attributes;
    $motif_id = '' unless defined $motif_id;

    my ($chrom, $offset, $region_strand) = parse_region($seq_name);

    # NOTE(review): hits in a "(-)" region are shifted from the region start
    # exactly like "+" regions; only the strand is flipped.  If the scanned
    # sequence of a "-" region was reverse-complemented, positions would have
    # to be mirrored from the region end instead -- confirm before changing.
    my $start = $offset + lenient_number($first);
    my $stop  = $offset + lenient_number($last);

    # A hit reported relative to a minus-strand region has the opposite
    # strand on the genome.
    if ($region_strand eq '-') {
        $strand = ($strand eq '+') ? '-' : '+';
    }

    my $gff_line = join "\t",
        $chrom, 'fimo', 'motif', $start, $stop, $score, $strand, '.',
        "${chrom}_${start}_${stop}_${strand}";

    # Drop exact duplicates, but only within the same motif: the output line
    # does not carry the motif id, so a global check would remove a hit from
    # one motif's file merely because another motif produced the same line.
    next if $seen{"$motif_id\0$gff_line"}++;

    push @motif_ids, $motif_id unless exists $hits_for{$motif_id};
    push @{ $hits_for{$motif_id} }, {
        line  => $gff_line,
        score => lenient_number($score),
        order => $order++,
    };
}
close($in);

for my $motif_id (@motif_ids) {
    my (undef, $name) = split /=/, $motif_id;
    $name = '' unless defined $name;

    my $path = "$output/MOTIF$name.gff";
    open(my $out, '>', $path) or die "Can't open $path for writing! ($!)\n";

    my @by_score = sort {
        $b->{score} <=> $a->{score} or $a->{order} <=> $b->{order}
    } @{ $hits_for{$motif_id} };

    print {$out} $_->{line}, "\n" for @by_score;

    # Buffered write errors only surface when the handle is closed.
    close($out) or die "Can't finish writing $path! ($!)\n";
}

# Split a sequence name of the form "chrom:start-stop(strand)" into
# (chrom, region start, region strand).  Missing parts fall back to an empty
# chrom, a start of 0 and a '+' strand; the strand is '-' only when the text
# after '(' contains a '-'.
sub parse_region {
    my ($seq_name) = @_;

    my ($chrom, $span) = split /:/, $seq_name;
    $chrom = '' unless defined $chrom;
    return ($chrom, 0, '+') unless defined $span;

    my ($coords, $strand_part) = split /\(/, $span;
    my ($begin) = split /-/, (defined $coords ? $coords : '');
    my $region_strand
        = (defined $strand_part && $strand_part =~ /-/) ? '-' : '+';

    return ($chrom, lenient_number($begin), $region_strand);
}

# Numeric value of a field using Perl's ordinary string-to-number rules,
# treating undef and non-numeric text as 0 without emitting warnings.
sub lenient_number {
    my ($value) = @_;
    return 0 unless defined $value;
    no warnings 'numeric';
    return $value + 0;
}

__END__
diff -r 000000000000 -r 48d424adfaef fimo_gff_to_gff.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fimo_gff_to_gff.xml Thu Mar 03 21:16:32 2016 -0500 @@ -0,0 +1,52 @@ + + + + perl + + + + + + + + + + + + + + + + + + + + + + +.. 
class:: warningmark + +This tool requires FASTA headers that use the default bedtools getfasta header format of +**chrom:start-stop(strand)** or **chrom:start-stop** in which case strand is set as '+' +per the bedtools standard. + +**What it does** + +Converts FIMO tabular (almost GFF) files to true genomic coordinates in valid GFF format. A +collection of datasets is produced consisting of one dataset per motif discovered in the input. + + + + + @unpublished{None, + author = {Lai, William}, + title = {None}, + year = {None}, + eprint = {None}, + url = {http://www.huck.psu.edu/content/research/independent-centers-excellence/center-for-eukaryotic-gene-regulation} + } + + diff -r 000000000000 -r 48d424adfaef test-data/input.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.tabular Thu Mar 03 21:16:32 2016 -0500 @@ -0,0 +1,11 @@ +##gff-version 3 +sacCer3_cegr;chr14;359337;359397;. fimo nucleotide_motif 10 50 217 + . Name=1;ID=1-1-sacCer3_cegr_chr14_359337_359397_.;pvalue=2.06e-22;sequence=TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT; +sacCer3_cegr;chr14;359337;359397;. fimo nucleotide_motif 11 51 217 + . Name=1;ID=1-2-sacCer3_cegr_chr14_359337_359397_.;pvalue=2.06e-22;sequence=TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT; +sacCer3_cegr;chr14;359337;359397;. fimo nucleotide_motif 8 48 209 + . Name=1;ID=1-3-sacCer3_cegr_chr14_359337_359397_.;pvalue=1.36e-21;sequence=TCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT; +sacCer3_cegr;chr14;359337;359397;. fimo nucleotide_motif 6 46 197 + . Name=1;ID=1-4-sacCer3_cegr_chr14_359337_359397_.;pvalue=1.92e-20;sequence=TTTCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT; +sacCer3_cegr;chr14;359337;359397;. fimo nucleotide_motif 5 45 194 + . Name=1;ID=1-5-sacCer3_cegr_chr14_359337_359397_.;pvalue=3.84e-20;sequence=TTTTCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT; +sacCer3_cegr;chr14;359337;359397;. fimo nucleotide_motif 13 53 190 + . 
Name=1;ID=1-6-sacCer3_cegr_chr14_359337_359397_.;pvalue=9.63e-20;sequence=TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGA; +sacCer3_cegr;chr14;359337;359397;. fimo nucleotide_motif 7 47 190 + . Name=1;ID=1-7-sacCer3_cegr_chr14_359337_359397_.;pvalue=1.05e-19;sequence=TTCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT; +sacCer3_cegr;chr14;359337;359397;. fimo nucleotide_motif 12 52 178 + . Name=1;ID=1-8-sacCer3_cegr_chr14_359337_359397_.;pvalue=1.4e-18;sequence=TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTG; +sacCer3_cegr;chr14;359337;359397;. fimo nucleotide_motif 17 57 176 + . Name=1;ID=1-9-sacCer3_cegr_chr14_359337_359397_.;pvalue=2.62e-18;sequence=TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAATCT; +sacCer3_cegr;chr14;359337;359397;. fimo nucleotide_motif 9 49 170 + . Name=1;ID=1-10-sacCer3_cegr_chr14_359337_359397_.;pvalue=9.64e-18;sequence=CTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT; diff -r 000000000000 -r 48d424adfaef test-data/motif1.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/motif1.gff Thu Mar 03 21:16:32 2016 -0500 @@ -0,0 +1,10 @@ +sacCer3_cegr;chr14;359337;359397;. fimo motif 10 50 217 + . sacCer3_cegr;chr14;359337;359397;._10_50_+ +sacCer3_cegr;chr14;359337;359397;. fimo motif 11 51 217 + . sacCer3_cegr;chr14;359337;359397;._11_51_+ +sacCer3_cegr;chr14;359337;359397;. fimo motif 8 48 209 + . sacCer3_cegr;chr14;359337;359397;._8_48_+ +sacCer3_cegr;chr14;359337;359397;. fimo motif 6 46 197 + . sacCer3_cegr;chr14;359337;359397;._6_46_+ +sacCer3_cegr;chr14;359337;359397;. fimo motif 5 45 194 + . sacCer3_cegr;chr14;359337;359397;._5_45_+ +sacCer3_cegr;chr14;359337;359397;. fimo motif 13 53 190 + . sacCer3_cegr;chr14;359337;359397;._13_53_+ +sacCer3_cegr;chr14;359337;359397;. fimo motif 7 47 190 + . sacCer3_cegr;chr14;359337;359397;._7_47_+ +sacCer3_cegr;chr14;359337;359397;. fimo motif 12 52 178 + . sacCer3_cegr;chr14;359337;359397;._12_52_+ +sacCer3_cegr;chr14;359337;359397;. fimo motif 17 57 176 + . 
sacCer3_cegr;chr14;359337;359397;._17_57_+ +sacCer3_cegr;chr14;359337;359397;. fimo motif 9 49 170 + . sacCer3_cegr;chr14;359337;359397;._9_49_+ diff -r 000000000000 -r 48d424adfaef tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Thu Mar 03 21:16:32 2016 -0500 @@ -0,0 +1,6 @@ + + + + + +