comparison fimo_gff_to_gff.pl @ 1:063d97ffda39 draft

Uploaded
author greg
date Fri, 04 Mar 2016 15:31:45 -0500
parents 48d424adfaef
children
comparison
equal deleted inserted replaced
0:48d424adfaef 1:063d97ffda39
7 ##gff-version 3 7 ##gff-version 3
8 #chr10:265210-265270(-) fimo nucleotide_motif 25 36 40.2 + . Name=1;ID=1-1-chr10:265210-265270(-);pvalue=9.48e-05;qvalue=0.00885;sequence=ACTTACCCTCAT; 8 #chr10:265210-265270(-) fimo nucleotide_motif 25 36 40.2 + . Name=1;ID=1-1-chr10:265210-265270(-);pvalue=9.48e-05;qvalue=0.00885;sequence=ACTTACCCTCAT;
9 #chr10:295039-295099(+) fimo nucleotide_motif 25 36 55.3 + . Name=1;ID=1-1-chr10:295039-295099(+);pvalue=2.97e-06;qvalue=0.00107;sequence=TGTTACCCGTTC; 9 #chr10:295039-295099(+) fimo nucleotide_motif 25 36 55.3 + . Name=1;ID=1-1-chr10:295039-295099(+);pvalue=2.97e-06;qvalue=0.00107;sequence=TGTTACCCGTTC;
10 #chr10:576747-576807(-) fimo nucleotide_motif 25 36 56.2 + . Name=1;ID=1-1-chr10:576747-576807(-);pvalue=2.37e-06;qvalue=0.00107;sequence=CGTTACCCGACC; 10 #chr10:576747-576807(-) fimo nucleotide_motif 25 36 56.2 + . Name=1;ID=1-1-chr10:576747-576807(-);pvalue=2.37e-06;qvalue=0.00107;sequence=CGTTACCCGACC;
11 11
12 #chr1 genetrack . 123950 123970 22 + . stddev=0.0 12 #chr1 genetrack . 123950 123970 22 + . stddev=0.0
13 #chr1 genetrack . 565745 565765 12 + . stddev=0.0 13 #chr1 genetrack . 565745 565765 12 + . stddev=0.0
14 #chr1 genetrack . 565793 565813 44 + . stddev=0.298065387468 14 #chr1 genetrack . 565793 565813 44 + . stddev=0.298065387468
15 15
16 @COORD = (); 16 @COORD = ();
17 @ID_NUM = (); 17 @ID_NUM = ();
18 $line = ""; 18 $line = "";
19 while($line = <IN>) { 19 while($line = <IN>) {
20 chomp($line); 20 chomp($line);
21 next if($line =~ /gff-version/); 21 next if($line =~ /gff-version/);
22 @array = split(/\t/, $line); 22 @array = split(/\t/, $line);
23 @CHR = split(/\:/, $array[0]); 23 @CHR = split(/\:/, $array[0]);
24 @gff_COORD = split(/\(/, $CHR[1]); 24 @gff_COORD = split(/\(/, $CHR[1]);
25 @START_array = split(/\-/, $gff_COORD[0]); 25 @START_array = split(/\-/, $gff_COORD[0]);
26 $fimo_DIR = "+"; 26 $fimo_DIR = "+";
27 if($gff_COORD[1] =~ "-") { $fimo_DIR = "-"; } 27 if($gff_COORD[1] =~ "-") { $fimo_DIR = "-"; }
28 28
29 $DIR = $array[6]; 29 $DIR = $array[6];
30 $SCORE = $array[5]; 30 $SCORE = $array[5];
31 31
32 @NAME = split(/\;/, $array[8]); 32 @NAME = split(/\;/, $array[8]);
33 $NEW = 0; 33 $NEW = 0;
34 for($x = 0; $x <= $#ID_NUM; $x++) { 34 for($x = 0; $x <= $#ID_NUM; $x++) {
35 if($ID_NUM[$x] eq $NAME[0]) { 35 if($ID_NUM[$x] eq $NAME[0]) {
36 $NEW = 1; 36 $NEW = 1;
37 $x = $#ID_NUM + 1; 37 $x = $#ID_NUM + 1;
38 } 38 }
39 } 39 }
40 if($NEW == 0) { push(@ID_NUM, $NAME[0]); } 40 if($NEW == 0) { push(@ID_NUM, $NAME[0]); }
41 41
42 $START = $START_array[0] + $array[3]; 42 $START = $START_array[0] + $array[3] - 1;
43 $STOP = $START_array[0] + $array[4]; 43 $STOP = $START_array[0] + $array[4] - 1;
44 44
45 if($fimo_DIR eq "-") { 45 if($fimo_DIR eq "-") {
46 if($DIR eq "+") { $DIR = "-"; } 46 if($DIR eq "+") { $DIR = "-"; }
47 else { $DIR = "+"; } 47 else { $DIR = "+"; }
48 } 48 }
49 49
50 $newline = "$CHR[0]\tfimo\tmotif\t$START\t$STOP\t$SCORE\t$DIR\t.\t$CHR[0]\_$START\_$STOP\_$DIR"; 50 $newline = "$CHR[0]\tfimo\tmotif\t$START\t$STOP\t$SCORE\t$DIR\t.\t$CHR[0]\_$START\_$STOP\_$DIR";
51 $EXISTS = 0; 51 $EXISTS = 0;
52 for($x = 0; $x <= $#COORD; $x++) { 52 for($x = 0; $x <= $#COORD; $x++) {
53 if($newline eq $COORD[$x]{'line'}) { 53 if($newline eq $COORD[$x]{'line'}) {
60 } 60 }
61 close IN; 61 close IN;
62 @SORT = sort { $$b{'score'} <=> $$a{'score'} } @COORD; 62 @SORT = sort { $$b{'score'} <=> $$a{'score'} } @COORD;
63 63
64 for($x = 0; $x <= $#ID_NUM; $x++) { 64 for($x = 0; $x <= $#ID_NUM; $x++) {
65 @FILENAME = split(/\=/, $ID_NUM[$x]); 65 @FILENAME = split(/\=/, $ID_NUM[$x]);
66 $FILE = "MOTIF$FILENAME[1]"; 66 $FILE = "MOTIF$FILENAME[1]";
67 open(OUT, ">$output/$FILE.gff") or die "Can't open $output/$FILE.gff for writing!\n"; 67 open(OUT, ">$output/$FILE.gff") or die "Can't open $output/$FILE.gff for writing!\n";
68 for($y = 0; $y <= $#SORT; $y++) { 68 for($y = 0; $y <= $#SORT; $y++) {
69 if($SORT[$y]{'id'} eq $ID_NUM[$x]) { 69 if($SORT[$y]{'id'} eq $ID_NUM[$x]) {
70 print OUT $SORT[$y]{'line'},"\n"; 70 print OUT $SORT[$y]{'line'},"\n";
71 } 71 }
72 } 72 }
73 close OUT; 73 close OUT;
74 } 74 }