#! /usr/bin/perl

# Convert a FIMO GFF file whose sequence names encode a genomic window
# ("chr:start-end(strand)") into one GFF file per motif, with hit
# coordinates shifted by the window start and records ordered by
# descending score.
#
# Usage: <script> FIMO_GFF_File Output_Path
#
# Example input records (FIMO GFF; columns are tab-separated in real files):
##gff-version 3
#chr10:265210-265270(-) fimo nucleotide_motif 25 36 40.2 + . Name=1;ID=1-1-chr10:265210-265270(-);pvalue=9.48e-05;qvalue=0.00885;sequence=ACTTACCCTCAT;
#chr10:295039-295099(+) fimo nucleotide_motif 25 36 55.3 + . Name=1;ID=1-1-chr10:295039-295099(+);pvalue=2.97e-06;qvalue=0.00107;sequence=TGTTACCCGTTC;
#chr10:576747-576807(-) fimo nucleotide_motif 25 36 56.2 + . Name=1;ID=1-1-chr10:576747-576807(-);pvalue=2.37e-06;qvalue=0.00107;sequence=CGTTACCCGACC;
#
# Example genetrack-style GFF records (presumably the layout the output is
# modelled on -- TODO confirm; the records actually written use "fimo" and
# "motif" in columns 2 and 3):
#chr1 genetrack . 123950 123970 22 + . stddev=0.0
#chr1 genetrack . 565745 565765 12 + . stddev=0.0
#chr1 genetrack . 565793 565813 44 + . stddev=0.298065387468

use strict;
use warnings;

# Parse one tab-separated FIMO GFF record.
#
# Returns a hash reference with keys:
#   id    - first ';'-separated attribute of column 9 (e.g. "Name=1")
#   score - column 6, used for ordering the output
#   line  - the converted GFF line (without trailing newline)
# Returns nothing when the record has fewer than nine columns.
sub parse_fimo_record {
    my ($line) = @_;

    # Limit -1 keeps trailing empty columns so the column count is accurate.
    my @fields = split /\t/, $line, -1;
    return if @fields < 9;

    # Sequence name layout: chrom ":" start "-" end "(" strand ")".
    my ($chrom, $region) = split /:/, $fields[0];
    $chrom = '' unless defined $chrom;
    my ($span, $window_tag) = defined $region ? split(/\(/, $region) : ();
    my ($first) = defined $span ? split(/-/, $span) : ();

    # A name without a numeric window start contributes an offset of 0.
    my $offset = (defined $first && $first =~ /\A\d+\z/) ? $first : 0;

    # A '-' anywhere after the '(' marks a minus-strand window.
    my $window_is_minus = (defined $window_tag && $window_tag =~ /-/) ? 1 : 0;

    my $score  = $fields[5];
    my $strand = $fields[6];

    # A hit inside a minus-strand window has its strand inverted; anything
    # other than '+' becomes '+'.
    if ($window_is_minus) {
        $strand = $strand eq '+' ? '-' : '+';
    }

    # NOTE(review): the window start is added the same way for '+' and '-'
    # windows. If minus-strand windows were reverse-complemented before FIMO
    # ran, hit positions may need mirroring within the window -- confirm
    # against the upstream sequence-extraction step.
    my $start = $offset + $fields[3] - 1;
    my $stop  = $offset + $fields[4] - 1;

    my ($motif_id) = split /;/, $fields[8];
    $motif_id = '' unless defined $motif_id;

    my $converted = join "\t",
        $chrom, 'fimo', 'motif', $start, $stop, $score, $strand, '.',
        join('_', $chrom, $start, $stop, $strand);

    return { id => $motif_id, score => $score, line => $converted };
}

# Read the FIMO GFF file and write <Output_Path>/MOTIF<name>.gff for every
# motif, in the order motifs first appear in the input.
#
# Dies with the usage string unless exactly two arguments are given, and
# when the input or any output file cannot be opened.
sub main {
    my @args = @_;
    die "FIMO_GFF_File\tOutput_Path\n" unless @args == 2;
    my ($input, $output) = @args;

    open(my $in_fh, '<', $input) or die "Can't open $input for reading!\n";

    my @motif_ids;      # motif attributes in first-seen order
    my %known_motif;    # motif attribute => already recorded
    my @records;        # unique converted records
    my %seen_line;      # converted line => already recorded

    while (my $line = <$in_fh>) {
        chomp $line;
        $line =~ s/\r\z//;                        # tolerate CRLF input
        next if $line =~ /gff-version/;
        next if $line =~ /\A\s*(?:#|\z)/;         # blank lines and comments

        my $record = parse_fimo_record($line);
        unless ($record) {
            warn "Skipping malformed line $. of $input (expected 9 tab-separated columns)\n";
            next;
        }

        # A motif is registered even when its record turns out to be a
        # duplicate, so its output file is still created.
        push @motif_ids, $record->{id} unless $known_motif{ $record->{id} }++;

        # Duplicates are detected on the full output line, across all motifs.
        push @records, $record unless $seen_line{ $record->{line} }++;
    }
    close $in_fh;

    # Sort once by descending score, then bucket by motif so every file
    # receives its records in that order.
    my %lines_for;
    for my $record (sort { $b->{score} <=> $a->{score} } @records) {
        push @{ $lines_for{ $record->{id} } }, $record->{line};
    }

    for my $motif_id (@motif_ids) {
        # "Name=1" -> "1"; an attribute without '=' yields an empty suffix.
        my (undef, $suffix) = split /=/, $motif_id;
        $suffix = '' unless defined $suffix;

        my $path = "$output/MOTIF$suffix.gff";
        open(my $out_fh, '>', $path) or die "Can't open $path for writing!\n";
        for my $out_line (@{ $lines_for{$motif_id} || [] }) {
            print {$out_fh} $out_line, "\n";
        }
        # Buffered write errors only surface at close.
        close $out_fh or die "Can't close $path after writing: $!\n";
    }

    return 1;
}

# Run only when executed as a script, so the subs can be loaded without
# side effects.
main(@ARGV) unless caller;

1;