#!/usr/bin/perl

use strict;
use warnings;

#### Read through a GFF file of motifs and report the number of unique ids,
#### where the id is the first tab-separated column of each record.
#### The count is written to the output file as a single line.
#### Ian Donaldson Sept 2008

#### Usage
unless (@ARGV == 2) {
    die("USAGE: $0 | GFF file | Output file\n\n");
}

my ($gff_path, $output_path) = @ARGV;

#### Ready input and output files
# Three-argument open with lexical handles: the mode can never be altered by
# odd characters in a user-supplied filename, and $! explains any failure.
open(my $gff_fh, '<', $gff_path)
    or die("Could not open GFF file!! ($gff_path: $!)\n\n");
open(my $output_fh, '>', $output_path)
    or die("Could not open output file!! ($output_path: $!)\n\n");

#### Hash to hold ids (keys are the ids seen so far; values are unused flags)
my %id_hash = ();

#### Work thru GFF file
while (defined(my $gff_line = <$gff_fh>)) {
    # Skip comment lines and any line that starts with whitespace
    # (this also covers blank lines).
    next if $gff_line =~ /^(?:#|\s)/;

    # Strip the line ending so that a record consisting of the id alone is
    # not stored as "id\n" (or "id\r\n") and counted separately from the
    # same id appearing on a line with further columns.
    $gff_line =~ s/\r?\n\z//;

    # Only the first column is needed, so stop splitting after it.
    my ($id) = split(/\t/, $gff_line, 2);

    $id_hash{$id} = 1;
}

# The number of distinct ids is simply the number of hash keys.
my $elements = scalar(keys(%id_hash));

#print OUTPUT "There are $elements unique sequences in the file\n";
print {$output_fh} "$elements non-redundant sequences\n"
    or die("Could not write to output file!! ($output_path: $!)\n\n");

#### Close files
close($gff_fh);
# Buffered write errors only surface at close, so check it on the output.
close($output_fh)
    or die("Could not close output file!! ($output_path: $!)\n\n");

exit;