Mercurial > repos > devteam > pearson_correlation
comparison correlation.pl @ 0:6c039ea3cc70 draft
Imported from capsule None
| author | devteam |
|---|---|
| date | Mon, 28 Jul 2014 11:30:16 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:6c039ea3cc70 |
|---|---|
| 1 #!/usr/bin/perl | |
| 2 | |
| 3 ########################################################################### | |
| 4 # Purpose: To calculate the correlation of two sets of scores in one file. | |
| 5 # Usage: correlation.pl infile.bed output.txt column1 column2 | |
| 6 # (column start from 1) | |
| 7 # Written by: Yi Zhang (June, 2005) | |
| 8 ########################################################################### | |
| 9 if (!$ARGV[0] || !$ARGV[1] || !defined($ARGV[2]) || !defined($ARGV[3]) ) { | |
| 10 print STDERR "Usage: correlation.pl infile.bed output.txt column1 column2\n"; | |
| 11 print STDERR " (column start from 1)\n"; | |
| 12 exit; | |
| 13 } | |
| 14 my $file = $ARGV[0]; | |
| 15 my $out = $ARGV[1]; | |
| 16 | |
| 17 die "<font color=\"yellow\">The input columns contain numerical values: $ARGV[2], $ARGV[3]</font>.\n" if ($ARGV[2] =~ /[a-zA-Z]+/ || $ARGV[3] =~ /[a-zA-Z]+/); | |
| 18 | |
| 19 my $col1 = $ARGV[2] - 1; | |
| 20 my $col2 = $ARGV[3] - 1; | |
| 21 | |
| 22 my ($f, $o); | |
| 23 my (@a, @b); | |
| 24 | |
| 25 my $n_t = 0; | |
| 26 open($f, $file) or die "Could't open $file, $!\n"; | |
| 27 while(<$f>) { | |
| 28 chomp; | |
| 29 my @t = split(/\t/); | |
| 30 if ($n_t == 0) { | |
| 31 $n_t = scalar(@t) - 1; | |
| 32 die "<font color=\"yellow\">The input column number exceeds the size of the file: $col1, $col2, $n_t</font>\n" if ( $col1 > $n_t || $col2 > $n_t ); | |
| 33 } | |
| 34 die "<font color=\"yellow\">The columns you have selected contain non numeric characters:$t[$col1] and $t[$col2] \n</font>" if ($t[$col1] =~ /[a-zA-Z]+/ || $t[$col2] =~ /[a-zA-Z]+/); | |
| 35 push(@a, $t[$col1]); | |
| 36 push(@b, $t[$col2]); | |
| 37 } | |
| 38 close($f); | |
| 39 | |
| 40 my $result = correlation(\@a, \@b); | |
| 41 | |
| 42 open($o, ">$out") or die "Couldn't open $out, $!\n"; | |
| 43 $col1 = $col1 + 1; | |
| 44 $col2 = $col2 + 1; | |
| 45 print $o "The correlation of column $col1 and $col2 is $result\n"; | |
| 46 close($o); | |
| 47 print "The correlation of column $col1 and $col2 is $result\n"; | |
| 48 | |
| 49 sub correlation { | |
| 50 my ($array1ref, $array2ref) = @_; | |
| 51 my ($sum1, $sum2); | |
| 52 my ($sum1_squared, $sum2_squared); | |
| 53 foreach (@$array1ref) { $sum1 += $_; $sum1_squared += $_**2; } | |
| 54 foreach (@$array2ref) { $sum2 += $_; $sum2_squared += $_**2; } | |
| 55 my $numerator = (@$array1ref**2) * covariance($array1ref, $array2ref); | |
| 56 my $denominator = sqrt(((@$array1ref * $sum1_squared) - ($sum1**2)) * | |
| 57 ((@$array1ref * $sum2_squared) - ($sum2**2))); | |
| 58 my $r; | |
| 59 if ($denominator == 0) { | |
| 60 print STDERR "The denominator is 0.\n"; | |
| 61 exit 0; | |
| 62 } else { | |
| 63 $r = $numerator / $denominator; | |
| 64 } | |
| 65 return $r; | |
| 66 } | |
| 67 | |
| 68 sub covariance { | |
| 69 my ($array1ref, $array2ref) = @_; | |
| 70 my ($i, $result); | |
| 71 for ($i = 0; $i < @$array1ref; $i++) { | |
| 72 $result += $array1ref->[$i] * $array2ref->[$i]; | |
| 73 } | |
| 74 $result /= @$array1ref; | |
| 75 $result -= mean($array1ref) * mean($array2ref); | |
| 76 } | |
| 77 | |
| 78 sub mean { | |
| 79 my ($arrayref) = @_; | |
| 80 my $result; | |
| 81 foreach (@$arrayref) { $result += $_; } | |
| 82 return $result/@$arrayref; | |
| 83 } | |
| 84 |
