Mercurial > repos > devteam > categorize_elements_satisfying_criteria
comparison categorize_elements_satisfying_criteria.pl @ 0:314830c0db00 draft default tip
Uploaded
| author | devteam |
|---|---|
| date | Tue, 20 Aug 2013 09:22:17 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:314830c0db00 |
|---|---|
#!/usr/bin/perl -w

# The program takes as input a set of categories, such that each category contains many elements.
# It also takes a table relating elements with criteria, such that each element is assigned a number
# representing the number of times the element satisfies a certain criterion.
# The first input is a TABULAR format file, such that the left column represents the names of categories
# and all other columns represent the names of elements.
# The second input is a TABULAR format file relating elements with criteria, such that the first line
# represents the names of criteria and the left column represents the names of elements.
# The output is a TABULAR format file relating categories with criteria, such that each category is
# assigned a number representing the total number of times its elements satisfy a certain criterion.
# Each category is assigned as many numbers as there are criteria.
#
# Notes on behaviour:
#   - An element listed under several categories is counted only for the last category that lists it.
#   - Elements of the second input that belong to no category are ignored.
#   - Only values greater than zero are added to the totals.

use strict;
use warnings;

# check to make sure having correct files
my $usage = "usage: categorize_elements_satisfying_criteria.pl [TABULAR.in] [TABULAR.in] [TABULAR.out] \n";
die $usage unless @ARGV == 3;

# categories input file, criteria/elements data input file and output file, in that order
my ($categories_inputFile, $elements_data_inputFile, $categorized_data_outputFile) = @ARGV;

# open the input and output files
open(my $categoriesFh, "<", $categories_inputFile)
    or die("Could not open file $categories_inputFile: $!\n");
open(my $elementsFh, "<", $elements_data_inputFile)
    or die("Could not open file $elements_data_inputFile: $!\n");
open(my $outputFh, ">", $categorized_data_outputFile)
    or die("Could not open file $categorized_data_outputFile: $!\n");

# information gathered from the categories input file
my @categoryNames      = ();    # category names in input order, one per output row
my %rowIndexOfCategory = ();    # category name -> index of the first row carrying that name
my %categoryOfElement  = ();    # element name  -> name of the category it belongs to

# iterate through the first input file to get the names of categories and their corresponding elements
while (my $categoryLine = <$categoriesFh>) {
    # strip the line terminator, including the "\r" of CRLF files, so that
    # the last element of the line can still be matched by name
    $categoryLine =~ s/[\r\n]+\z//;

    my ($categoryName, @memberNames) = split(/\t/, $categoryLine);

    # a blank line yields no fields at all; it does not describe a category
    next unless defined $categoryName;

    push @categoryNames, $categoryName;

    # when a category name is repeated, counts go to the first row with that name
    $rowIndexOfCategory{$categoryName} = $#categoryNames
        unless exists $rowIndexOfCategory{$categoryName};

    foreach my $memberName (@memberNames) {
        $categoryOfElement{$memberName} = $categoryName;
    }
}
close($categoriesFh);

# the first line of the second input file holds the names of the criteria
my @criteriaNames = ();
my $criteriaLine  = <$elementsFh>;
if (defined $criteriaLine) {
    $criteriaLine =~ s/[\r\n]+\z//;
    @criteriaNames = split(/\t/, $criteriaLine);
}
my $totalCriteriaNumber = @criteriaNames;

# one row of zero-initialized counters per category, one counter per criterion
my @categoryCounters = map { [ (0) x $totalCriteriaNumber ] } @categoryNames;

# iterate through the rest of the second input file in order to count the number
# of times the elements of each category satisfy each criterion
while (my $elementLine = <$elementsFh>) {
    $elementLine =~ s/[\r\n]+\z//;

    my ($elementName, @elementValues) = split(/\t/, $elementLine);

    # skip blank lines
    next unless defined $elementName;

    # skip elements that belong to no category; looking up a row index for them
    # would yield undef, which Perl treats as index 0 (the first category)
    my $categoryName = $categoryOfElement{$elementName};
    next unless defined $categoryName;

    my $counters = $categoryCounters[ $rowIndexOfCategory{$categoryName} ];

    foreach my $criteriaNumber (0 .. $totalCriteriaNumber - 1) {
        my $value = $elementValues[$criteriaNumber];

        # rows shorter than the header, or with empty cells, contribute nothing
        next unless defined $value && length $value;

        $counters->[$criteriaNumber] += $value if $value > 0;
    }
}
close($elementsFh);

# store the criteria names into the output file; the leading tab leaves the
# top-left cell (above the category names) empty
print {$outputFh} "\t" . join("\t", @criteriaNames) . "\n";

# store the category names and their corresponding totals into the output file
foreach my $row (0 .. $#categoryNames) {
    print {$outputFh} join("\t", $categoryNames[$row], @{ $categoryCounters[$row] }) . "\n";
}

# buffered write errors only surface when the output file is closed
close($outputFh)
    or die("Could not close file $categorized_data_outputFile: $!\n");
