Mercurial > repos > devteam > hgv_fundo
diff disease_ontology_gene_fuzzy_selector.pl @ 0:6b09d2ed034f draft
Uploaded tool tarball.
author | devteam |
---|---|
date | Tue, 20 Aug 2013 10:55:38 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/disease_ontology_gene_fuzzy_selector.pl Tue Aug 20 10:55:38 2013 -0400 @@ -0,0 +1,64 @@ +#!/usr/bin/env perl + +use strict; +use warnings; + +################################################################## +# Select genes that are associated with the diseases listed in the +# disease ontology. +# ontology: http://do-wiki.nubic.northwestern.edu/index.php/Main_Page +# gene associations by FunDO: http://projects.bioinformatics.northwestern.edu/do_rif/ +# Sept 2010, switch to doLite +# input: build outfile sourceFileLoc.loc term or partial term +################################################################## + +if (!@ARGV or @ARGV < 3) { + print "usage: disease_ontology_gene_selector.pl build outfile.txt sourceFile.loc [list of terms]\n"; + exit; +} + +my $build = shift @ARGV; +my $out = shift @ARGV; +my $in = shift @ARGV; +my $term = shift @ARGV; +$term =~ s/^'//; #remove quotes protecting from shell +$term =~ s/'$//; +my $data; +open(LOC, $in) or die "Couldn't open $in, $!\n"; +while (<LOC>) { + chomp; + if (/^\s*#/) { next; } + my @f = split(/\t/); + if ($f[0] eq $build) { + if ($f[1] eq 'disease associated genes') { + $data = $f[2]; + } + } +} +close LOC or die "Couldn't close $in, $!\n"; +if (!$data) { + print "Error $build not found in $in\n"; + exit; +} +if (!defined $term) { + print "No disease term entered\n"; + exit; +} + +#start with just fuzzy term matches +open(OUT, ">", $out) or die "Couldn't open $out, $!\n"; +open(FH, $data) or die "Couldn't open data file $data, $!\n"; +$term =~ s/\s+/|/g; #use OR between words +while (<FH>) { + chomp; + my @f = split(/\t/); #chrom start end strand geneName geneID disease + if ($f[6] =~ /($term)/i) { + print OUT join("\t", @f), "\n"; + }elsif ($term eq 'disease') { #print all with disease + print OUT join("\t", @f), "\n"; + } +} +close FH or die "Couldn't close data file $data, $!\n"; +close OUT or die "Couldn't close $out, $!\n"; + +exit;