diff tool_dependencies.xml @ 18:547d8db4673e

Update create_reference_dataset for non human genome builds
author Jim Johnson <jj@umn.edu>
date Sat, 15 Jun 2013 14:36:47 -0500
parents 3df516ce3ad6
children 103d61bfa1f3
line wrap: on
line diff
--- a/tool_dependencies.xml	Wed Jun 12 21:03:18 2013 -0500
+++ b/tool_dependencies.xml	Sat Jun 15 14:36:47 2013 -0500
@@ -26,6 +26,23 @@
                     </repository>
                 </action>
                 <action type="shell_command">export CPLUS_INCLUDE_PATH=$BOOST_ROOT_DIR:$CPLUS_INCLUDE_PATH &amp;&amp; cd tools &amp;&amp; make</action>
+                <!-- Patch scripts/create_reference_dataset.pl (original kept as create_reference_dataset.pl.orig) so it handles more than just human genomes. The pipeline below: (1) deletes the hard-coded wget_gunzip calls for rmsk.txt.gz and for the hg19 intronEst.txt.gz; (2) appends reads of ensembl_organism, ensembl_prefix, ncbi_organism and ncbi_prefix from $config right after the line that reads ensembl_genome_version; (3) inserts a try_wget sub (wget to FILE.gz, gunzip on success, returns the system status) in front of sub wget_gunzip; (4) via awk, replaces every line testing 'ucsc_genome_version eq "hg18"' with an 'if (try_wget(...) != 0)' line that fetches rmsk or intronEst for $ucsc_genome_version, chosen by the most recent 'if (not -e $repeats_filename)' or 'if (not -e $est_alignments)' line seen; (5) swaps the UniGene/Homo_sapiens, Hs.seq.uniq.gz, homo_sapiens, Homo_sapiens and hg19 literals for $ncbi_organism, $ncbi_prefix, $ensembl_organism, $ensembl_prefix and $ucsc_genome_version. Notes: 'Q' is used as a newline placeholder, so tr turns EVERY Q in the script into a newline, including any already present in the original; the sed steps have no g flag, so only the first match on each line is replaced; stage order matters: UniGene/Homo_sapiens is rewritten before the bare Homo_sapiens, and the literal hg19 intronEst call is removed before hg19 is substituted. -->
+                <action type="shell_command">
+                   cd scripts &amp;&amp; 
+                   cp create_reference_dataset.pl create_reference_dataset.pl.orig &amp;&amp; 
+                   cat create_reference_dataset.pl.orig |
+                   sed 's#wget_gunzip("ftp://hgdownload.cse.ucsc.edu/goldenPath/$ucsc_genome_version/database/rmsk.txt.gz", $repeats_filename);##' |
+                   sed 's#wget_gunzip("ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/intronEst.txt.gz", $est_alignments);##' |
+                   sed 's#^\(my .*ensembl_genome_version.*config.*get_value.*;\)#\1Qmy $ensembl_organism = $config->get_value("ensembl_organism");Qmy $ensembl_prefix = $config->get_value("ensembl_prefix");Qmy $ncbi_organism = $config->get_value("ncbi_organism");Qmy $ncbi_prefix = $config->get_value("ncbi_prefix");#' |
+                   sed 's/^\(sub wget_gunzip\)/sub try_wgetQ{Q my $url = shift;Q my $filename = shift;Q my $filename_gz = $filename.".gz";Q my $rslt = system "wget $url -O $filename_gz";Q if($rslt == 0)Q {Q  $rslt = system "gunzip $filename_gz";Q }Q return $rslt;Q}QQ\1/' |
+                   tr 'Q' '\n' |
+                   awk 'BEGIN{pfx="p1";fn="p2";}/if \(not -e \$repeats_filename\)/{pfx="rmsk";fn="repeats_filename";} /if \(not -e \$est_alignments\)/{pfx="intronEst";fn="est_alignments"} /ucsc_genome_version eq "hg18"/{printf("\tif (try_wget(\"ftp://hgdownload.cse.ucsc.edu/goldenPath/$ucsc_genome_version/database/%s.txt.gz\", \$%s) != 0)\n",pfx,fn);} $0 !~ /ucsc_genome_version eq "hg18/{print $0;}' |
+                   sed 's#UniGene/Homo_sapiens#UniGene/$ncbi_organism#' |
+                   sed 's/Hs.seq.uniq.gz/$ncbi_prefix.seq.uniq.gz/' |
+                   sed 's/homo_sapiens/$ensembl_organism/' |
+                   sed 's/Homo_sapiens/$ensembl_prefix/' |
+                   sed 's/hg19/$ucsc_genome_version/' > create_reference_dataset.pl
+                </action>
                 <action type="move_directory_files">
                     <source_directory>.</source_directory>
                     <destination_directory>$INSTALL_DIR</destination_directory>