Mercurial > repos > jjohnson > defuse
diff tool_dependencies.xml @ 18:547d8db4673e
Update create_reference_dataset for non human genome builds
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Sat, 15 Jun 2013 14:36:47 -0500 |
parents | 3df516ce3ad6 |
children | 103d61bfa1f3 |
line wrap: on
line diff
--- a/tool_dependencies.xml	Wed Jun 12 21:03:18 2013 -0500
+++ b/tool_dependencies.xml	Sat Jun 15 14:36:47 2013 -0500
@@ -26,6 +26,29 @@
                     </repository>
                 </action>
                 <action type="shell_command">export CPLUS_INCLUDE_PATH=$BOOST_ROOT_DIR:$CPLUS_INCLUDE_PATH && cd tools && make</action>
+                <!--
+                  Modify scripts/create_reference_dataset.pl to handle more than just human genomes.
+                  The original is kept as create_reference_dataset.pl.orig; the pipeline below:
+                    1. blanks out the two hard-coded UCSC wget_gunzip calls (rmsk and intronEst);
+                    2. appends "my" declarations for $ensembl_organism, $ensembl_prefix, $ncbi_organism and
+                       $ncbi_prefix (each read with $config->get_value) after the ensembl_genome_version line;
+                    3. inserts a try_wget sub (wget then gunzip, returning the last exit status) before wget_gunzip;
+                    4. replaces each 'ucsc_genome_version eq "hg18"' line with a try_wget test that fetches the
+                       rmsk or intronEst table, chosen by the most recent "if (not -e ...)" line seen;
+                    5. swaps the hard-coded human names (UniGene, Hs, homo_sapiens, Homo_sapiens, hg19) for variables.
+                  __NEWLINE__ is a placeholder that sed emits and the first awk stage expands into a real newline.
+                  A single-letter placeholder is unsafe: it would also split any script line containing that letter.
+                -->
+                <action type="shell_command">
+                    cd scripts &&
+                    cp create_reference_dataset.pl create_reference_dataset.pl.orig &&
+                    cat create_reference_dataset.pl.orig |
+                    sed 's#wget_gunzip("ftp://hgdownload.cse.ucsc.edu/goldenPath/$ucsc_genome_version/database/rmsk.txt.gz", $repeats_filename);##' |
+                    sed 's#wget_gunzip("ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/intronEst.txt.gz", $est_alignments);##' |
+                    sed 's#^\(my .*ensembl_genome_version.*config.*get_value.*;\)#\1__NEWLINE__my $ensembl_organism = $config->get_value("ensembl_organism");__NEWLINE__my $ensembl_prefix = $config->get_value("ensembl_prefix");__NEWLINE__my $ncbi_organism = $config->get_value("ncbi_organism");__NEWLINE__my $ncbi_prefix = $config->get_value("ncbi_prefix");#' |
+                    sed 's/^\(sub wget_gunzip\)/sub try_wget__NEWLINE__{__NEWLINE__ my $url = shift;__NEWLINE__ my $filename = shift;__NEWLINE__ my $filename_gz = $filename.".gz";__NEWLINE__ my $rslt = system "wget $url -O $filename_gz";__NEWLINE__ if($rslt == 0)__NEWLINE__ {__NEWLINE__ $rslt = system "gunzip $filename_gz";__NEWLINE__ }__NEWLINE__ return $rslt;__NEWLINE__}__NEWLINE____NEWLINE__\1/' |
+                    awk '{gsub(/__NEWLINE__/,"\n"); print}' |
+                    awk 'BEGIN{pfx="p1";fn="p2";}/if \(not -e \$repeats_filename\)/{pfx="rmsk";fn="repeats_filename";} /if \(not -e \$est_alignments\)/{pfx="intronEst";fn="est_alignments"} /ucsc_genome_version eq "hg18"/{printf("\tif (try_wget(\"ftp://hgdownload.cse.ucsc.edu/goldenPath/$ucsc_genome_version/database/%s.txt.gz\", \$%s) != 0)\n",pfx,fn);} $0 !~ /ucsc_genome_version eq "hg18"/{print $0;}' |
+                    sed 's#UniGene/Homo_sapiens#UniGene/$ncbi_organism#' |
+                    sed 's/Hs.seq.uniq.gz/$ncbi_prefix.seq.uniq.gz/' |
+                    sed 's/homo_sapiens/$ensembl_organism/' |
+                    sed 's/Homo_sapiens/$ensembl_prefix/' |
+                    sed 's/hg19/$ucsc_genome_version/' > create_reference_dataset.pl
+                </action>
                 <action type="move_directory_files">
                     <source_directory>.</source_directory>
                     <destination_directory>$INSTALL_DIR</destination_directory>