graphprot_predict_profile: graphprot_predict_profile

comparison graphprot_predict_profile_wrapper.pl @ 0:0bc573ec2010 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit 9bb5f3c8ed8e87ec5652b5bc8bf9c774d5534a1a

author	bgruening
date	Fri, 25 May 2018 11:48:57 -0400
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:0bc573ec2010
+#!/usr/bin/perl
+use strict;
+use warnings;
+use Getopt::Long;
+use Pod::Usage;
+use Cwd qw(getcwd abs_path);
+use List::Util qw(sum);
+=head1 NAME
+=head1 SYNOPSIS
+Galaxy wrapper script for GraphProt (-action predict_profile) to compute
+the binding profile for a given model on a given set of sequences provided
+in FASTA format. After profile prediction, average profiles get computed,
+scores signified and binding peak regions extracted. The score signification
+is done using the provided fitted GEV parameters, either from .params file
+or manually set. If score threshold is set (-thr-sc), p-value assignment
+will be skipped and set score threshold will be used to extract peak
+regions. NOTE: Additional lines .params file are used to store and get
+GEV parameters, as well as type of model (model_type: sequence|structure).
+Also, this wrapper currently works for classification mode only.
+PARAMETERS:
+-help|h         display help page
+-fasta          Input FASTA file (option -fasta)
+-model          Input .model file (option -model)
+-params         Input .params file
+NOTE: uses .params file with additional
+parameters
+Manually set parameters (below) will override
+found settings in .params file
+-data-id        Data ID (option -prefix)
+GraphProt model parameters (by default get from .params file):
+-onlyseq        Set if model is a sequence model
+-R              GraphProt model R parameter
+-D              GraphProt model D parameter
+-epochs         GraphProt model epochs parameter
+-lambda         GraphProt model lambda parameter
+-bitsize        GraphProt model bitsize parameter
+-abstraction    GraphProt model RNAshapes abstraction level
+parameter (set for structure models)
+Peak region extraction parameters:
+-thr-sc         Score threshold for extracting peak regions
+By default p-value of 0.05 is used. If no p-value
+calculation possible, -thr-sc is used with default: 0
+-thr-p          p-value threshold for extracting peak regions
+By default, peak regions with p = 0.05 are extracted,
+as well as p50 score peak regions (if info given)
+Default: 0.05
+-merge-dist     Maximum merge distance for nearby peak regions
+Default: report all non-overlapping regions
+-p50-out        Output p50 score filtered peak regions BED file
+default: false
+GEV distribution parameters:
+-distr-my       GEV distribution my parameter for calculating p-values
+from scores
+-distr-sigma    GEV distrubution sigma parameter for calculating
+p-values from scores
+-distr-xi       GEV distribution xi parameter for calculating p-values
+from scores
+-ap-extlr       Used average profile left right extension for
+averaging scores, which were used for distribution
+fitting. NOTE: usually a value of 5 was used for
+for getting GEV distribution and parameters. If you
+choose a different value here, calculated p-values
+will be wrong!
+default : 5
+=head1 DISCRIPTION
+5) Write manual
+6) add output p50 file with NOTE
+6) put GP into rna_tools
+NOTE:
+Additional lines .params file used to store and get gev parameters, as well
+as type of model (model_type: sequence|structure).
+Example .params content:
+epochs: 20
+lambda: 0.001
+R: 1
+D: 4
+bitsize: 14
+model_type: sequence
+#ADDITIONAL MODEL PARAMETERS
+ap_extlr: 5
+gev_my: -2.5408
+gev_sigma: 1.6444
+gev_xi: -0.1383
+p50_score: 6.51534
+p50_p_val: 0.0009059744
+=cut
+############################
+# COMMAND LINE CHECKING.
+############################
+# Command line argument variables.
+my ($i_help, $i_fasta, $i_model, $i_params, $i_data_id, $i_thr_sc, $i_thr_p, $i_max_merge_dist, $i_p50_out, $i_gp_r, $i_gp_d, $i_gp_epochs, $i_gp_lambda, $i_gp_bitsize, $i_gp_abstr, $i_gp_onlyseq, $i_distr_my, $i_distr_sigma, $i_distr_xi, $i_ap_extlr);
+# Parse the command line.
+GetOptions ( "help|h" => \$i_help,
+"fasta:s" => \$i_fasta,
+"model:s" => \$i_model,
+"params:s" => \$i_params,
+"data-id:s" => \$i_data_id,
+"thr-sc:f" => \$i_thr_sc,
+"thr-p:f" => \$i_thr_p,
+"p50-out" => \$i_p50_out,
+"merge-dist:i" => \$i_max_merge_dist,
+"R:i" => \$i_gp_r,
+"D:i" => \$i_gp_d,
+"epochs:i" => \$i_gp_epochs,
+"lambda:f" => \$i_gp_lambda,
+"bitsize:i" => \$i_gp_bitsize,
+"abstr:i" => \$i_gp_abstr,
+"onlyseq" => \$i_gp_onlyseq,
+"distr-my:f" => \$i_distr_my,
+"distr-sigma:f" => \$i_distr_sigma,
+"distr-xi:f" => \$i_distr_xi,
+"ap-extlr:i" => \$i_ap_extlr ) or pod2usage(1);
+# Check.
+pod2usage(1) if $i_help;
+($i_fasta and $i_model) or pod2usage "ERROR: -fasta, -model are mandatory";
+if ($i_distr_my or $i_distr_sigma or $i_distr_xi) {
+($i_distr_my and $i_distr_sigma and $i_distr_xi) or pod2usage "ERROR: expects all three distribution parameters to be set";
+}
+#######################
+# SET PARAMETERS.
+#######################
+# Prefix.
+my $data_id = "GraphProt";
+if ($i_data_id) {
+$data_id = $i_data_id;
+}
+my $thr_sc = 0;
+if ($i_thr_sc) {
+$thr_sc = $i_thr_sc;
+}
+my $thr_p = 0.05;
+if ($i_thr_p) {
+$thr_p = $i_thr_p;
+}
+my $ap_extlr = 5;
+if ($i_ap_extlr) {
+$ap_extlr = $i_ap_extlr;
+}
+my $max_merge_dist = 0;
+if ($i_max_merge_dist) {
+$max_merge_dist = $i_max_merge_dist;
+}
+# Get parameters from .params file.
+my %params;
+if ($i_params) {
+open(IN, $i_params) or die "Cannot open $i_params: $!";
+while(<IN>) {
+next if ($_ =~ /^#/);
+if ($_ =~ /(.+):\s(.+)/) {
+$params{$1} = $2;
+}
+}
+close IN;
+}
+# Create GP parameter string.
+my $params_string = "";
+# -onlyseq
+my $model_type = "structure";
+if (exists $params{"model_type"}) {
+if ($params{"model_type"} eq "sequence") {
+$params_string .= " -onlyseq";
+$model_type = "sequence";
+}
+} elsif ($i_gp_onlyseq) {
+$params_string .= " -onlyseq";
+$model_type = "sequence";
+}
+# -R
+if ($i_gp_r) {
+$params_string .= " -R $i_gp_r";
+} elsif (exists $params{"R"}) {
+my $v = $params{"R"};
+$params_string .= " -R $v";
+} else {
+die "ERROR: -R needs to be set";
+}
+# -D
+if ($i_gp_d) {
+$params_string .= " -D $i_gp_d";
+} elsif (exists $params{"D"}) {
+my $v = $params{"D"};
+$params_string .= " -D $v";
+} else {
+die "ERROR: -D needs to be set";
+}
+# -epochs
+if ($i_gp_epochs) {
+$params_string .= " -epochs $i_gp_epochs";
+} elsif (exists $params{"epochs"}) {
+my $v = $params{"epochs"};
+$params_string .= " -epochs $v";
+} else {
+die "ERROR: -epochs needs to be set";
+}
+# -lambda
+if ($i_gp_lambda) {
+$params_string .= " -lambda $i_gp_lambda";
+} elsif (exists $params{"lambda"}) {
+my $v = $params{"lambda"};
+$params_string .= " -lambda $v";
+} else {
+die "ERROR: -lambda needs to be set";
+}
+# -bitsize
+if ($i_gp_bitsize) {
+$params_string .= " -bitsize $i_gp_bitsize";
+} elsif (exists $params{"bitsize"}) {
+my $v = $params{"bitsize"};
+$params_string .= " -bitsize $v";
+} else {
+die "ERROR: -bitsize needs to be set";
+}
+# -abstraction
+if ($i_gp_abstr) {
+$params_string .= " -bitsize $i_gp_abstr";
+die "ERROR: -abstraction set with -onlyseq" unless ($model_type eq "structure");
+} elsif (exists $params{"abstraction"}) {
+my $v = $params{"abstraction"};
+$params_string .= " -abstraction $v";
+die "ERROR: -abstraction set with -onlyseq" unless ($model_type eq "structure");
+}
+# Distribution parameters.
+my ($distr_my, $distr_sigma, $distr_xi);
+if ($i_distr_my) {
+$distr_my = $i_distr_my;
+} elsif (exists $params{"gev_my"}) {
+$distr_my = $params{"gev_my"};
+}
+if ($i_distr_sigma) {
+$distr_sigma = $i_distr_sigma;
+} elsif (exists $params{"gev_sigma"}) {
+$distr_sigma = $params{"gev_sigma"};
+}
+if ($i_distr_xi) {
+$distr_xi = $i_distr_xi;
+} elsif (exists $params{"gev_xi"}) {
+$distr_xi = $params{"gev_xi"};
+}
+# Average profile extension parameter.
+if (exists $params{"ap_extlr"}) {
+$ap_extlr = $params{"ap_extlr"};
+}
+print STDOUT "model_type:     $model_type\n";
+print STDOUT "ap_extlr:       $ap_extlr\n";
+if ($distr_my and $distr_sigma and $distr_xi) {
+print STDOUT "distr_my:       $distr_my\n";
+print STDOUT "distr_sigma:    $distr_sigma\n";
+print STDOUT "distr_xi:       $distr_xi\n";
+}
+print STDOUT "max_merge_dist: $max_merge_dist\n";
+# p50 filter score.
+my $p50_sc;
+if (exists $params{"p50_score"}) {
+$p50_sc = $params{"p50_score"};
+print STDOUT "p50_score:      $p50_sc\n";
+}
+# p50 p-value.
+my $p50_p;
+if (exists $params{"p50_p_val"}) {
+$p50_p = $params{"p50_p_val"};
+print STDOUT "p50_p_val:      $p50_p\n";
+}
+##################################
+# RUN GP PROFILE PREDICTION.
+##################################
+# Read in FASTA file headers.
+my @fasta_ids;
+my $headers = qx/grep ">" $i_fasta/;
+while ($headers =~ />(.+?)\n/g) {
+push(@fasta_ids,$1);
+}
+# Run GP profile prediction.
+my $gp_call = "GraphProt.pl -action predict_profile -model $i_model -fasta $i_fasta -prefix $data_id $params_string";
+print STDOUT "GraphProt call: $gp_call\n";
+# &> profile_prediction.log
+qx/$gp_call/;
+####################################
+# CALCULATE AVERAGE PROFILE.
+####################################
+# Calculate .average_profile from GraphProt .profile file.
+# Also add p-value column if distr parameters set.
+my $add_p = 0;
+if ($distr_my and $distr_sigma and $distr_xi) {
+$add_p = 1;
+}
+# Average_profile: 1-based, FASTA headers as IDs, using ap_extlr for averaging scores.
+my $profile = "$data_id.profile";
+my $average_profile = "$data_id.average_profile";
+# Calculate window size.
+my $win = $ap_extlr * 2 + 1;
+open (IN, $profile) or die "Cannot open $profile: $!\n";
+open (OUT, '>', $average_profile) or die "Cannot open $average_profile: $!";
+# Old ID.
+my $old_id = "-";
+# Current ID.
+my $cur_id = "-";
+# Start position of the window.
+my $pos_inc = 0;
+# Score array.
+my @scores;
+# Input row counter.
+my $c_in = 0;
+# Output row counter.
+my $c_out = 0;
+while (<IN>) {
+if ($_ =~ /(.+?)\t\d+?\t(.+?)\n/) {
+$cur_id = $1;
+my $score = $2;
+$c_in++;
+# Case: New refseq ID / new paragraph.
+if ($cur_id ne $old_id) {
+# Print remaining entries at the end of former paragraph.
+while (scalar(@scores) >= ($ap_extlr + 1)) {
+# Calculate avg array score.
+my $mean = array_mean(@scores);
+$c_out++;
+$pos_inc++;
+if ($add_p) {
+my $y = exp(-1*(1 + ( ($mean-$distr_my)/$distr_sigma )*$distr_xi)**(-1/$distr_xi));
+my $p = sprintf("%.8f", (1-$y));
+print OUT "$fasta_ids[$old_id]\t$pos_inc\t$mean\t$p\n";
+} else {
+print OUT "$fasta_ids[$old_id]\t$pos_inc\t$mean\n";
+}
+# Remove old score from beginning.
+shift(@scores);
+}
+# Reset for new paragraph.
+$pos_inc = 0;
+@scores = ();
+$old_id = $cur_id;
+# Save first score.
+push(@scores, $score);
+next;
+}
+# Push score in array for average calculation.
+push(@scores, $score);
+# Case: array smaller $win.
+if (scalar(@scores) < $win) {
+# Subcase: array more than half size, start with calculation.
+if (scalar(@scores) >= ($ap_extlr + 1)) {
+# Calculate avg array score.
+my $mean = array_mean(@scores);
+$c_out++;
+$pos_inc++;
+if ($add_p) {
+my $y = exp(-1*(1 + ( ($mean-$distr_my)/$distr_sigma )*$distr_xi)**(-1/$distr_xi));
+my $p = sprintf("%.8f", (1-$y));
+print OUT "$fasta_ids[$cur_id]\t$pos_inc\t$mean\t$p\n";
+} else {
+print OUT "$fasta_ids[$cur_id]\t$pos_inc\t$mean\n";
+}
+}
+next;
+}
+# Case: array has $win size.
+if (scalar(@scores) == $win) {
+# Calculate avg array score.
+my $mean = array_mean(@scores);
+$c_out++;
+$pos_inc++;
+if ($add_p) {
+my $y = exp(-1*(1 + ( ($mean-$distr_my)/$distr_sigma )*$distr_xi)**(-1/$distr_xi));
+my $p = sprintf("%.8f", (1-$y));
+print OUT "$fasta_ids[$cur_id]\t$pos_inc\t$mean\t$p\n";
+} else {
+print OUT "$fasta_ids[$cur_id]\t$pos_inc\t$mean\n";
+}
+# Remove old score from beginning.
+shift(@scores);
+next;
+}
+}
+}
+# Print remaining entries at the end of last section.
+while (scalar(@scores) >= ($ap_extlr + 1)) {
+# Calculate avg array score.
+my $mean = array_mean(@scores);
+$c_out++;
+$pos_inc++;
+if ($add_p) {
+my $y = exp(-1*(1 + ( ($mean-$distr_my)/$distr_sigma )*$distr_xi)**(-1/$distr_xi));
+my $p = sprintf("%.8f", (1-$y));
+print OUT "$fasta_ids[$cur_id]\t$pos_inc\t$mean\t$p\n";
+} else {
+print OUT "$fasta_ids[$cur_id]\t$pos_inc\t$mean\n";
+}
+# Remove old score from beginning.
+shift(@scores);
+}
+close IN;
+close OUT;
+qx/rm -f $profile/;
+#########################
+# GET PEAK REGIONS.
+#########################
+my $p50_peaks_bed_out = $data_id . ".peak_regions_p50.bed";
+my $peaks_bed_out = $data_id . ".peak_regions.bed";
+# If p-values were calculated, use set p-value to get peak regions.
+if ($add_p) {
+extract_peak_regions_from_p_values($average_profile, $peaks_bed_out, $max_merge_dist, $thr_p);
+# If p50-out set.
+if ($i_p50_out) {
+# If p50 p-value present, also get peak regions file for this threshold.
+if ($p50_p) {
+extract_peak_regions_from_p_values($average_profile, $p50_peaks_bed_out, $max_merge_dist, $p50_p);
+} else {
+qx/touch $p50_peaks_bed_out/;
+}
+}
+} else {
+# If no p-values available, use score threshold for defining peak regions.
+extract_peak_regions_from_scores($average_profile, $peaks_bed_out, $max_merge_dist, $thr_sc);
+# If p50-out set.
+if ($i_p50_out) {
+# If p50 score present, also get peak regions file for this threshold.
+if ($p50_sc) {
+extract_peak_regions_from_scores($average_profile, $p50_peaks_bed_out, $max_merge_dist, $p50_sc);
+} else {
+qx/touch $p50_peaks_bed_out/;
+}
+}
+}
+exit;
+################################################################################
+################################################################################
+# SUBROUTINES.
+################################################################################
+sub array_mean {
+my $mean = sum(@_)/@_;
+return sprintf("%.5f", $mean);
+}
+################################################################################
+sub extract_peak_regions_from_p_values {
+my ($average_profile, $peak_regions_bed_file, $max_merge_dist, $max_p) = @_;
+my $old_ref = "no";
+my $in = "N";
+my $ref_id;
+my $start;
+my $end;
+my $best_p = 1;
+my $best_sc = -1000;
+my $region_s;
+my $region_e;
+my $zero_pos;
+my $temp_bed_file = "peak_regions.tmp.bed";
+open (IN, $average_profile) or die "Cannot open $average_profile: $!";
+open (OUT, '>', $temp_bed_file) or die "Cannot open $temp_bed_file: $!";
+while (<IN>) {
+chomp;
+my ($ref, $s, $sc, $p) = (split /\t/)[0,1,2,3];
+# If file has zero-based positions.
+if ($s == 0) {
+$zero_pos = 1;
+}
+# If positions are one-based, make them zero-based.
+unless ($zero_pos) {
+$s -= 1;
+}
+# At transcript ends, if in positive region, write and reset.
+if ($old_ref ne $ref) {
+if ($in eq "Y") {
+print OUT "$ref_id\t$region_s\t$region_e\t$end;$best_sc;$best_p\t0\t+\n";
+$in = "N";
+}
+}
+$old_ref = $ref;
+# Deal with good p-value regions.
+if ($p <= $max_p) {
+# Start of a positive cluster.
+if ($in eq "N") {
+$start = $s;
+$region_s = $s;
+$region_e = $s + 1;
+$end = $s + 1;
+$best_p = $p;
+$best_sc = $sc;
+$ref_id = $ref;
+$in = "Y";
+next;
+# Inside a positive cluster.
+} elsif ($in eq "Y") {
+if ($p < $best_p) {
+$start = $s;
+$end = $s + 1;
+$best_p = $p;
+$best_sc = $sc;
+$ref_id = $ref;
+}
+$region_e++;
+next;
+}
+} else {
+# If we were in positive cluster before.
+if ($in eq "Y") {
+print OUT "$ref_id\t$region_s\t$region_e\t$end;$best_sc;$best_p\t0\t+\n";
+$in = "N";
+}
+}
+}
+# After last line processed.
+if ($in eq "Y") {
+print OUT "$ref_id\t$region_s\t$region_e\t$end;$best_sc;$best_p\t0\t+\n";
+$in = "N";
+}
+close IN;
+close OUT;
+# If merge distance zero (i.e. end of one block is -1 from start of next block).
+if ($max_merge_dist == 0) {
+qx/cat $temp_bed_file > $peak_regions_bed_file/;
+} else {
+# Merge nearby regions.
+open(IN, $temp_bed_file) or die "Cannot open $temp_bed_file: $!";
+open(OUT, '>', $peak_regions_bed_file) or die "Cannot open $peak_regions_bed_file: $!";
+# For storing current block stats.
+my $block_chr = 0;
+my ($block_s, $block_e, $block_best_pos, $block_best_p, $block_best_sc);
+while (<IN>) {
+chomp;
+my ($chr, $s, $e, $id) = (split /\t/)[0,1,2,3];
+my ($best_pos, $best_sc, $best_p) = (split /;/, $id);
+if ($chr eq $block_chr) {
+# If $block_e, $s within merge merge.
+if ( ($s - $block_e) <= $max_merge_dist ) {
+# Update block stats.
+$block_e = $e;
+if ($block_best_p > $best_p) {
+$block_best_p = $best_p;
+$block_best_sc = $best_sc;
+$block_best_pos = $best_pos;
+}
+} else {
+# If $e outside merge range, print block.
+print OUT "$block_chr\t$block_s\t$block_e\t$block_best_pos;$block_best_sc;$block_best_p\t0\t+\n";
+# Store new block.
+($block_chr, $block_s, $block_e, $block_best_pos, $block_best_sc, $block_best_p) = ($chr, $s, $e, $best_pos, $best_sc, $best_p);
+}
+} else {
+# If new chromosome, print last block, otherwise it is the first block.
+if ($block_chr) {
+print OUT "$block_chr\t$block_s\t$block_e\t$block_best_pos;$block_best_sc;$block_best_p\t0\t+\n";
+}
+($block_chr, $block_s, $block_e, $block_best_pos, $block_best_sc, $block_best_p) = ($chr, $s, $e, $best_pos, $best_sc, $best_p);
+}
+}
+# Print last block.
+if ($block_chr) {
+print OUT "$block_chr\t$block_s\t$block_e\t$block_best_pos;$block_best_sc;$block_best_p\t0\t+\n";
+}
+close OUT;
+close IN;
+}
+qx/rm -f $temp_bed_file/;
+}
+################################################################################
+sub extract_peak_regions_from_scores {
+my ($average_profile, $peak_regions_bed_file, $max_merge_dist, $min_sc) = @_;
+my $old_ref = "no";
+my $in = "N";
+my $ref_id;
+my $start;
+my $end;
+my $best_sc = -1000;
+my $region_s;
+my $region_e;
+my $zero_pos;
+my $temp_bed_file = "peak_regions.tmp.bed";
+open (IN, $average_profile) or die "Cannot open $average_profile: $!";
+open (OUT, '>', $temp_bed_file) or die "Cannot open $temp_bed_file: $!";
+while (<IN>) {
+chomp;
+my ($ref, $s, $sc) = (split /\t/)[0,1,2];
+# If file has zero-based positions.
+if ($s == 0) {
+$zero_pos = 1;
+}
+# If positions are one-based, make them zero-based.
+unless ($zero_pos) {
+$s -= 1;
+}
+# At transcript ends, if in positive region, write and reset.
+if ($old_ref ne $ref) {
+if ($in eq "Y") {
+print OUT "$ref_id\t$region_s\t$region_e\t$end;$best_sc\t0\t+\n";
+$in = "N";
+}
+}
+$old_ref = $ref;
+# Deal with positive regions.
+if ($sc > $min_sc) {
+# Start of a positive cluster.
+if ($in eq "N") {
+$start = $s;
+$region_s = $s;
+$region_e = $s + 1;
+$end = $s + 1;
+$best_sc = $sc;
+$ref_id = $ref;
+$in = "Y";
+next;
+# Inside a positive cluster.
+} elsif ($in eq "Y") {
+if ($sc > $best_sc) {
+$start = $s;
+$end = $s + 1;
+$best_sc = $sc;
+$ref_id = $ref;
+}
+$region_e++;
+next;
+}
+} else {
+# If we were in positive cluster before.
+if ($in eq "Y") {
+print OUT "$ref_id\t$region_s\t$region_e\t$end;$best_sc\t0\t+\n";
+$in = "N";
+}
+}
+}
+# After last line processed.
+if ($in eq "Y") {
+print OUT "$ref_id\t$region_s\t$region_e\t$end;$best_sc\t0\t+\n";
+$in = "N";
+}
+close IN;
+close OUT;
+# If merge distance zero (i.e. end of one block is -1 from start of next block).
+if ($max_merge_dist == 0) {
+qx/cat $temp_bed_file > $peak_regions_bed_file/;
+} else {
+# Merge nearby regions.
+open(IN, $temp_bed_file) or die "Cannot open $temp_bed_file: $!";
+open(OUT, '>', $peak_regions_bed_file) or die "Cannot open $peak_regions_bed_file: $!";
+# For storing current block stats.
+my $block_chr = 0;
+my ($block_s, $block_e, $block_best_pos, $block_best_sc);
+while (<IN>) {
+chomp;
+my ($chr, $s, $e, $id) = (split /\t/)[0,1,2,3];
+my ($best_pos, $best_sc) = (split /;/, $id);
+if ($chr eq $block_chr) {
+# If $block_e, $s within merge merge.
+if ( ($s - $block_e) <= $max_merge_dist ) {
+# Update block stats.
+$block_e = $e;
+if ($block_best_sc < $best_sc) {
+$block_best_sc = $best_sc;
+$block_best_pos = $best_pos;
+}
+} else {
+# If $e outside merge range, print block.
+print OUT "$block_chr\t$block_s\t$block_e\t$block_best_pos;$block_best_sc\t0\t+\n";
+# Store new block.
+($block_chr, $block_s, $block_e, $block_best_pos, $block_best_sc) = ($chr, $s, $e, $best_pos, $best_sc);
+}
+} else {
+# If new chromosome, print last block, otherwise it is the first block.
+if ($block_chr) {
+print OUT "$block_chr\t$block_s\t$block_e\t$block_best_pos;$block_best_sc\t0\t+\n";
+}
+($block_chr, $block_s, $block_e, $block_best_pos, $block_best_sc) = ($chr, $s, $e, $best_pos, $best_sc);
+}
+}
+# Print last block.
+if ($block_chr) {
+print OUT "$block_chr\t$block_s\t$block_e\t$block_best_pos;$block_best_sc\t0\t+\n";
+}
+close OUT;
+close IN;
+}
+qx/rm -f $temp_bed_file/;
+}
+################################################################################

Mercurial > repos > bgruening > graphprot_predict_profile

comparison graphprot_predict_profile_wrapper.pl @ 0:0bc573ec2010 draft default tip