# HG changeset patch # User pjbriggs # Date 1521717686 14400 # Node ID 3f8bf1a0403b5c09c1811d6e308014ae5b34744a # Parent 88c972081f156f21a792c879e66823930c3accbe Uploaded version with bad primer ranger detection (WIP). diff -r 88c972081f15 -r 3f8bf1a0403b README.rst --- a/README.rst Thu Mar 15 11:52:19 2018 -0400 +++ b/README.rst Thu Mar 22 07:21:26 2018 -0400 @@ -61,6 +61,9 @@ Version Changes ---------- ---------------------------------------------------------------------- +0.02.04.7 - Trap for errors in ``pal_finder_v0.02.04.pl`` resulting in bad + ranges being supplied to ``primer3_core`` for some reads via + ``PRIMER_PRODUCT_RANGE_SIZE``. 0.02.04.6 - Update to get dependencies using ``conda`` when installed from the toolshed (this removes the explicit dependency on Perl 5.16 introduced in 0.02.04.2, as a result the outputs from the tool are diff -r 88c972081f15 -r 3f8bf1a0403b pal_finder_wrapper.sh --- a/pal_finder_wrapper.sh Thu Mar 15 11:52:19 2018 -0400 +++ b/pal_finder_wrapper.sh Thu Mar 22 07:21:26 2018 -0400 @@ -26,7 +26,8 @@ # --primer-opt-tm VALUE: optimum melting temperature (Celsius) # --primer-pair-max-diff-tm VALUE: max difference between melting temps of left & right primers # --output_config_file FNAME: write a copy of the config.txt file to FNAME -# --filter_microsats FNAME: write output of filter options FNAME +# --bad_primer_ranges FNAME: write a list of the read IDs generating bad primer ranges to FNAME +# --filter_microsats FNAME: write output of filter options to FNAME # -assembly FNAME: run the 'assembly' filter option and write to FNAME # -primers: run the 'primers' filter option # -occurrences: run the 'occurrences' filter option @@ -53,6 +54,9 @@ # Maximum size reporting log file contents MAX_LINES=500 # +# Get helper functions +. $(dirname $0)/pal_finder_wrapper_utils.sh +# # Initialise locations of scripts, data and executables # # Set these in the environment to overide at execution time @@ -63,31 +67,18 @@ # Filter script is in the same directory as this script PALFINDER_FILTER=$(dirname $0)/pal_filter.py if [ ! -f $PALFINDER_FILTER ] ; then - echo No $PALFINDER_FILTER script >&2 - exit 1 + fatal No $PALFINDER_FILTER script fi # # Check that we have all the components -function have_program() { - local program=$1 - local got_program=$(which $program 2>&1 | grep "no $(basename $program) in") - if [ -z "$got_program" ] ; then - echo yes - else - echo no - fi -} if [ "$(have_program $PRIMER3_CORE_EXE)" == "no" ] ; then - echo "ERROR primer3_core missing: ${PRIMER3_CORE_EXE} not found" >&2 - exit 1 + fatal "primer3_core missing: ${PRIMER3_CORE_EXE} not found" fi if [ ! -f "${PALFINDER_DATA_DIR}/config.txt" ] ; then - echo "ERROR pal_finder config.txt not found in ${PALFINDER_DATA_DIR}" >&2 - exit 1 + fatal "pal_finder config.txt not found in ${PALFINDER_DATA_DIR}" fi if [ ! -f "${PALFINDER_SCRIPT_DIR}/pal_finder_v0.02.04.pl" ] ; then - echo "ERROR pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}" >&2 - exit 1 + fatal "pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}" fi # # Initialise parameters used in the config.txt file @@ -113,12 +104,13 @@ OUTPUT_ASSEMBLY= FILTERED_MICROSATS= FILTER_OPTIONS= +BAD_PRIMER_RANGES= # # Collect command line arguments if [ $# -lt 2 ] ; then echo "Usage: $0 FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" echo " $0 --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" - exits + fatal "Bad command line" fi if [ "$1" == "--454" ] ; then PLATFORM="454" @@ -212,6 +204,10 @@ shift OUTPUT_CONFIG_FILE=$1 ;; + --bad_primer_ranges) + shift + BAD_PRIMER_RANGES=$1 + ;; --filter_microsats) shift FILTERED_MICROSATS=$1 @@ -235,16 +231,14 @@ # Check that primer3_core is available got_primer3=`which $PRIMER3_CORE_EXE 2>&1 | grep -v "no primer3_core in"` if [ -z "$got_primer3" ] ; then - echo ERROR primer3_core not found >&2 - exit 1 + fatal "primer3_core not found" fi # # Set up the working dir if [ "$PLATFORM" == "Illumina" ] ; then # Paired end Illumina data as input if [ $FASTQ_R1 == $FASTQ_R2 ] ; then - echo ERROR R1 and R2 fastqs are the same file >&2 - exit 1 + fatal ERROR R1 and R2 fastqs are the same file fi ln -s $FASTQ_R1 ln -s $FASTQ_R2 @@ -264,17 +258,6 @@ /bin/cp $PALFINDER_DATA_DIR/config.txt . # # Update the config.txt file with new values -function set_config_value() { - local key=$1 - local value=$2 - local config_txt=$3 - if [ -z "$value" ] ; then - echo "No value for $key, left as default" - else - echo Setting "$key" to "$value" - sed -i 's,^'"$key"' .*,'"$key"' '"$value"',' $config_txt - fi -} # Input files set_config_value platform $PLATFORM config.txt if [ "$PLATFORM" == "Illumina" ] ; then @@ -299,6 +282,7 @@ # Primer3 settings set_config_value primer3input Output/pr3in.txt config.txt set_config_value primer3output Output/pr3out.txt config.txt +set_config_value keepPrimer3files 1 config.txt set_config_value primer3executable $PRIMER3_CORE_EXE config.txt set_config_value prNamePrefix ${PRIMER_PREFIX}_ config.txt set_config_value PRIMER_MISPRIMING_LIBRARY "$PRIMER_MISPRIMING_LIBRARY" config.txt @@ -329,8 +313,33 @@ # # Check that log ends with "Done!!" message if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then - echo ERROR pal_finder failed to complete successfully >&2 - exit 1 + fatal ERROR pal_finder failed to complete successfully +fi +echo "### pal_finder finished ###" +# +# Check for errors in pal_finder output +echo "### Checking for errors ###" +if [ ! -z "$(grep 'primer3_core: Illegal element in PRIMER_PRODUCT_SIZE_RANGE' pal_finder.log)" ] ; then + echo WARNING primer3 terminated prematurely due to bad product size ranges + if [ -z "$BAD_PRIMER_RANGES" ] ; then + # No output file so report to stderr + cat >&2 <&2 + cat >&2 <"$BAD_PRIMER_RANGES" + fi fi # # Sort microsat_summary output @@ -362,11 +371,9 @@ fi tail -$MAX_LINES pal_filter.log if [ $? -ne 0 ] ; then - echo ERROR $PALFINDER_FILTER exited with non-zero status >&2 - exit 1 + fatal $PALFINDER_FILTER exited with non-zero status elif [ ! -f PAL_summary.filtered ] ; then - echo ERROR no output from $PALFINDER_FILTER >&2 - exit 1 + fatal no output from $PALFINDER_FILTER fi fi # @@ -386,8 +393,7 @@ if [ -f "$assembly" ] ; then /bin/mv $assembly "$OUTPUT_ASSEMBLY" else - echo ERROR no assembly output found >&2 - exit 1 + fatal no assembly output found fi fi if [ ! -z "$OUTPUT_CONFIG_FILE" ] && [ -f config.txt ] ; then diff -r 88c972081f15 -r 3f8bf1a0403b pal_finder_wrapper.xml --- a/pal_finder_wrapper.xml Thu Mar 15 11:52:19 2018 -0400 +++ b/pal_finder_wrapper.xml Thu Mar 22 07:21:26 2018 -0400 @@ -1,4 +1,4 @@ - + Find microsatellite repeat elements from sequencing reads and design PCR primers to amplify them pal_finder_macros.xml @@ -26,6 +26,9 @@ --454 "$platform.input_fasta" #end if $output_microsat_summary $output_pal_summary + #if $report_bad_primer_ranges + --bad_primer_ranges "$output_bad_primer_read_ids" + #end if #if $keep_config_file --output_config_file "$output_config_file" #end if @@ -156,6 +159,7 @@ help="Temperature should be in degrees Celsius" /> + @@ -169,6 +173,9 @@ platform['assembly'] is True + + report_bad_primer_ranges is True + keep_config_file is True @@ -247,6 +254,19 @@ + + + + + + + + + + + + @@ -280,6 +300,29 @@ ------------- +.. class:: warning + +**Known problems** + +.. class:: infomark + +**Bad primer product size ranges** + +For some datasets pal_finder may generate 'bad' product size ranges (where the +lower limit exceeds the upper limit) for one or more reads, for input into +primer3_core. + +If this occurs then the tool will terminate with an error. A list of the reads +for which the bad ranges were generated can be found in the error message +which can be accessed via the 'bug' icon from a failed dataset. + +The conditions which cause this error are unclear. However we believe it to be +associated with short or low quality reads. It is recommended that the input +data are sufficiently trimmed and filtered (using e.g. the Trimmomatic tool) +before rerunning pal_finder. + +------------- + .. class:: infomark **Credits** diff -r 88c972081f15 -r 3f8bf1a0403b pal_finder_wrapper_utils.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pal_finder_wrapper_utils.sh Thu Mar 22 07:21:26 2018 -0400 @@ -0,0 +1,71 @@ +#!/bin/bash +# +# Helper functions for the pal_finder_wrapper.sh script +# +# Utility function for terminating on fatal error +function fatal() { + echo "FATAL $@" >&2 + exit 1 +} +# +# Check that specified program is available +function have_program() { + local program=$1 + local got_program=$(which $program 2>&1 | grep "no $(basename $program) in") + if [ -z "$got_program" ] ; then + echo yes + else + echo no + fi +} +# +# Set the value for a parameter in the pal_finder config file +function set_config_value() { + local key=$1 + local value=$2 + local config_txt=$3 + if [ -z "$value" ] ; then + echo "No value for $key, left as default" + else + echo Setting "$key" to "$value" + sed -i 's,^'"$key"' .*,'"$key"' '"$value"',' $config_txt + fi +} +# +# Identify 'bad' PRIMER_PRODUCT_SIZE_RANGE from pr3in.txt file +function find_bad_primer_ranges() { + # Parses a pr3in.txt file from pal_finder and reports + # sequence ids where the PRIMER_PRODUCT_SIZE_RANGE has + # upper limit which is smaller than lower limit + local pr3in=$1 + local pattern="^(SEQUENCE_ID|PRIMER_PRODUCT_SIZE_RANGE)" + for line in $(grep -E "$pattern" $pr3in | sed 's/ /^/' | sed 'N;s/\n/*/') + do + # Loop over pairs of SEQUENCE_ID and PRIMER_PRODUCT_SIZE_RANGE + # keywords in the primer3 input + if [ ! -z "$(echo $line | grep ^SEQUENCE_ID)" ] ; then + # Extract the values + local size_range=$(echo $line | cut -d'*' -f2 | cut -d'=' -f2 | tr '^' ' ') + local seq_id=$(echo $line | cut -d'*' -f1 | cut -d'=' -f2) + else + local size_range=$(echo $line | cut -d'*' -f1 | cut -d'=' -f2) + local seq_id=$(echo $line | cut -d'*' -f2 | cut -d'=' -f2) + fi + seq_id=$(echo $seq_id | cut -d')' -f3) + # Check the upper and lower limits in each range + # to see if it's okay + local bad_range= + for range in $(echo $size_range) ; do + local lower=$(echo $range | cut -d'-' -f1) + local upper=$(echo $range | cut -d'-' -f2) + if [ $lower -gt $upper ] ; then + bad_range=yes + break + fi + done + # Report if the range is wrong + if [ ! -z "$bad_range" ] ; then + echo "$seq_id ($size_range)" + fi + done +}