diff pal_finder_wrapper.sh @ 15:a3af1ff4cad1 draft

pal_finder 0.02.04.7 for testing.
author pjbriggs
date Mon, 14 May 2018 11:10:19 -0400
parents 3f8bf1a0403b
children
line wrap: on
line diff
--- a/pal_finder_wrapper.sh	Thu Mar 22 07:21:26 2018 -0400
+++ b/pal_finder_wrapper.sh	Mon May 14 11:10:19 2018 -0400
@@ -32,6 +32,7 @@
 # -primers: run the 'primers' filter option
 # -occurrences: run the 'occurrences' filter option
 # -rankmotifs: run the 'rankmotifs' filter option
+# --subset N: use a subset of reads of size N
 #
 # pal_finder is available from http://sourceforge.net/projects/palfinder/
 #
@@ -104,7 +105,8 @@
 OUTPUT_ASSEMBLY=
 FILTERED_MICROSATS=
 FILTER_OPTIONS=
-BAD_PRIMER_RANGES=
+SUBSET=
+RANDOM_SEED=568765
 #
 # Collect command line arguments
 if [ $# -lt 2 ] ; then
@@ -220,6 +222,10 @@
 	    shift
 	    OUTPUT_ASSEMBLY=$1
 	    ;;
+	--subset)
+	    shift
+	    SUBSET=$1
+	    ;;
 	*)
 	    echo Unknown option: $1 >&2
 	    exit 1
@@ -234,6 +240,25 @@
   fatal "primer3_core not found"
 fi
 #
+# Check the n-mers specification
+if [ $MIN_6_MER_REPS -ne 0 ] ; then
+    if [ $MIN_5_MER_REPS -eq 0 ] ; then
+	fatal "Minimum number of 5-mers cannot be zero if number of 6-mers is non-zero"
+    fi
+fi
+if [ $MIN_5_MER_REPS -ne 0 ] ; then
+    if [ $MIN_4_MER_REPS -eq 0 ] ; then
+	fatal "Minimum number of 4-mers cannot be zero if number of 5-mers is non-zero"
+    fi
+fi
+if [ $MIN_4_MER_REPS -ne 0 ] ; then
+    if [ $MIN_3_MER_REPS -eq 0 ] ; then
+	fatal "Minimum number of 3-mers cannot be zero if number of 4-mers is non-zero"
+    fi
+fi
+if [ $MIN_2_MER_REPS -eq 0 ] ; then
+    fatal "Minimum number of 2-mer repeats cannot be zero"
+fi
 # Set up the working dir
 if [ "$PLATFORM" == "Illumina" ] ; then
     # Paired end Illumina data as input
@@ -253,6 +278,14 @@
 PRIMER_MISPRIMING_LIBRARY=$(basename $PRIMER_MISPRIMING_LIBRARY)
 mkdir Output
 #
+# Use a subset of reads
+if [ ! -z "$SUBSET" ] ; then
+    echo "### Extracting subset of reads ###"
+    $(dirname $0)/fastq_subset.py -n $SUBSET -s $RANDOM_SEED $fastq_r1 $fastq_r2
+    fastq_r1="subset_r1.fq"
+    fastq_r2="subset_r2.fq"
+fi
+#
 # Copy in the default config.txt file
 echo "### Creating config.txt file for pal_finder run ###"
 /bin/cp $PALFINDER_DATA_DIR/config.txt .
@@ -311,8 +344,13 @@
 fi
 tail -$MAX_LINES pal_finder.log
 #
-# Check that log ends with "Done!!" message
-if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then
+# Check for success/failure
+if [ ! -z "$(tail -n 1 pal_finder.log | grep 'No microsatellites found in any reads. Ending script.')" ] ; then
+    # No microsatellites found
+    fatal ERROR pal_finder failed to locate any microsatellites
+    exit 1
+elif [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then
+    # Log doesn't end with "Done!!" (indicates failure)
     fatal ERROR pal_finder failed to complete successfully
 fi
 echo "### pal_finder finished ###"
@@ -321,33 +359,38 @@
 echo "### Checking for errors ###"
 if [ ! -z "$(grep 'primer3_core: Illegal element in PRIMER_PRODUCT_SIZE_RANGE' pal_finder.log)" ] ; then
     echo WARNING primer3 terminated prematurely due to bad product size ranges
+    $(find_bad_primer_ranges Output/pr3in.txt bad_primer_ranges.txt)
+    N_BAD_PRIMERS=$(cat bad_primer_ranges.txt | wc -l)
     if [ -z "$BAD_PRIMER_RANGES" ] ; then
 	# No output file so report to stderr
-	cat >&2 <<EOF
-ERROR primer3 terminated prematurely due to bad product size ranges
+	cat <<EOF
 
 Pal_finder generated bad ranges for the following read IDs:
+
 EOF
-	echo $(find_bad_primer_ranges Output/pr3in.txt) >&2
-	cat >&2 <<EOF
+	cat bad_primer_ranges.txt
+	cat <<EOF
 
 This error can occur when input data contains short R1 reads and has
 has not been properly trimmed and filtered.
 
 EOF
     else
-	# Dump bad ranges to file
+	# Move the bad ranges to the specified file
 	echo "### Writing read IDs with bad primer ranges ###"
-	echo $(find_bad_primer_ranges Output/pr3in.txt) >"$BAD_PRIMER_RANGES"
+	/bin/mv bad_primer_ranges.txt "$BAD_PRIMER_RANGES"
     fi
+else
+    N_BAD_PRIMERS=0
 fi
 #
 # Sort microsat_summary output
 echo "### Sorting microsat summary output ###"
 head -n 7 Output/microsat_summary.txt | sort >microsat_summary.sorted
+echo "readsWithBadRanges:"$'\t'"$((N_BAD_PRIMERS * 2))" >>microsat_summary.sorted
 grep "^$" Output/microsat_summary.txt>>microsat_summary.sorted
 grep "^Microsat Type" Output/microsat_summary.txt >>microsat_summary.sorted
-tail -n +11 Output/microsat_summary.txt >>microsat_summary.sorted
+tail -n +11 Output/microsat_summary.txt | sort -r -n -k 5 >>microsat_summary.sorted
 mv microsat_summary.sorted Output/microsat_summary.txt
 #
 # Sort PAL_summary output