Mercurial > repos > pjbriggs > pal_finder
diff pal_finder_wrapper.sh @ 15:a3af1ff4cad1 draft
pal_finder 0.02.04.7 for testing.
author | pjbriggs |
---|---|
date | Mon, 14 May 2018 11:10:19 -0400 |
parents | 3f8bf1a0403b |
children |
line wrap: on
line diff
--- a/pal_finder_wrapper.sh Thu Mar 22 07:21:26 2018 -0400 +++ b/pal_finder_wrapper.sh Mon May 14 11:10:19 2018 -0400 @@ -32,6 +32,7 @@ # -primers: run the 'primers' filter option # -occurrences: run the 'occurrences' filter option # -rankmotifs: run the 'rankmotifs' filter option +# --subset N: use a subset of reads of size N # # pal_finder is available from http://sourceforge.net/projects/palfinder/ # @@ -104,7 +105,8 @@ OUTPUT_ASSEMBLY= FILTERED_MICROSATS= FILTER_OPTIONS= -BAD_PRIMER_RANGES= +SUBSET= +RANDOM_SEED=568765 # # Collect command line arguments if [ $# -lt 2 ] ; then @@ -220,6 +222,10 @@ shift OUTPUT_ASSEMBLY=$1 ;; + --subset) + shift + SUBSET=$1 + ;; *) echo Unknown option: $1 >&2 exit 1 @@ -234,6 +240,25 @@ fatal "primer3_core not found" fi # +# Check the n-mers specification +if [ $MIN_6_MER_REPS -ne 0 ] ; then + if [ $MIN_5_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 5-mers cannot be zero if number of 6-mers is non-zero" + fi +fi +if [ $MIN_5_MER_REPS -ne 0 ] ; then + if [ $MIN_4_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 4-mers cannot be zero if number of 5-mers is non-zero" + fi +fi +if [ $MIN_4_MER_REPS -ne 0 ] ; then + if [ $MIN_3_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 3-mers cannot be zero if number of 4-mers is non-zero" + fi +fi +if [ $MIN_2_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 2-mer repeats cannot be zero" +fi # Set up the working dir if [ "$PLATFORM" == "Illumina" ] ; then # Paired end Illumina data as input @@ -253,6 +278,14 @@ PRIMER_MISPRIMING_LIBRARY=$(basename $PRIMER_MISPRIMING_LIBRARY) mkdir Output # +# Use a subset of reads +if [ ! -z "$SUBSET" ] ; then + echo "### Extracting subset of reads ###" + $(dirname $0)/fastq_subset.py -n $SUBSET -s $RANDOM_SEED $fastq_r1 $fastq_r2 + fastq_r1="subset_r1.fq" + fastq_r2="subset_r2.fq" +fi +# # Copy in the default config.txt file echo "### Creating config.txt file for pal_finder run ###" /bin/cp $PALFINDER_DATA_DIR/config.txt . @@ -311,8 +344,13 @@ fi tail -$MAX_LINES pal_finder.log # -# Check that log ends with "Done!!" message -if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then +# Check for success/failure +if [ ! -z "$(tail -n 1 pal_finder.log | grep 'No microsatellites found in any reads. Ending script.')" ] ; then + # No microsatellites found + fatal ERROR pal_finder failed to locate any microsatellites + exit 1 +elif [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then + # Log doesn't end with "Done!!" (indicates failure) fatal ERROR pal_finder failed to complete successfully fi echo "### pal_finder finished ###" @@ -321,33 +359,38 @@ echo "### Checking for errors ###" if [ ! -z "$(grep 'primer3_core: Illegal element in PRIMER_PRODUCT_SIZE_RANGE' pal_finder.log)" ] ; then echo WARNING primer3 terminated prematurely due to bad product size ranges + $(find_bad_primer_ranges Output/pr3in.txt bad_primer_ranges.txt) + N_BAD_PRIMERS=$(cat bad_primer_ranges.txt | wc -l) if [ -z "$BAD_PRIMER_RANGES" ] ; then # No output file so report to stderr - cat >&2 <<EOF -ERROR primer3 terminated prematurely due to bad product size ranges + cat <<EOF Pal_finder generated bad ranges for the following read IDs: + EOF - echo $(find_bad_primer_ranges Output/pr3in.txt) >&2 - cat >&2 <<EOF + cat bad_primer_ranges.txt + cat <<EOF This error can occur when input data contains short R1 reads and has has not been properly trimmed and filtered. EOF else - # Dump bad ranges to file + # Move the bad ranges to the specified file echo "### Writing read IDs with bad primer ranges ###" - echo $(find_bad_primer_ranges Output/pr3in.txt) >"$BAD_PRIMER_RANGES" + /bin/mv bad_primer_ranges.txt "$BAD_PRIMER_RANGES" fi +else + N_BAD_PRIMERS=0 fi # # Sort microsat_summary output echo "### Sorting microsat summary output ###" head -n 7 Output/microsat_summary.txt | sort >microsat_summary.sorted +echo "readsWithBadRanges:"$'\t'"$((N_BAD_PRIMERS * 2))" >>microsat_summary.sorted grep "^$" Output/microsat_summary.txt>>microsat_summary.sorted grep "^Microsat Type" Output/microsat_summary.txt >>microsat_summary.sorted -tail -n +11 Output/microsat_summary.txt >>microsat_summary.sorted +tail -n +11 Output/microsat_summary.txt | sort -r -n -k 5 >>microsat_summary.sorted mv microsat_summary.sorted Output/microsat_summary.txt # # Sort PAL_summary output