comparison pal_finder_wrapper.sh @ 15:a3af1ff4cad1 draft

pal_finder 0.02.04.7 for testing.
author pjbriggs
date Mon, 14 May 2018 11:10:19 -0400
parents 3f8bf1a0403b
children
comparison
equal deleted inserted replaced
14:3f8bf1a0403b 15:a3af1ff4cad1
30 # --filter_microsats FNAME: write output of filter options to FNAME 30 # --filter_microsats FNAME: write output of filter options to FNAME
31 # -assembly FNAME: run the 'assembly' filter option and write to FNAME 31 # -assembly FNAME: run the 'assembly' filter option and write to FNAME
32 # -primers: run the 'primers' filter option 32 # -primers: run the 'primers' filter option
33 # -occurrences: run the 'occurrences' filter option 33 # -occurrences: run the 'occurrences' filter option
34 # -rankmotifs: run the 'rankmotifs' filter option 34 # -rankmotifs: run the 'rankmotifs' filter option
35 # --subset N: use a subset of reads of size N
35 # 36 #
36 # pal_finder is available from http://sourceforge.net/projects/palfinder/ 37 # pal_finder is available from http://sourceforge.net/projects/palfinder/
37 # 38 #
38 # primer3 is available from http://primer3.sourceforge.net/releases.php 39 # primer3 is available from http://primer3.sourceforge.net/releases.php
39 # (nb needs version 2.0.0-alpha) 40 # (nb needs version 2.0.0-alpha)
102 PRIMER_PAIR_MAX_DIFF_TM= 103 PRIMER_PAIR_MAX_DIFF_TM=
103 OUTPUT_CONFIG_FILE= 104 OUTPUT_CONFIG_FILE=
104 OUTPUT_ASSEMBLY= 105 OUTPUT_ASSEMBLY=
105 FILTERED_MICROSATS= 106 FILTERED_MICROSATS=
106 FILTER_OPTIONS= 107 FILTER_OPTIONS=
107 BAD_PRIMER_RANGES= 108 SUBSET=
109 RANDOM_SEED=568765
108 # 110 #
109 # Collect command line arguments 111 # Collect command line arguments
110 if [ $# -lt 2 ] ; then 112 if [ $# -lt 2 ] ; then
111 echo "Usage: $0 FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" 113 echo "Usage: $0 FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]"
112 echo " $0 --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" 114 echo " $0 --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]"
218 -assembly) 220 -assembly)
219 FILTER_OPTIONS="$FILTER_OPTIONS $1" 221 FILTER_OPTIONS="$FILTER_OPTIONS $1"
220 shift 222 shift
221 OUTPUT_ASSEMBLY=$1 223 OUTPUT_ASSEMBLY=$1
222 ;; 224 ;;
225 --subset)
226 shift
227 SUBSET=$1
228 ;;
223 *) 229 *)
224 echo Unknown option: $1 >&2 230 echo Unknown option: $1 >&2
225 exit 1 231 exit 1
226 ;; 232 ;;
227 esac 233 esac
232 got_primer3=`which $PRIMER3_CORE_EXE 2>&1 | grep -v "no primer3_core in"` 238 got_primer3=`which $PRIMER3_CORE_EXE 2>&1 | grep -v "no primer3_core in"`
233 if [ -z "$got_primer3" ] ; then 239 if [ -z "$got_primer3" ] ; then
234 fatal "primer3_core not found" 240 fatal "primer3_core not found"
235 fi 241 fi
236 # 242 #
243 # Check the n-mers specification
244 if [ $MIN_6_MER_REPS -ne 0 ] ; then
245 if [ $MIN_5_MER_REPS -eq 0 ] ; then
246 fatal "Minimum number of 5-mers cannot be zero if number of 6-mers is non-zero"
247 fi
248 fi
249 if [ $MIN_5_MER_REPS -ne 0 ] ; then
250 if [ $MIN_4_MER_REPS -eq 0 ] ; then
251 fatal "Minimum number of 4-mers cannot be zero if number of 5-mers is non-zero"
252 fi
253 fi
254 if [ $MIN_4_MER_REPS -ne 0 ] ; then
255 if [ $MIN_3_MER_REPS -eq 0 ] ; then
256 fatal "Minimum number of 3-mers cannot be zero if number of 4-mers is non-zero"
257 fi
258 fi
259 if [ $MIN_2_MER_REPS -eq 0 ] ; then
260 fatal "Minimum number of 2-mer repeats cannot be zero"
261 fi
237 # Set up the working dir 262 # Set up the working dir
238 if [ "$PLATFORM" == "Illumina" ] ; then 263 if [ "$PLATFORM" == "Illumina" ] ; then
239 # Paired end Illumina data as input 264 # Paired end Illumina data as input
240 if [ $FASTQ_R1 == $FASTQ_R2 ] ; then 265 if [ $FASTQ_R1 == $FASTQ_R2 ] ; then
241 fatal ERROR R1 and R2 fastqs are the same file 266 fatal ERROR R1 and R2 fastqs are the same file
250 fna=$(basename $FNA) 275 fna=$(basename $FNA)
251 fi 276 fi
252 ln -s $PRIMER_MISPRIMING_LIBRARY 277 ln -s $PRIMER_MISPRIMING_LIBRARY
253 PRIMER_MISPRIMING_LIBRARY=$(basename $PRIMER_MISPRIMING_LIBRARY) 278 PRIMER_MISPRIMING_LIBRARY=$(basename $PRIMER_MISPRIMING_LIBRARY)
254 mkdir Output 279 mkdir Output
280 #
281 # Use a subset of reads
282 if [ ! -z "$SUBSET" ] ; then
283 echo "### Extracting subset of reads ###"
284 $(dirname $0)/fastq_subset.py -n $SUBSET -s $RANDOM_SEED $fastq_r1 $fastq_r2
285 fastq_r1="subset_r1.fq"
286 fastq_r2="subset_r2.fq"
287 fi
255 # 288 #
256 # Copy in the default config.txt file 289 # Copy in the default config.txt file
257 echo "### Creating config.txt file for pal_finder run ###" 290 echo "### Creating config.txt file for pal_finder run ###"
258 /bin/cp $PALFINDER_DATA_DIR/config.txt . 291 /bin/cp $PALFINDER_DATA_DIR/config.txt .
259 # 292 #
309 echo WARNING output too long, truncated to last $MAX_LINES lines: 342 echo WARNING output too long, truncated to last $MAX_LINES lines:
310 echo ... 343 echo ...
311 fi 344 fi
312 tail -$MAX_LINES pal_finder.log 345 tail -$MAX_LINES pal_finder.log
313 # 346 #
314 # Check that log ends with "Done!!" message 347 # Check for success/failure
315 if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then 348 if [ ! -z "$(tail -n 1 pal_finder.log | grep 'No microsatellites found in any reads. Ending script.')" ] ; then
349 # No microsatellites found
350 fatal ERROR pal_finder failed to locate any microsatellites
351 exit 1
352 elif [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then
353 # Log doesn't end with "Done!!" (indicates failure)
316 fatal ERROR pal_finder failed to complete successfully 354 fatal ERROR pal_finder failed to complete successfully
317 fi 355 fi
318 echo "### pal_finder finished ###" 356 echo "### pal_finder finished ###"
319 # 357 #
320 # Check for errors in pal_finder output 358 # Check for errors in pal_finder output
321 echo "### Checking for errors ###" 359 echo "### Checking for errors ###"
322 if [ ! -z "$(grep 'primer3_core: Illegal element in PRIMER_PRODUCT_SIZE_RANGE' pal_finder.log)" ] ; then 360 if [ ! -z "$(grep 'primer3_core: Illegal element in PRIMER_PRODUCT_SIZE_RANGE' pal_finder.log)" ] ; then
323 echo WARNING primer3 terminated prematurely due to bad product size ranges 361 echo WARNING primer3 terminated prematurely due to bad product size ranges
362 $(find_bad_primer_ranges Output/pr3in.txt bad_primer_ranges.txt)
363 N_BAD_PRIMERS=$(cat bad_primer_ranges.txt | wc -l)
324 if [ -z "$BAD_PRIMER_RANGES" ] ; then 364 if [ -z "$BAD_PRIMER_RANGES" ] ; then
325 # No output file so report to stderr 365 # No output file so report to stderr
326 cat >&2 <<EOF 366 cat <<EOF
327 ERROR primer3 terminated prematurely due to bad product size ranges
328 367
329 Pal_finder generated bad ranges for the following read IDs: 368 Pal_finder generated bad ranges for the following read IDs:
369
330 EOF 370 EOF
331 echo $(find_bad_primer_ranges Output/pr3in.txt) >&2 371 cat bad_primer_ranges.txt
332 cat >&2 <<EOF 372 cat <<EOF
333 373
334 This error can occur when input data contains short R1 reads and has 374 This error can occur when input data contains short R1 reads and has
335 has not been properly trimmed and filtered. 375 has not been properly trimmed and filtered.
336 376
337 EOF 377 EOF
338 else 378 else
339 # Dump bad ranges to file 379 # Move the bad ranges to the specified file
340 echo "### Writing read IDs with bad primer ranges ###" 380 echo "### Writing read IDs with bad primer ranges ###"
341 echo $(find_bad_primer_ranges Output/pr3in.txt) >"$BAD_PRIMER_RANGES" 381 /bin/mv bad_primer_ranges.txt "$BAD_PRIMER_RANGES"
342 fi 382 fi
383 else
384 N_BAD_PRIMERS=0
343 fi 385 fi
344 # 386 #
345 # Sort microsat_summary output 387 # Sort microsat_summary output
346 echo "### Sorting microsat summary output ###" 388 echo "### Sorting microsat summary output ###"
347 head -n 7 Output/microsat_summary.txt | sort >microsat_summary.sorted 389 head -n 7 Output/microsat_summary.txt | sort >microsat_summary.sorted
390 echo "readsWithBadRanges:"$'\t'"$((N_BAD_PRIMERS * 2))" >>microsat_summary.sorted
348 grep "^$" Output/microsat_summary.txt>>microsat_summary.sorted 391 grep "^$" Output/microsat_summary.txt>>microsat_summary.sorted
349 grep "^Microsat Type" Output/microsat_summary.txt >>microsat_summary.sorted 392 grep "^Microsat Type" Output/microsat_summary.txt >>microsat_summary.sorted
350 tail -n +11 Output/microsat_summary.txt >>microsat_summary.sorted 393 tail -n +11 Output/microsat_summary.txt | sort -r -n -k 5 >>microsat_summary.sorted
351 mv microsat_summary.sorted Output/microsat_summary.txt 394 mv microsat_summary.sorted Output/microsat_summary.txt
352 # 395 #
353 # Sort PAL_summary output 396 # Sort PAL_summary output
354 echo "### Sorting PAL summary output ###" 397 echo "### Sorting PAL summary output ###"
355 head -1 Output/PAL_summary.txt > Output/PAL_summary.sorted.txt 398 head -1 Output/PAL_summary.txt > Output/PAL_summary.sorted.txt