Mercurial > repos > pjbriggs > pal_finder
comparison pal_finder_wrapper.sh @ 15:a3af1ff4cad1 draft
pal_finder 0.02.04.7 for testing.
author | pjbriggs |
---|---|
date | Mon, 14 May 2018 11:10:19 -0400 |
parents | 3f8bf1a0403b |
children |
comparison
equal
deleted
inserted
replaced
14:3f8bf1a0403b | 15:a3af1ff4cad1 |
---|---|
30 # --filter_microsats FNAME: write output of filter options to FNAME | 30 # --filter_microsats FNAME: write output of filter options to FNAME |
31 # -assembly FNAME: run the 'assembly' filter option and write to FNAME | 31 # -assembly FNAME: run the 'assembly' filter option and write to FNAME |
32 # -primers: run the 'primers' filter option | 32 # -primers: run the 'primers' filter option |
33 # -occurrences: run the 'occurrences' filter option | 33 # -occurrences: run the 'occurrences' filter option |
34 # -rankmotifs: run the 'rankmotifs' filter option | 34 # -rankmotifs: run the 'rankmotifs' filter option |
35 # --subset N: use a subset of reads of size N | |
35 # | 36 # |
36 # pal_finder is available from http://sourceforge.net/projects/palfinder/ | 37 # pal_finder is available from http://sourceforge.net/projects/palfinder/ |
37 # | 38 # |
38 # primer3 is available from http://primer3.sourceforge.net/releases.php | 39 # primer3 is available from http://primer3.sourceforge.net/releases.php |
39 # (nb needs version 2.0.0-alpha) | 40 # (nb needs version 2.0.0-alpha) |
102 PRIMER_PAIR_MAX_DIFF_TM= | 103 PRIMER_PAIR_MAX_DIFF_TM= |
103 OUTPUT_CONFIG_FILE= | 104 OUTPUT_CONFIG_FILE= |
104 OUTPUT_ASSEMBLY= | 105 OUTPUT_ASSEMBLY= |
105 FILTERED_MICROSATS= | 106 FILTERED_MICROSATS= |
106 FILTER_OPTIONS= | 107 FILTER_OPTIONS= |
107 BAD_PRIMER_RANGES= | 108 SUBSET= |
109 RANDOM_SEED=568765 | |
108 # | 110 # |
109 # Collect command line arguments | 111 # Collect command line arguments |
110 if [ $# -lt 2 ] ; then | 112 if [ $# -lt 2 ] ; then |
111 echo "Usage: $0 FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" | 113 echo "Usage: $0 FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" |
112 echo " $0 --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" | 114 echo " $0 --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" |
218 -assembly) | 220 -assembly) |
219 FILTER_OPTIONS="$FILTER_OPTIONS $1" | 221 FILTER_OPTIONS="$FILTER_OPTIONS $1" |
220 shift | 222 shift |
221 OUTPUT_ASSEMBLY=$1 | 223 OUTPUT_ASSEMBLY=$1 |
222 ;; | 224 ;; |
225 --subset) | |
226 shift | |
227 SUBSET=$1 | |
228 ;; | |
223 *) | 229 *) |
224 echo Unknown option: $1 >&2 | 230 echo Unknown option: $1 >&2 |
225 exit 1 | 231 exit 1 |
226 ;; | 232 ;; |
227 esac | 233 esac |
232 got_primer3=`which $PRIMER3_CORE_EXE 2>&1 | grep -v "no primer3_core in"` | 238 got_primer3=`which $PRIMER3_CORE_EXE 2>&1 | grep -v "no primer3_core in"` |
233 if [ -z "$got_primer3" ] ; then | 239 if [ -z "$got_primer3" ] ; then |
234 fatal "primer3_core not found" | 240 fatal "primer3_core not found" |
235 fi | 241 fi |
236 # | 242 # |
243 # Check the n-mers specification | |
244 if [ $MIN_6_MER_REPS -ne 0 ] ; then | |
245 if [ $MIN_5_MER_REPS -eq 0 ] ; then | |
246 fatal "Minimum number of 5-mers cannot be zero if number of 6-mers is non-zero" | |
247 fi | |
248 fi | |
249 if [ $MIN_5_MER_REPS -ne 0 ] ; then | |
250 if [ $MIN_4_MER_REPS -eq 0 ] ; then | |
251 fatal "Minimum number of 4-mers cannot be zero if number of 5-mers is non-zero" | |
252 fi | |
253 fi | |
254 if [ $MIN_4_MER_REPS -ne 0 ] ; then | |
255 if [ $MIN_3_MER_REPS -eq 0 ] ; then | |
256 fatal "Minimum number of 3-mers cannot be zero if number of 4-mers is non-zero" | |
257 fi | |
258 fi | |
259 if [ $MIN_2_MER_REPS -eq 0 ] ; then | |
260 fatal "Minimum number of 2-mer repeats cannot be zero" | |
261 fi | |
237 # Set up the working dir | 262 # Set up the working dir |
238 if [ "$PLATFORM" == "Illumina" ] ; then | 263 if [ "$PLATFORM" == "Illumina" ] ; then |
239 # Paired end Illumina data as input | 264 # Paired end Illumina data as input |
240 if [ $FASTQ_R1 == $FASTQ_R2 ] ; then | 265 if [ $FASTQ_R1 == $FASTQ_R2 ] ; then |
241 fatal ERROR R1 and R2 fastqs are the same file | 266 fatal ERROR R1 and R2 fastqs are the same file |
250 fna=$(basename $FNA) | 275 fna=$(basename $FNA) |
251 fi | 276 fi |
252 ln -s $PRIMER_MISPRIMING_LIBRARY | 277 ln -s $PRIMER_MISPRIMING_LIBRARY |
253 PRIMER_MISPRIMING_LIBRARY=$(basename $PRIMER_MISPRIMING_LIBRARY) | 278 PRIMER_MISPRIMING_LIBRARY=$(basename $PRIMER_MISPRIMING_LIBRARY) |
254 mkdir Output | 279 mkdir Output |
280 # | |
281 # Use a subset of reads | |
282 if [ ! -z "$SUBSET" ] ; then | |
283 echo "### Extracting subset of reads ###" | |
284 $(dirname $0)/fastq_subset.py -n $SUBSET -s $RANDOM_SEED $fastq_r1 $fastq_r2 | |
285 fastq_r1="subset_r1.fq" | |
286 fastq_r2="subset_r2.fq" | |
287 fi | |
255 # | 288 # |
256 # Copy in the default config.txt file | 289 # Copy in the default config.txt file |
257 echo "### Creating config.txt file for pal_finder run ###" | 290 echo "### Creating config.txt file for pal_finder run ###" |
258 /bin/cp $PALFINDER_DATA_DIR/config.txt . | 291 /bin/cp $PALFINDER_DATA_DIR/config.txt . |
259 # | 292 # |
309 echo WARNING output too long, truncated to last $MAX_LINES lines: | 342 echo WARNING output too long, truncated to last $MAX_LINES lines: |
310 echo ... | 343 echo ... |
311 fi | 344 fi |
312 tail -$MAX_LINES pal_finder.log | 345 tail -$MAX_LINES pal_finder.log |
313 # | 346 # |
314 # Check that log ends with "Done!!" message | 347 # Check for success/failure |
315 if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then | 348 if [ ! -z "$(tail -n 1 pal_finder.log | grep 'No microsatellites found in any reads. Ending script.')" ] ; then |
349 # No microsatellites found | |
350 fatal ERROR pal_finder failed to locate any microsatellites | |
351 exit 1 | |
352 elif [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then | |
353 # Log doesn't end with "Done!!" (indicates failure) | |
316 fatal ERROR pal_finder failed to complete successfully | 354 fatal ERROR pal_finder failed to complete successfully |
317 fi | 355 fi |
318 echo "### pal_finder finished ###" | 356 echo "### pal_finder finished ###" |
319 # | 357 # |
320 # Check for errors in pal_finder output | 358 # Check for errors in pal_finder output |
321 echo "### Checking for errors ###" | 359 echo "### Checking for errors ###" |
322 if [ ! -z "$(grep 'primer3_core: Illegal element in PRIMER_PRODUCT_SIZE_RANGE' pal_finder.log)" ] ; then | 360 if [ ! -z "$(grep 'primer3_core: Illegal element in PRIMER_PRODUCT_SIZE_RANGE' pal_finder.log)" ] ; then |
323 echo WARNING primer3 terminated prematurely due to bad product size ranges | 361 echo WARNING primer3 terminated prematurely due to bad product size ranges |
362 $(find_bad_primer_ranges Output/pr3in.txt bad_primer_ranges.txt) | |
363 N_BAD_PRIMERS=$(cat bad_primer_ranges.txt | wc -l) | |
324 if [ -z "$BAD_PRIMER_RANGES" ] ; then | 364 if [ -z "$BAD_PRIMER_RANGES" ] ; then |
325 # No output file so report to stderr | 365 # No output file so report to stderr |
326 cat >&2 <<EOF | 366 cat <<EOF |
327 ERROR primer3 terminated prematurely due to bad product size ranges | |
328 | 367 |
329 Pal_finder generated bad ranges for the following read IDs: | 368 Pal_finder generated bad ranges for the following read IDs: |
369 | |
330 EOF | 370 EOF |
331 echo $(find_bad_primer_ranges Output/pr3in.txt) >&2 | 371 cat bad_primer_ranges.txt |
332 cat >&2 <<EOF | 372 cat <<EOF |
333 | 373 |
334 This error can occur when input data contains short R1 reads and has | 374 This error can occur when input data contains short R1 reads and has |
335 has not been properly trimmed and filtered. | 375 has not been properly trimmed and filtered. |
336 | 376 |
337 EOF | 377 EOF |
338 else | 378 else |
339 # Dump bad ranges to file | 379 # Move the bad ranges to the specified file |
340 echo "### Writing read IDs with bad primer ranges ###" | 380 echo "### Writing read IDs with bad primer ranges ###" |
341 echo $(find_bad_primer_ranges Output/pr3in.txt) >"$BAD_PRIMER_RANGES" | 381 /bin/mv bad_primer_ranges.txt "$BAD_PRIMER_RANGES" |
342 fi | 382 fi |
383 else | |
384 N_BAD_PRIMERS=0 | |
343 fi | 385 fi |
344 # | 386 # |
345 # Sort microsat_summary output | 387 # Sort microsat_summary output |
346 echo "### Sorting microsat summary output ###" | 388 echo "### Sorting microsat summary output ###" |
347 head -n 7 Output/microsat_summary.txt | sort >microsat_summary.sorted | 389 head -n 7 Output/microsat_summary.txt | sort >microsat_summary.sorted |
390 echo "readsWithBadRanges:"$'\t'"$((N_BAD_PRIMERS * 2))" >>microsat_summary.sorted | |
348 grep "^$" Output/microsat_summary.txt>>microsat_summary.sorted | 391 grep "^$" Output/microsat_summary.txt>>microsat_summary.sorted |
349 grep "^Microsat Type" Output/microsat_summary.txt >>microsat_summary.sorted | 392 grep "^Microsat Type" Output/microsat_summary.txt >>microsat_summary.sorted |
350 tail -n +11 Output/microsat_summary.txt >>microsat_summary.sorted | 393 tail -n +11 Output/microsat_summary.txt | sort -r -n -k 5 >>microsat_summary.sorted |
351 mv microsat_summary.sorted Output/microsat_summary.txt | 394 mv microsat_summary.sorted Output/microsat_summary.txt |
352 # | 395 # |
353 # Sort PAL_summary output | 396 # Sort PAL_summary output |
354 echo "### Sorting PAL summary output ###" | 397 echo "### Sorting PAL summary output ###" |
355 head -1 Output/PAL_summary.txt > Output/PAL_summary.sorted.txt | 398 head -1 Output/PAL_summary.txt > Output/PAL_summary.sorted.txt |