Mercurial > repos > pjbriggs > pal_finder
comparison pal_finder_wrapper.sh @ 14:3f8bf1a0403b draft
Uploaded version with bad primer range detection (WIP).
author | pjbriggs |
---|---|
date | Thu, 22 Mar 2018 07:21:26 -0400 |
parents | d26fb5260c67 |
children | a3af1ff4cad1 |
comparison
equal
deleted
inserted
replaced
13:88c972081f15 | 14:3f8bf1a0403b |
---|---|
24 # --primer-min-tm VALUE: minimum acceptable melting temperature (Celsius) for a primer oligo | 24 # --primer-min-tm VALUE: minimum acceptable melting temperature (Celsius) for a primer oligo |
25 # --primer-max-tm VALUE: maximum acceptable melting temperature (Celsius) | 25 # --primer-max-tm VALUE: maximum acceptable melting temperature (Celsius) |
26 # --primer-opt-tm VALUE: optimum melting temperature (Celsius) | 26 # --primer-opt-tm VALUE: optimum melting temperature (Celsius) |
27 # --primer-pair-max-diff-tm VALUE: max difference between melting temps of left & right primers | 27 # --primer-pair-max-diff-tm VALUE: max difference between melting temps of left & right primers |
28 # --output_config_file FNAME: write a copy of the config.txt file to FNAME | 28 # --output_config_file FNAME: write a copy of the config.txt file to FNAME |
29 # --filter_microsats FNAME: write output of filter options FNAME | 29 # --bad_primer_ranges FNAME: write a list of the read IDs generating bad primer ranges to FNAME |
30 # --filter_microsats FNAME: write output of filter options to FNAME | |
30 # -assembly FNAME: run the 'assembly' filter option and write to FNAME | 31 # -assembly FNAME: run the 'assembly' filter option and write to FNAME |
31 # -primers: run the 'primers' filter option | 32 # -primers: run the 'primers' filter option |
32 # -occurrences: run the 'occurrences' filter option | 33 # -occurrences: run the 'occurrences' filter option |
33 # -rankmotifs: run the 'rankmotifs' filter option | 34 # -rankmotifs: run the 'rankmotifs' filter option |
34 # | 35 # |
51 echo $* | 52 echo $* |
52 # | 53 # |
53 # Maximum size reporting log file contents | 54 # Maximum size reporting log file contents |
54 MAX_LINES=500 | 55 MAX_LINES=500 |
55 # | 56 # |
57 # Get helper functions | |
58 . $(dirname $0)/pal_finder_wrapper_utils.sh | |
59 # | |
56 # Initialise locations of scripts, data and executables | 60 # Initialise locations of scripts, data and executables |
57 # | 61 # |
58 # Set these in the environment to override at execution time | 62 # Set these in the environment to override at execution time |
59 : ${PALFINDER_SCRIPT_DIR:=/usr/bin} | 63 : ${PALFINDER_SCRIPT_DIR:=/usr/bin} |
60 : ${PALFINDER_DATA_DIR:=/usr/share/pal_finder_v0.02.04} | 64 : ${PALFINDER_DATA_DIR:=/usr/share/pal_finder_v0.02.04} |
61 : ${PRIMER3_CORE_EXE:=primer3_core} | 65 : ${PRIMER3_CORE_EXE:=primer3_core} |
62 # | 66 # |
63 # Filter script is in the same directory as this script | 67 # Filter script is in the same directory as this script |
64 PALFINDER_FILTER=$(dirname $0)/pal_filter.py | 68 PALFINDER_FILTER=$(dirname $0)/pal_filter.py |
65 if [ ! -f $PALFINDER_FILTER ] ; then | 69 if [ ! -f $PALFINDER_FILTER ] ; then |
66 echo No $PALFINDER_FILTER script >&2 | 70 fatal No $PALFINDER_FILTER script |
67 exit 1 | |
68 fi | 71 fi |
69 # | 72 # |
70 # Check that we have all the components | 73 # Check that we have all the components |
71 function have_program() { | |
72 local program=$1 | |
73 local got_program=$(which $program 2>&1 | grep "no $(basename $program) in") | |
74 if [ -z "$got_program" ] ; then | |
75 echo yes | |
76 else | |
77 echo no | |
78 fi | |
79 } | |
80 if [ "$(have_program $PRIMER3_CORE_EXE)" == "no" ] ; then | 74 if [ "$(have_program $PRIMER3_CORE_EXE)" == "no" ] ; then |
81 echo "ERROR primer3_core missing: ${PRIMER3_CORE_EXE} not found" >&2 | 75 fatal "primer3_core missing: ${PRIMER3_CORE_EXE} not found" |
82 exit 1 | |
83 fi | 76 fi |
84 if [ ! -f "${PALFINDER_DATA_DIR}/config.txt" ] ; then | 77 if [ ! -f "${PALFINDER_DATA_DIR}/config.txt" ] ; then |
85 echo "ERROR pal_finder config.txt not found in ${PALFINDER_DATA_DIR}" >&2 | 78 fatal "pal_finder config.txt not found in ${PALFINDER_DATA_DIR}" |
86 exit 1 | |
87 fi | 79 fi |
88 if [ ! -f "${PALFINDER_SCRIPT_DIR}/pal_finder_v0.02.04.pl" ] ; then | 80 if [ ! -f "${PALFINDER_SCRIPT_DIR}/pal_finder_v0.02.04.pl" ] ; then |
89 echo "ERROR pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}" >&2 | 81 fatal "pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}" |
90 exit 1 | |
91 fi | 82 fi |
92 # | 83 # |
93 # Initialise parameters used in the config.txt file | 84 # Initialise parameters used in the config.txt file |
94 PRIMER_PREFIX="test" | 85 PRIMER_PREFIX="test" |
95 MIN_2_MER_REPS=6 | 86 MIN_2_MER_REPS=6 |
111 PRIMER_PAIR_MAX_DIFF_TM= | 102 PRIMER_PAIR_MAX_DIFF_TM= |
112 OUTPUT_CONFIG_FILE= | 103 OUTPUT_CONFIG_FILE= |
113 OUTPUT_ASSEMBLY= | 104 OUTPUT_ASSEMBLY= |
114 FILTERED_MICROSATS= | 105 FILTERED_MICROSATS= |
115 FILTER_OPTIONS= | 106 FILTER_OPTIONS= |
107 BAD_PRIMER_RANGES= | |
116 # | 108 # |
117 # Collect command line arguments | 109 # Collect command line arguments |
118 if [ $# -lt 2 ] ; then | 110 if [ $# -lt 2 ] ; then |
119 echo "Usage: $0 FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" | 111 echo "Usage: $0 FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" |
120 echo " $0 --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" | 112 echo " $0 --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" |
121 exits | 113 fatal "Bad command line" |
122 fi | 114 fi |
123 if [ "$1" == "--454" ] ; then | 115 if [ "$1" == "--454" ] ; then |
124 PLATFORM="454" | 116 PLATFORM="454" |
125 FNA=$2 | 117 FNA=$2 |
126 else | 118 else |
210 ;; | 202 ;; |
211 --output_config_file) | 203 --output_config_file) |
212 shift | 204 shift |
213 OUTPUT_CONFIG_FILE=$1 | 205 OUTPUT_CONFIG_FILE=$1 |
214 ;; | 206 ;; |
207 --bad_primer_ranges) | |
208 shift | |
209 BAD_PRIMER_RANGES=$1 | |
210 ;; | |
215 --filter_microsats) | 211 --filter_microsats) |
216 shift | 212 shift |
217 FILTERED_MICROSATS=$1 | 213 FILTERED_MICROSATS=$1 |
218 ;; | 214 ;; |
219 -primers|-occurrences|-rankmotifs) | 215 -primers|-occurrences|-rankmotifs) |
233 done | 229 done |
234 # | 230 # |
235 # Check that primer3_core is available | 231 # Check that primer3_core is available |
236 got_primer3=`which $PRIMER3_CORE_EXE 2>&1 | grep -v "no primer3_core in"` | 232 got_primer3=`which $PRIMER3_CORE_EXE 2>&1 | grep -v "no primer3_core in"` |
237 if [ -z "$got_primer3" ] ; then | 233 if [ -z "$got_primer3" ] ; then |
238 echo ERROR primer3_core not found >&2 | 234 fatal "primer3_core not found" |
239 exit 1 | |
240 fi | 235 fi |
241 # | 236 # |
242 # Set up the working dir | 237 # Set up the working dir |
243 if [ "$PLATFORM" == "Illumina" ] ; then | 238 if [ "$PLATFORM" == "Illumina" ] ; then |
244 # Paired end Illumina data as input | 239 # Paired end Illumina data as input |
245 if [ $FASTQ_R1 == $FASTQ_R2 ] ; then | 240 if [ $FASTQ_R1 == $FASTQ_R2 ] ; then |
246 echo ERROR R1 and R2 fastqs are the same file >&2 | 241 fatal ERROR R1 and R2 fastqs are the same file |
247 exit 1 | |
248 fi | 242 fi |
249 ln -s $FASTQ_R1 | 243 ln -s $FASTQ_R1 |
250 ln -s $FASTQ_R2 | 244 ln -s $FASTQ_R2 |
251 fastq_r1=$(basename $FASTQ_R1) | 245 fastq_r1=$(basename $FASTQ_R1) |
252 fastq_r2=$(basename $FASTQ_R2) | 246 fastq_r2=$(basename $FASTQ_R2) |
262 # Copy in the default config.txt file | 256 # Copy in the default config.txt file |
263 echo "### Creating config.txt file for pal_finder run ###" | 257 echo "### Creating config.txt file for pal_finder run ###" |
264 /bin/cp $PALFINDER_DATA_DIR/config.txt . | 258 /bin/cp $PALFINDER_DATA_DIR/config.txt . |
265 # | 259 # |
266 # Update the config.txt file with new values | 260 # Update the config.txt file with new values |
267 function set_config_value() { | |
268 local key=$1 | |
269 local value=$2 | |
270 local config_txt=$3 | |
271 if [ -z "$value" ] ; then | |
272 echo "No value for $key, left as default" | |
273 else | |
274 echo Setting "$key" to "$value" | |
275 sed -i 's,^'"$key"' .*,'"$key"' '"$value"',' $config_txt | |
276 fi | |
277 } | |
278 # Input files | 261 # Input files |
279 set_config_value platform $PLATFORM config.txt | 262 set_config_value platform $PLATFORM config.txt |
280 if [ "$PLATFORM" == "Illumina" ] ; then | 263 if [ "$PLATFORM" == "Illumina" ] ; then |
281 set_config_value inputFormat fastq config.txt | 264 set_config_value inputFormat fastq config.txt |
282 set_config_value pairedEnd 1 config.txt | 265 set_config_value pairedEnd 1 config.txt |
297 set_config_value 5merMinReps $MIN_5_MER_REPS config.txt | 280 set_config_value 5merMinReps $MIN_5_MER_REPS config.txt |
298 set_config_value 6merMinReps $MIN_6_MER_REPS config.txt | 281 set_config_value 6merMinReps $MIN_6_MER_REPS config.txt |
299 # Primer3 settings | 282 # Primer3 settings |
300 set_config_value primer3input Output/pr3in.txt config.txt | 283 set_config_value primer3input Output/pr3in.txt config.txt |
301 set_config_value primer3output Output/pr3out.txt config.txt | 284 set_config_value primer3output Output/pr3out.txt config.txt |
285 set_config_value keepPrimer3files 1 config.txt | |
302 set_config_value primer3executable $PRIMER3_CORE_EXE config.txt | 286 set_config_value primer3executable $PRIMER3_CORE_EXE config.txt |
303 set_config_value prNamePrefix ${PRIMER_PREFIX}_ config.txt | 287 set_config_value prNamePrefix ${PRIMER_PREFIX}_ config.txt |
304 set_config_value PRIMER_MISPRIMING_LIBRARY "$PRIMER_MISPRIMING_LIBRARY" config.txt | 288 set_config_value PRIMER_MISPRIMING_LIBRARY "$PRIMER_MISPRIMING_LIBRARY" config.txt |
305 set_config_value PRIMER_OPT_SIZE "$PRIMER_OPT_SIZE" config.txt | 289 set_config_value PRIMER_OPT_SIZE "$PRIMER_OPT_SIZE" config.txt |
306 set_config_value PRIMER_MIN_SIZE "$PRIMER_MIN_SIZE" config.txt | 290 set_config_value PRIMER_MIN_SIZE "$PRIMER_MIN_SIZE" config.txt |
327 fi | 311 fi |
328 tail -$MAX_LINES pal_finder.log | 312 tail -$MAX_LINES pal_finder.log |
329 # | 313 # |
330 # Check that log ends with "Done!!" message | 314 # Check that log ends with "Done!!" message |
331 if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then | 315 if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then |
332 echo ERROR pal_finder failed to complete successfully >&2 | 316 fatal ERROR pal_finder failed to complete successfully |
333 exit 1 | 317 fi |
318 echo "### pal_finder finished ###" | |
319 # | |
320 # Check for errors in pal_finder output | |
321 echo "### Checking for errors ###" | |
322 if [ ! -z "$(grep 'primer3_core: Illegal element in PRIMER_PRODUCT_SIZE_RANGE' pal_finder.log)" ] ; then | |
323 echo WARNING primer3 terminated prematurely due to bad product size ranges | |
324 if [ -z "$BAD_PRIMER_RANGES" ] ; then | |
325 # No output file so report to stderr | |
326 cat >&2 <<EOF | |
327 ERROR primer3 terminated prematurely due to bad product size ranges | |
328 | |
329 Pal_finder generated bad ranges for the following read IDs: | |
330 EOF | |
331 echo $(find_bad_primer_ranges Output/pr3in.txt) >&2 | |
332 cat >&2 <<EOF | |
333 | |
334 This error can occur when input data contains short R1 reads and has | |
335 not been properly trimmed and filtered. | |
336 | |
337 EOF | |
338 else | |
339 # Dump bad ranges to file | |
340 echo "### Writing read IDs with bad primer ranges ###" | |
341 echo $(find_bad_primer_ranges Output/pr3in.txt) >"$BAD_PRIMER_RANGES" | |
342 fi | |
334 fi | 343 fi |
335 # | 344 # |
336 # Sort microsat_summary output | 345 # Sort microsat_summary output |
337 echo "### Sorting microsat summary output ###" | 346 echo "### Sorting microsat summary output ###" |
338 head -n 7 Output/microsat_summary.txt | sort >microsat_summary.sorted | 347 head -n 7 Output/microsat_summary.txt | sort >microsat_summary.sorted |
360 echo WARNING output too long, truncated to last $MAX_LINES lines: | 369 echo WARNING output too long, truncated to last $MAX_LINES lines: |
361 echo ... | 370 echo ... |
362 fi | 371 fi |
363 tail -$MAX_LINES pal_filter.log | 372 tail -$MAX_LINES pal_filter.log |
364 if [ $? -ne 0 ] ; then | 373 if [ $? -ne 0 ] ; then |
365 echo ERROR $PALFINDER_FILTER exited with non-zero status >&2 | 374 fatal $PALFINDER_FILTER exited with non-zero status |
366 exit 1 | |
367 elif [ ! -f PAL_summary.filtered ] ; then | 375 elif [ ! -f PAL_summary.filtered ] ; then |
368 echo ERROR no output from $PALFINDER_FILTER >&2 | 376 fatal no output from $PALFINDER_FILTER |
369 exit 1 | |
370 fi | 377 fi |
371 fi | 378 fi |
372 # | 379 # |
373 # Clean up | 380 # Clean up |
374 echo "### Handling output files ###" | 381 echo "### Handling output files ###" |
384 if [ ! -z "$OUTPUT_ASSEMBLY" ] ; then | 391 if [ ! -z "$OUTPUT_ASSEMBLY" ] ; then |
385 assembly=${fastq_r1%.*}_pal_filter_assembly_output.txt | 392 assembly=${fastq_r1%.*}_pal_filter_assembly_output.txt |
386 if [ -f "$assembly" ] ; then | 393 if [ -f "$assembly" ] ; then |
387 /bin/mv $assembly "$OUTPUT_ASSEMBLY" | 394 /bin/mv $assembly "$OUTPUT_ASSEMBLY" |
388 else | 395 else |
389 echo ERROR no assembly output found >&2 | 396 fatal no assembly output found |
390 exit 1 | |
391 fi | 397 fi |
392 fi | 398 fi |
393 if [ ! -z "$OUTPUT_CONFIG_FILE" ] && [ -f config.txt ] ; then | 399 if [ ! -z "$OUTPUT_CONFIG_FILE" ] && [ -f config.txt ] ; then |
394 /bin/mv config.txt $OUTPUT_CONFIG_FILE | 400 /bin/mv config.txt $OUTPUT_CONFIG_FILE |
395 fi | 401 fi |