Mercurial > repos > pjbriggs > pal_finder
comparison pal_finder_wrapper.sh @ 12:d26fb5260c67 draft
0.02.04.6: update to use conda to resolve dependencies.
author | pjbriggs |
---|---|
date | Thu, 15 Mar 2018 09:49:05 -0400 |
parents | 84613d5f1713 |
children | 3f8bf1a0403b |
comparison
equal
deleted
inserted
replaced
11:84613d5f1713 | 12:d26fb5260c67 |
---|---|
51 echo $* | 51 echo $* |
52 # | 52 # |
53 # Maximum size reporting log file contents | 53 # Maximum size reporting log file contents |
54 MAX_LINES=500 | 54 MAX_LINES=500 |
55 # | 55 # |
56 # Get helper functions | |
57 . $(dirname $0)/pal_finder_wrapper_utils.sh | |
58 # | |
59 # Initialise locations of scripts, data and executables | 56 # Initialise locations of scripts, data and executables |
60 # | 57 # |
61 # Set these in the environment to override at execution time | 58 # Set these in the environment to override at execution time |
62 : ${PALFINDER_SCRIPT_DIR:=/usr/bin} | 59 : ${PALFINDER_SCRIPT_DIR:=/usr/bin} |
63 : ${PALFINDER_DATA_DIR:=/usr/share/pal_finder_v0.02.04} | 60 : ${PALFINDER_DATA_DIR:=/usr/share/pal_finder_v0.02.04} |
64 : ${PRIMER3_CORE_EXE:=primer3_core} | 61 : ${PRIMER3_CORE_EXE:=primer3_core} |
65 # | 62 # |
66 # Filter script is in the same directory as this script | 63 # Filter script is in the same directory as this script |
67 PALFINDER_FILTER=$(dirname $0)/pal_filter.py | 64 PALFINDER_FILTER=$(dirname $0)/pal_filter.py |
68 if [ ! -f $PALFINDER_FILTER ] ; then | 65 if [ ! -f $PALFINDER_FILTER ] ; then |
69 fatal No $PALFINDER_FILTER script | 66 echo No $PALFINDER_FILTER script >&2 |
67 exit 1 | |
70 fi | 68 fi |
71 # | 69 # |
72 # Check that we have all the components | 70 # Check that we have all the components |
71 function have_program() { | |
72 local program=$1 | |
73 local got_program=$(which $program 2>&1 | grep "no $(basename $program) in") | |
74 if [ -z "$got_program" ] ; then | |
75 echo yes | |
76 else | |
77 echo no | |
78 fi | |
79 } | |
73 if [ "$(have_program $PRIMER3_CORE_EXE)" == "no" ] ; then | 80 if [ "$(have_program $PRIMER3_CORE_EXE)" == "no" ] ; then |
74 fatal "primer3_core missing: ${PRIMER3_CORE_EXE} not found" | 81 echo "ERROR primer3_core missing: ${PRIMER3_CORE_EXE} not found" >&2 |
82 exit 1 | |
75 fi | 83 fi |
76 if [ ! -f "${PALFINDER_DATA_DIR}/config.txt" ] ; then | 84 if [ ! -f "${PALFINDER_DATA_DIR}/config.txt" ] ; then |
77 fatal "pal_finder config.txt not found in ${PALFINDER_DATA_DIR}" | 85 echo "ERROR pal_finder config.txt not found in ${PALFINDER_DATA_DIR}" >&2 |
86 exit 1 | |
78 fi | 87 fi |
79 if [ ! -f "${PALFINDER_SCRIPT_DIR}/pal_finder_v0.02.04.pl" ] ; then | 88 if [ ! -f "${PALFINDER_SCRIPT_DIR}/pal_finder_v0.02.04.pl" ] ; then |
80 fatal "pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}" | 89 echo "ERROR pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}" >&2 |
90 exit 1 | |
81 fi | 91 fi |
82 # | 92 # |
83 # Initialise parameters used in the config.txt file | 93 # Initialise parameters used in the config.txt file |
84 PRIMER_PREFIX="test" | 94 PRIMER_PREFIX="test" |
85 MIN_2_MER_REPS=6 | 95 MIN_2_MER_REPS=6 |
106 # | 116 # |
107 # Collect command line arguments | 117 # Collect command line arguments |
108 if [ $# -lt 2 ] ; then | 118 if [ $# -lt 2 ] ; then |
109 echo "Usage: $0 FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" | 119 echo "Usage: $0 FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" |
110 echo " $0 --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" | 120 echo " $0 --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" |
111 fatal "Bad command line" | 121 exits |
112 fi | 122 fi |
113 if [ "$1" == "--454" ] ; then | 123 if [ "$1" == "--454" ] ; then |
114 PLATFORM="454" | 124 PLATFORM="454" |
115 FNA=$2 | 125 FNA=$2 |
116 else | 126 else |
223 done | 233 done |
224 # | 234 # |
225 # Check that primer3_core is available | 235 # Check that primer3_core is available |
226 got_primer3=`which $PRIMER3_CORE_EXE 2>&1 | grep -v "no primer3_core in"` | 236 got_primer3=`which $PRIMER3_CORE_EXE 2>&1 | grep -v "no primer3_core in"` |
227 if [ -z "$got_primer3" ] ; then | 237 if [ -z "$got_primer3" ] ; then |
228 fatal "primer3_core not found" | 238 echo ERROR primer3_core not found >&2 |
239 exit 1 | |
229 fi | 240 fi |
230 # | 241 # |
231 # Set up the working dir | 242 # Set up the working dir |
232 if [ "$PLATFORM" == "Illumina" ] ; then | 243 if [ "$PLATFORM" == "Illumina" ] ; then |
233 # Paired end Illumina data as input | 244 # Paired end Illumina data as input |
234 if [ $FASTQ_R1 == $FASTQ_R2 ] ; then | 245 if [ $FASTQ_R1 == $FASTQ_R2 ] ; then |
235 fatal ERROR R1 and R2 fastqs are the same file | 246 echo ERROR R1 and R2 fastqs are the same file >&2 |
247 exit 1 | |
236 fi | 248 fi |
237 ln -s $FASTQ_R1 | 249 ln -s $FASTQ_R1 |
238 ln -s $FASTQ_R2 | 250 ln -s $FASTQ_R2 |
239 fastq_r1=$(basename $FASTQ_R1) | 251 fastq_r1=$(basename $FASTQ_R1) |
240 fastq_r2=$(basename $FASTQ_R2) | 252 fastq_r2=$(basename $FASTQ_R2) |
250 # Copy in the default config.txt file | 262 # Copy in the default config.txt file |
251 echo "### Creating config.txt file for pal_finder run ###" | 263 echo "### Creating config.txt file for pal_finder run ###" |
252 /bin/cp $PALFINDER_DATA_DIR/config.txt . | 264 /bin/cp $PALFINDER_DATA_DIR/config.txt . |
253 # | 265 # |
254 # Update the config.txt file with new values | 266 # Update the config.txt file with new values |
267 function set_config_value() { | |
268 local key=$1 | |
269 local value=$2 | |
270 local config_txt=$3 | |
271 if [ -z "$value" ] ; then | |
272 echo "No value for $key, left as default" | |
273 else | |
274 echo Setting "$key" to "$value" | |
275 sed -i 's,^'"$key"' .*,'"$key"' '"$value"',' $config_txt | |
276 fi | |
277 } | |
255 # Input files | 278 # Input files |
256 set_config_value platform $PLATFORM config.txt | 279 set_config_value platform $PLATFORM config.txt |
257 if [ "$PLATFORM" == "Illumina" ] ; then | 280 if [ "$PLATFORM" == "Illumina" ] ; then |
258 set_config_value inputFormat fastq config.txt | 281 set_config_value inputFormat fastq config.txt |
259 set_config_value pairedEnd 1 config.txt | 282 set_config_value pairedEnd 1 config.txt |
274 set_config_value 5merMinReps $MIN_5_MER_REPS config.txt | 297 set_config_value 5merMinReps $MIN_5_MER_REPS config.txt |
275 set_config_value 6merMinReps $MIN_6_MER_REPS config.txt | 298 set_config_value 6merMinReps $MIN_6_MER_REPS config.txt |
276 # Primer3 settings | 299 # Primer3 settings |
277 set_config_value primer3input Output/pr3in.txt config.txt | 300 set_config_value primer3input Output/pr3in.txt config.txt |
278 set_config_value primer3output Output/pr3out.txt config.txt | 301 set_config_value primer3output Output/pr3out.txt config.txt |
279 set_config_value keepPrimer3files 1 config.txt | |
280 set_config_value primer3executable $PRIMER3_CORE_EXE config.txt | 302 set_config_value primer3executable $PRIMER3_CORE_EXE config.txt |
281 set_config_value prNamePrefix ${PRIMER_PREFIX}_ config.txt | 303 set_config_value prNamePrefix ${PRIMER_PREFIX}_ config.txt |
282 set_config_value PRIMER_MISPRIMING_LIBRARY "$PRIMER_MISPRIMING_LIBRARY" config.txt | 304 set_config_value PRIMER_MISPRIMING_LIBRARY "$PRIMER_MISPRIMING_LIBRARY" config.txt |
283 set_config_value PRIMER_OPT_SIZE "$PRIMER_OPT_SIZE" config.txt | 305 set_config_value PRIMER_OPT_SIZE "$PRIMER_OPT_SIZE" config.txt |
284 set_config_value PRIMER_MIN_SIZE "$PRIMER_MIN_SIZE" config.txt | 306 set_config_value PRIMER_MIN_SIZE "$PRIMER_MIN_SIZE" config.txt |
305 fi | 327 fi |
306 tail -$MAX_LINES pal_finder.log | 328 tail -$MAX_LINES pal_finder.log |
307 # | 329 # |
308 # Check that log ends with "Done!!" message | 330 # Check that log ends with "Done!!" message |
309 if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then | 331 if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then |
310 fatal ERROR pal_finder failed to complete successfully | 332 echo ERROR pal_finder failed to complete successfully >&2 |
311 fi | 333 exit 1 |
312 echo "### pal_finder finished ###" | 334 fi |
313 # | 335 # |
314 # Check for errors in pal_finder output | 336 # Sort microsat_summary output |
315 echo "### Checking for errors ###" | 337 echo "### Sorting microsat summary output ###" |
316 if [ ! -z "$(grep 'primer3_core: Illegal element in PRIMER_PRODUCT_SIZE_RANGE' pal_finder.log)" ] ; then | 338 head -n 7 Output/microsat_summary.txt | sort >microsat_summary.sorted |
317 echo ERROR primer3 terminated prematurely due to bad product size ranges | 339 grep "^$" Output/microsat_summary.txt>>microsat_summary.sorted |
318 cat >&2 <<EOF | 340 grep "^Microsat Type" Output/microsat_summary.txt >>microsat_summary.sorted |
319 ERROR primer3 terminated prematurely due to bad product size ranges | 341 tail -n +11 Output/microsat_summary.txt >>microsat_summary.sorted |
320 | 342 mv microsat_summary.sorted Output/microsat_summary.txt |
321 Pal_finder generated bad ranges for the following read IDs: | 343 # |
322 EOF | 344 # Sort PAL_summary output |
323 echo $(find_bad_primer_ranges Output/pr3in.txt) >&2 | 345 echo "### Sorting PAL summary output ###" |
324 cat >&2 <<EOF | |
325 | |
326 This error can occur when input data contains short R1 reads and has | |
327 has not been properly trimmed and filtered. | |
328 | |
329 EOF | |
330 fatal pal_finder failed to complete successfully | |
331 EOF | |
332 fi | |
333 # | |
334 # Sort outputs into a consistent order regardless of Perl version | |
335 echo "### Sorting outputs ###" | |
336 head -1 Output/PAL_summary.txt > Output/PAL_summary.sorted.txt | 346 head -1 Output/PAL_summary.txt > Output/PAL_summary.sorted.txt |
337 if [ "$PLATFORM" == "Illumina" ] ; then | 347 if [ "$PLATFORM" == "Illumina" ] ; then |
338 grep -v "^readPairID" Output/PAL_summary.txt | sort -k 1 >> Output/PAL_summary.sorted.txt | 348 grep -v "^readPairID" Output/PAL_summary.txt | sort -k 1 >> Output/PAL_summary.sorted.txt |
339 else | 349 else |
340 grep -v "^SequenceID" Output/PAL_summary.txt | sort -k 1 >> Output/PAL_summary.sorted.txt | 350 grep -v "^SequenceID" Output/PAL_summary.txt | sort -k 1 >> Output/PAL_summary.sorted.txt |
350 echo WARNING output too long, truncated to last $MAX_LINES lines: | 360 echo WARNING output too long, truncated to last $MAX_LINES lines: |
351 echo ... | 361 echo ... |
352 fi | 362 fi |
353 tail -$MAX_LINES pal_filter.log | 363 tail -$MAX_LINES pal_filter.log |
354 if [ $? -ne 0 ] ; then | 364 if [ $? -ne 0 ] ; then |
355 fatal $PALFINDER_FILTER exited with non-zero status | 365 echo ERROR $PALFINDER_FILTER exited with non-zero status >&2 |
366 exit 1 | |
356 elif [ ! -f PAL_summary.filtered ] ; then | 367 elif [ ! -f PAL_summary.filtered ] ; then |
357 fatal no output from $PALFINDER_FILTER | 368 echo ERROR no output from $PALFINDER_FILTER >&2 |
369 exit 1 | |
358 fi | 370 fi |
359 fi | 371 fi |
360 # | 372 # |
361 # Clean up | 373 # Clean up |
362 echo "### Handling output files ###" | 374 echo "### Handling output files ###" |
372 if [ ! -z "$OUTPUT_ASSEMBLY" ] ; then | 384 if [ ! -z "$OUTPUT_ASSEMBLY" ] ; then |
373 assembly=${fastq_r1%.*}_pal_filter_assembly_output.txt | 385 assembly=${fastq_r1%.*}_pal_filter_assembly_output.txt |
374 if [ -f "$assembly" ] ; then | 386 if [ -f "$assembly" ] ; then |
375 /bin/mv $assembly "$OUTPUT_ASSEMBLY" | 387 /bin/mv $assembly "$OUTPUT_ASSEMBLY" |
376 else | 388 else |
377 fatal no assembly output found | 389 echo ERROR no assembly output found >&2 |
390 exit 1 | |
378 fi | 391 fi |
379 fi | 392 fi |
380 if [ ! -z "$OUTPUT_CONFIG_FILE" ] && [ -f config.txt ] ; then | 393 if [ ! -z "$OUTPUT_CONFIG_FILE" ] && [ -f config.txt ] ; then |
381 /bin/mv config.txt $OUTPUT_CONFIG_FILE | 394 /bin/mv config.txt $OUTPUT_CONFIG_FILE |
382 fi | 395 fi |