comparison pal_finder_wrapper.sh @ 12:d26fb5260c67 draft

0.02.04.6: update to use conda to resolve dependencies.
author pjbriggs
date Thu, 15 Mar 2018 09:49:05 -0400
parents 84613d5f1713
children 3f8bf1a0403b
comparison
equal deleted inserted replaced
11:84613d5f1713 12:d26fb5260c67
51 echo $* 51 echo $*
52 # 52 #
53 # Maximum size reporting log file contents 53 # Maximum size reporting log file contents
54 MAX_LINES=500 54 MAX_LINES=500
55 # 55 #
56 # Get helper functions
57 . $(dirname $0)/pal_finder_wrapper_utils.sh
58 #
59 # Initialise locations of scripts, data and executables 56 # Initialise locations of scripts, data and executables
60 # 57 #
61 # Set these in the environment to override at execution time 58 # Set these in the environment to override at execution time
62 : ${PALFINDER_SCRIPT_DIR:=/usr/bin} 59 : ${PALFINDER_SCRIPT_DIR:=/usr/bin}
63 : ${PALFINDER_DATA_DIR:=/usr/share/pal_finder_v0.02.04} 60 : ${PALFINDER_DATA_DIR:=/usr/share/pal_finder_v0.02.04}
64 : ${PRIMER3_CORE_EXE:=primer3_core} 61 : ${PRIMER3_CORE_EXE:=primer3_core}
65 # 62 #
66 # Filter script is in the same directory as this script 63 # Filter script is in the same directory as this script
67 PALFINDER_FILTER=$(dirname $0)/pal_filter.py 64 PALFINDER_FILTER=$(dirname $0)/pal_filter.py
68 if [ ! -f $PALFINDER_FILTER ] ; then 65 if [ ! -f $PALFINDER_FILTER ] ; then
69 fatal No $PALFINDER_FILTER script 66 echo No $PALFINDER_FILTER script >&2
67 exit 1
70 fi 68 fi
71 # 69 #
72 # Check that we have all the components 70 # Check that we have all the components
71 function have_program() {
72 local program=$1
73 local got_program=$(which $program 2>&1 | grep "no $(basename $program) in")
74 if [ -z "$got_program" ] ; then
75 echo yes
76 else
77 echo no
78 fi
79 }
73 if [ "$(have_program $PRIMER3_CORE_EXE)" == "no" ] ; then 80 if [ "$(have_program $PRIMER3_CORE_EXE)" == "no" ] ; then
74 fatal "primer3_core missing: ${PRIMER3_CORE_EXE} not found" 81 echo "ERROR primer3_core missing: ${PRIMER3_CORE_EXE} not found" >&2
82 exit 1
75 fi 83 fi
76 if [ ! -f "${PALFINDER_DATA_DIR}/config.txt" ] ; then 84 if [ ! -f "${PALFINDER_DATA_DIR}/config.txt" ] ; then
77 fatal "pal_finder config.txt not found in ${PALFINDER_DATA_DIR}" 85 echo "ERROR pal_finder config.txt not found in ${PALFINDER_DATA_DIR}" >&2
86 exit 1
78 fi 87 fi
79 if [ ! -f "${PALFINDER_SCRIPT_DIR}/pal_finder_v0.02.04.pl" ] ; then 88 if [ ! -f "${PALFINDER_SCRIPT_DIR}/pal_finder_v0.02.04.pl" ] ; then
80 fatal "pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}" 89 echo "ERROR pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}" >&2
90 exit 1
81 fi 91 fi
82 # 92 #
83 # Initialise parameters used in the config.txt file 93 # Initialise parameters used in the config.txt file
84 PRIMER_PREFIX="test" 94 PRIMER_PREFIX="test"
85 MIN_2_MER_REPS=6 95 MIN_2_MER_REPS=6
106 # 116 #
107 # Collect command line arguments 117 # Collect command line arguments
108 if [ $# -lt 2 ] ; then 118 if [ $# -lt 2 ] ; then
109 echo "Usage: $0 FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" 119 echo "Usage: $0 FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]"
110 echo " $0 --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" 120 echo " $0 --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]"
111 fatal "Bad command line" 121 exits
112 fi 122 fi
113 if [ "$1" == "--454" ] ; then 123 if [ "$1" == "--454" ] ; then
114 PLATFORM="454" 124 PLATFORM="454"
115 FNA=$2 125 FNA=$2
116 else 126 else
223 done 233 done
224 # 234 #
225 # Check that primer3_core is available 235 # Check that primer3_core is available
226 got_primer3=`which $PRIMER3_CORE_EXE 2>&1 | grep -v "no primer3_core in"` 236 got_primer3=`which $PRIMER3_CORE_EXE 2>&1 | grep -v "no primer3_core in"`
227 if [ -z "$got_primer3" ] ; then 237 if [ -z "$got_primer3" ] ; then
228 fatal "primer3_core not found" 238 echo ERROR primer3_core not found >&2
239 exit 1
229 fi 240 fi
230 # 241 #
231 # Set up the working dir 242 # Set up the working dir
232 if [ "$PLATFORM" == "Illumina" ] ; then 243 if [ "$PLATFORM" == "Illumina" ] ; then
233 # Paired end Illumina data as input 244 # Paired end Illumina data as input
234 if [ $FASTQ_R1 == $FASTQ_R2 ] ; then 245 if [ $FASTQ_R1 == $FASTQ_R2 ] ; then
235 fatal ERROR R1 and R2 fastqs are the same file 246 echo ERROR R1 and R2 fastqs are the same file >&2
247 exit 1
236 fi 248 fi
237 ln -s $FASTQ_R1 249 ln -s $FASTQ_R1
238 ln -s $FASTQ_R2 250 ln -s $FASTQ_R2
239 fastq_r1=$(basename $FASTQ_R1) 251 fastq_r1=$(basename $FASTQ_R1)
240 fastq_r2=$(basename $FASTQ_R2) 252 fastq_r2=$(basename $FASTQ_R2)
250 # Copy in the default config.txt file 262 # Copy in the default config.txt file
251 echo "### Creating config.txt file for pal_finder run ###" 263 echo "### Creating config.txt file for pal_finder run ###"
252 /bin/cp $PALFINDER_DATA_DIR/config.txt . 264 /bin/cp $PALFINDER_DATA_DIR/config.txt .
253 # 265 #
254 # Update the config.txt file with new values 266 # Update the config.txt file with new values
267 function set_config_value() {
268 local key=$1
269 local value=$2
270 local config_txt=$3
271 if [ -z "$value" ] ; then
272 echo "No value for $key, left as default"
273 else
274 echo Setting "$key" to "$value"
275 sed -i 's,^'"$key"' .*,'"$key"' '"$value"',' $config_txt
276 fi
277 }
255 # Input files 278 # Input files
256 set_config_value platform $PLATFORM config.txt 279 set_config_value platform $PLATFORM config.txt
257 if [ "$PLATFORM" == "Illumina" ] ; then 280 if [ "$PLATFORM" == "Illumina" ] ; then
258 set_config_value inputFormat fastq config.txt 281 set_config_value inputFormat fastq config.txt
259 set_config_value pairedEnd 1 config.txt 282 set_config_value pairedEnd 1 config.txt
274 set_config_value 5merMinReps $MIN_5_MER_REPS config.txt 297 set_config_value 5merMinReps $MIN_5_MER_REPS config.txt
275 set_config_value 6merMinReps $MIN_6_MER_REPS config.txt 298 set_config_value 6merMinReps $MIN_6_MER_REPS config.txt
276 # Primer3 settings 299 # Primer3 settings
277 set_config_value primer3input Output/pr3in.txt config.txt 300 set_config_value primer3input Output/pr3in.txt config.txt
278 set_config_value primer3output Output/pr3out.txt config.txt 301 set_config_value primer3output Output/pr3out.txt config.txt
279 set_config_value keepPrimer3files 1 config.txt
280 set_config_value primer3executable $PRIMER3_CORE_EXE config.txt 302 set_config_value primer3executable $PRIMER3_CORE_EXE config.txt
281 set_config_value prNamePrefix ${PRIMER_PREFIX}_ config.txt 303 set_config_value prNamePrefix ${PRIMER_PREFIX}_ config.txt
282 set_config_value PRIMER_MISPRIMING_LIBRARY "$PRIMER_MISPRIMING_LIBRARY" config.txt 304 set_config_value PRIMER_MISPRIMING_LIBRARY "$PRIMER_MISPRIMING_LIBRARY" config.txt
283 set_config_value PRIMER_OPT_SIZE "$PRIMER_OPT_SIZE" config.txt 305 set_config_value PRIMER_OPT_SIZE "$PRIMER_OPT_SIZE" config.txt
284 set_config_value PRIMER_MIN_SIZE "$PRIMER_MIN_SIZE" config.txt 306 set_config_value PRIMER_MIN_SIZE "$PRIMER_MIN_SIZE" config.txt
305 fi 327 fi
306 tail -$MAX_LINES pal_finder.log 328 tail -$MAX_LINES pal_finder.log
307 # 329 #
308 # Check that log ends with "Done!!" message 330 # Check that log ends with "Done!!" message
309 if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then 331 if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then
310 fatal ERROR pal_finder failed to complete successfully 332 echo ERROR pal_finder failed to complete successfully >&2
311 fi 333 exit 1
312 echo "### pal_finder finished ###" 334 fi
313 # 335 #
314 # Check for errors in pal_finder output 336 # Sort microsat_summary output
315 echo "### Checking for errors ###" 337 echo "### Sorting microsat summary output ###"
316 if [ ! -z "$(grep 'primer3_core: Illegal element in PRIMER_PRODUCT_SIZE_RANGE' pal_finder.log)" ] ; then 338 head -n 7 Output/microsat_summary.txt | sort >microsat_summary.sorted
317 echo ERROR primer3 terminated prematurely due to bad product size ranges 339 grep "^$" Output/microsat_summary.txt>>microsat_summary.sorted
318 cat >&2 <<EOF 340 grep "^Microsat Type" Output/microsat_summary.txt >>microsat_summary.sorted
319 ERROR primer3 terminated prematurely due to bad product size ranges 341 tail -n +11 Output/microsat_summary.txt >>microsat_summary.sorted
320 342 mv microsat_summary.sorted Output/microsat_summary.txt
321 Pal_finder generated bad ranges for the following read IDs: 343 #
322 EOF 344 # Sort PAL_summary output
323 echo $(find_bad_primer_ranges Output/pr3in.txt) >&2 345 echo "### Sorting PAL summary output ###"
324 cat >&2 <<EOF
325
326 This error can occur when input data contains short R1 reads and has
327 has not been properly trimmed and filtered.
328
329 EOF
330 fatal pal_finder failed to complete successfully
331 EOF
332 fi
333 #
334 # Sort outputs into a consistent order regardless of Perl version
335 echo "### Sorting outputs ###"
336 head -1 Output/PAL_summary.txt > Output/PAL_summary.sorted.txt 346 head -1 Output/PAL_summary.txt > Output/PAL_summary.sorted.txt
337 if [ "$PLATFORM" == "Illumina" ] ; then 347 if [ "$PLATFORM" == "Illumina" ] ; then
338 grep -v "^readPairID" Output/PAL_summary.txt | sort -k 1 >> Output/PAL_summary.sorted.txt 348 grep -v "^readPairID" Output/PAL_summary.txt | sort -k 1 >> Output/PAL_summary.sorted.txt
339 else 349 else
340 grep -v "^SequenceID" Output/PAL_summary.txt | sort -k 1 >> Output/PAL_summary.sorted.txt 350 grep -v "^SequenceID" Output/PAL_summary.txt | sort -k 1 >> Output/PAL_summary.sorted.txt
350 echo WARNING output too long, truncated to last $MAX_LINES lines: 360 echo WARNING output too long, truncated to last $MAX_LINES lines:
351 echo ... 361 echo ...
352 fi 362 fi
353 tail -$MAX_LINES pal_filter.log 363 tail -$MAX_LINES pal_filter.log
354 if [ $? -ne 0 ] ; then 364 if [ $? -ne 0 ] ; then
355 fatal $PALFINDER_FILTER exited with non-zero status 365 echo ERROR $PALFINDER_FILTER exited with non-zero status >&2
366 exit 1
356 elif [ ! -f PAL_summary.filtered ] ; then 367 elif [ ! -f PAL_summary.filtered ] ; then
357 fatal no output from $PALFINDER_FILTER 368 echo ERROR no output from $PALFINDER_FILTER >&2
369 exit 1
358 fi 370 fi
359 fi 371 fi
360 # 372 #
361 # Clean up 373 # Clean up
362 echo "### Handling output files ###" 374 echo "### Handling output files ###"
372 if [ ! -z "$OUTPUT_ASSEMBLY" ] ; then 384 if [ ! -z "$OUTPUT_ASSEMBLY" ] ; then
373 assembly=${fastq_r1%.*}_pal_filter_assembly_output.txt 385 assembly=${fastq_r1%.*}_pal_filter_assembly_output.txt
374 if [ -f "$assembly" ] ; then 386 if [ -f "$assembly" ] ; then
375 /bin/mv $assembly "$OUTPUT_ASSEMBLY" 387 /bin/mv $assembly "$OUTPUT_ASSEMBLY"
376 else 388 else
377 fatal no assembly output found 389 echo ERROR no assembly output found >&2
390 exit 1
378 fi 391 fi
379 fi 392 fi
380 if [ ! -z "$OUTPUT_CONFIG_FILE" ] && [ -f config.txt ] ; then 393 if [ ! -z "$OUTPUT_CONFIG_FILE" ] && [ -f config.txt ] ; then
381 /bin/mv config.txt $OUTPUT_CONFIG_FILE 394 /bin/mv config.txt $OUTPUT_CONFIG_FILE
382 fi 395 fi