Previous changeset 14:3f8bf1a0403b (2018-03-22) Next changeset 16:856cafcbf422 (2018-05-15) |
Commit message:
pal_finder 0.02.04.7 for testing. |
modified:
README.rst pal_finder_macros.xml pal_finder_wrapper.sh pal_finder_wrapper.xml pal_finder_wrapper_utils.sh |
added:
test-data/illuminaPE_bad_primer_read_ids.out test-data/illuminaPE_microsats_bad_ranges.out.re_match test-data/illuminaPE_microsats_subset.out.re_match test-data/illuminaPE_r1_bad_ranges.fq test-data/illuminaPE_r1_no_microsats.fq test-data/illuminaPE_r2_bad_ranges.fq test-data/illuminaPE_r2_no_microsats.fq |
b |
diff -r 3f8bf1a0403b -r a3af1ff4cad1 README.rst --- a/README.rst Thu Mar 22 07:21:26 2018 -0400 +++ b/README.rst Mon May 14 11:10:19 2018 -0400 |
b |
@@ -63,7 +63,9 @@ 0.02.04.7 - Trap for errors in ``pal_finder_v0.02.04.pl`` resulting in bad ranges being supplied to ``primer3_core`` for some reads via - ``PRIMER_PRODUCT_RANGE_SIZE``. + ``PRIMER_PRODUCT_RANGE_SIZE`` (and enable 'bad' reads to be output + to a dataset); add new option to use a random subset of reads for + microsatellite detection. 0.02.04.6 - Update to get dependencies using ``conda`` when installed from the toolshed (this removes the explicit dependency on Perl 5.16 introduced in 0.02.04.2, as a result the outputs from the tool are |
b |
diff -r 3f8bf1a0403b -r a3af1ff4cad1 pal_finder_macros.xml --- a/pal_finder_macros.xml Thu Mar 22 07:21:26 2018 -0400 +++ b/pal_finder_macros.xml Mon May 14 11:10:19 2018 -0400 |
b |
@@ -14,6 +14,7 @@ <has_line line="readsWithMicrosat:	13" /> <has_line line="totalBases:	2320" /> <has_line line="totalReads:	20	(2 x 10)" /> + <has_line line="readsWithBadRanges:	0" /> <has_line line="Microsat Type	monomer length	total loci	loci w/ primers	reads with loci	total bases	extended	extended w/ primers	spanning	spanning w/ primers" /> <has_line_matching expression="(AC|TG)\t2\t7\t4\t7\t116\t0?\t0?\t0?\t0?" /> <has_line_matching expression="(AT|CG)\t2\t8\t0\t6\t106\t0?\t0?\t0?\t0?" /> @@ -21,6 +22,44 @@ </assert_contents> </output> </xml> + <xml name="output_illumina_microsat_subset_summary"> + <output name="output_microsat_summary"> + <assert_contents> + <has_line line="allExtended:	0" /> + <has_line line="allSpan:	0" /> + <has_line line="broken:	0" /> + <has_line line="compound:	2" /> + <has_line line="readsWithMicrosat:	7" /> + <has_line line="totalBases:	1160" /> + <has_line line="totalReads:	10	(2 x 5)" /> + <has_line line="Microsat Type	monomer length	total loci	loci w/ primers	reads with loci	total bases	extended	extended w/ primers	spanning	spanning w/ primers" /> + <has_line_matching expression="(AC|TG)\t2\t6\t3\t6\t104\t0?\t0?\t0?\t0?" /> + <has_line_matching expression="(AT|CG)\t2\t3\t0\t3\t38\t0?\t0?\t0?\t0?" /> + <has_line_matching expression="(AG|TC)\t2\t0\t0\t0\t0\t0?\t0?\t0?\t0?" /> + </assert_contents> + </output> + </xml> + <xml name="output_illumina_microsat_summary_bad_ranges"> + <output name="output_microsat_summary"> + <assert_contents> + <has_line line="allExtended:	2" /> + <has_line line="allSpan:	0" /> + <has_line line="broken:	0" /> + <has_line line="compound:	4" /> + <has_line line="readsWithMicrosat:	12" /> + <has_line line="totalBases:	2231" /> + <has_line line="totalReads:	12	(2 x 6)" /> + <has_line line="readsWithBadRanges:	2" /> + <has_line line="Microsat Type	monomer length	total loci	loci w/ primers	reads with loci	total bases	extended	extended w/ primers	spanning	spanning w/ primers" /> + <!-- + I'd like to do a basic check on the remainder of the file but + there are too many lines for the 'assert_contents' method (and + the tag doesn't provide the functionality to do a simple line + count + --> + </assert_contents> + </output> + </xml> <xml name="output_454_microsat_summary"> <output name="output_microsat_summary"> <assert_contents> |
b |
diff -r 3f8bf1a0403b -r a3af1ff4cad1 pal_finder_wrapper.sh --- a/pal_finder_wrapper.sh Thu Mar 22 07:21:26 2018 -0400 +++ b/pal_finder_wrapper.sh Mon May 14 11:10:19 2018 -0400 |
[ |
@@ -32,6 +32,7 @@ # -primers: run the 'primers' filter option # -occurrences: run the 'occurrences' filter option # -rankmotifs: run the 'rankmotifs' filter option +# --subset N: use a subset of reads of size N # # pal_finder is available from http://sourceforge.net/projects/palfinder/ # @@ -104,7 +105,8 @@ OUTPUT_ASSEMBLY= FILTERED_MICROSATS= FILTER_OPTIONS= -BAD_PRIMER_RANGES= +SUBSET= +RANDOM_SEED=568765 # # Collect command line arguments if [ $# -lt 2 ] ; then @@ -220,6 +222,10 @@ shift OUTPUT_ASSEMBLY=$1 ;; + --subset) + shift + SUBSET=$1 + ;; *) echo Unknown option: $1 >&2 exit 1 @@ -234,6 +240,25 @@ fatal "primer3_core not found" fi # +# Check the n-mers specification +if [ $MIN_6_MER_REPS -ne 0 ] ; then + if [ $MIN_5_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 5-mers cannot be zero if number of 6-mers is non-zero" + fi +fi +if [ $MIN_5_MER_REPS -ne 0 ] ; then + if [ $MIN_4_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 4-mers cannot be zero if number of 5-mers is non-zero" + fi +fi +if [ $MIN_4_MER_REPS -ne 0 ] ; then + if [ $MIN_3_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 3-mers cannot be zero if number of 4-mers is non-zero" + fi +fi +if [ $MIN_2_MER_REPS -eq 0 ] ; then + fatal "Minimum number of 2-mer repeats cannot be zero" +fi # Set up the working dir if [ "$PLATFORM" == "Illumina" ] ; then # Paired end Illumina data as input @@ -253,6 +278,14 @@ PRIMER_MISPRIMING_LIBRARY=$(basename $PRIMER_MISPRIMING_LIBRARY) mkdir Output # +# Use a subset of reads +if [ ! -z "$SUBSET" ] ; then + echo "### Extracting subset of reads ###" + $(dirname $0)/fastq_subset.py -n $SUBSET -s $RANDOM_SEED $fastq_r1 $fastq_r2 + fastq_r1="subset_r1.fq" + fastq_r2="subset_r2.fq" +fi +# # Copy in the default config.txt file echo "### Creating config.txt file for pal_finder run ###" /bin/cp $PALFINDER_DATA_DIR/config.txt . @@ -311,8 +344,13 @@ fi tail -$MAX_LINES pal_finder.log # -# Check that log ends with "Done!!" message -if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then +# Check for success/failure +if [ ! -z "$(tail -n 1 pal_finder.log | grep 'No microsatellites found in any reads. Ending script.')" ] ; then + # No microsatellites found + fatal ERROR pal_finder failed to locate any microsatellites + exit 1 +elif [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then + # Log doesn't end with "Done!!" (indicates failure) fatal ERROR pal_finder failed to complete successfully fi echo "### pal_finder finished ###" @@ -321,33 +359,38 @@ echo "### Checking for errors ###" if [ ! -z "$(grep 'primer3_core: Illegal element in PRIMER_PRODUCT_SIZE_RANGE' pal_finder.log)" ] ; then echo WARNING primer3 terminated prematurely due to bad product size ranges + $(find_bad_primer_ranges Output/pr3in.txt bad_primer_ranges.txt) + N_BAD_PRIMERS=$(cat bad_primer_ranges.txt | wc -l) if [ -z "$BAD_PRIMER_RANGES" ] ; then # No output file so report to stderr - cat >&2 <<EOF -ERROR primer3 terminated prematurely due to bad product size ranges + cat <<EOF Pal_finder generated bad ranges for the following read IDs: + EOF - echo $(find_bad_primer_ranges Output/pr3in.txt) >&2 - cat >&2 <<EOF + cat bad_primer_ranges.txt + cat <<EOF This error can occur when input data contains short R1 reads and has has not been properly trimmed and filtered. EOF else - # Dump bad ranges to file + # Move the bad ranges to the specified file echo "### Writing read IDs with bad primer ranges ###" - echo $(find_bad_primer_ranges Output/pr3in.txt) >"$BAD_PRIMER_RANGES" + /bin/mv bad_primer_ranges.txt "$BAD_PRIMER_RANGES" fi +else + N_BAD_PRIMERS=0 fi # # Sort microsat_summary output echo "### Sorting microsat summary output ###" head -n 7 Output/microsat_summary.txt | sort >microsat_summary.sorted +echo "readsWithBadRanges:"$'\t'"$((N_BAD_PRIMERS * 2))" >>microsat_summary.sorted grep "^$" Output/microsat_summary.txt>>microsat_summary.sorted grep "^Microsat Type" Output/microsat_summary.txt >>microsat_summary.sorted -tail -n +11 Output/microsat_summary.txt >>microsat_summary.sorted +tail -n +11 Output/microsat_summary.txt | sort -r -n -k 5 >>microsat_summary.sorted mv microsat_summary.sorted Output/microsat_summary.txt # # Sort PAL_summary output |
b |
diff -r 3f8bf1a0403b -r a3af1ff4cad1 pal_finder_wrapper.xml --- a/pal_finder_wrapper.xml Thu Mar 22 07:21:26 2018 -0400 +++ b/pal_finder_wrapper.xml Mon May 14 11:10:19 2018 -0400 |
[ |
b'@@ -9,7 +9,7 @@\n <requirement type="package" version="1.65">biopython</requirement>\n <requirement type="package" version="2.8.1">pandaseq</requirement>\n </requirements>\n- <command><![CDATA[\n+ <command detect_errors="exit_code"><![CDATA[\n @CONDA_PAL_FINDER_SCRIPT_DIR@ &&\n @CONDA_PAL_FINDER_DATA_DIR@ &&\n bash $__tool_directory__/pal_finder_wrapper.sh\n@@ -64,6 +64,10 @@\n #if str( $platform.assembly ) == \'-assembly\'\n $platform.assembly "$output_assembly"\n #end if\n+ #set $use_all_reads = $platform.subset_conditional.use_all_reads\n+ #if str( $use_all_reads ) != "yes"\n+ --subset "$platform.subset_conditional.subset"\n+ #end if\n #end if\n ]]></command>\n <inputs>\n@@ -91,6 +95,13 @@\n \t\t label="Select FASTQ dataset collection with R1/R2 pair" />\n \t </when>\n \t</conditional>\n+\t<conditional name="subset_conditional">\n+\t <param name="use_all_reads" type="boolean" label="Use all reads for microsatellite detection?" checked="True" truevalue="yes" falsevalue="no" />\n+\t <when value="no">\n+\t <param name="subset" type="text" value="0.5" label="Number or fraction of reads to use" help="Either an integer number of reads or a decimal fraction (e.g. 0.5 to select 50% of reads)" />\n+\t </when>\n+\t <when value="yes" />\n+\t</conditional>\n \t<param name="filters" type="select" display="checkboxes"\n \t multiple="True" label="Filters to apply to the pal_finder results"\n \t help="Apply none, one or more filters to refine results">\n@@ -106,7 +117,7 @@\n \t<param name="input_fasta" type="data" format="fasta" label="454 fasta file with raw reads" />\n </when>\n </conditional>\n- <param name="min_2mer_repeats" type="integer" value="6" label="Minimum number of 2-mer repeat units to detect" help="Set to zero to ignore repeats of this n-mer unit" />\n+ <param name="min_2mer_repeats" type="integer" value="6" label="Minimum number of 2-mer repeat units to detect" min="1" help="Must detect at least one repeat of this n-mer unit" />\n <param name="min_3mer_repeats" type="integer" value="0" label="Minimum number of 3-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" />\n <param name="min_4mer_repeats" type="integer" value="0" label="Minimum number of 4-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" />\n <param name="min_5mer_repeats" type="integer" value="0" label="Minimum number of 5-mer repeat units" help="Set to zero to ignore repeats of this n-mer unit" />\n@@ -158,8 +169,9 @@\n \t label="Maximum acceptable difference between melting temperatures of left and right primers (PRIMER_PAIR_MAX_DIFF_TM)"\n \t help="Temperature should be in degrees Celsius" />\n </when>\n+ <when value="default" />\n </conditional>\n- <param name="report_bad_primer_ranges" type="boolean" truevalue="True" falsevalue="False" label="Output IDs for input reads which generate bad primer ranges" help="Can be used to screen input Fastqs" />\n+ <param name="report_bad_primer_ranges" type="boolean" truevalue="True" falsevalue="False" label="Output IDs for input reads which generate bad primer product size ranges" help="Can be used to screen reads in input Fastqs " />\n <param name="keep_config_file" type="boolean" truevalue="True" falsevalue="False"\n \t label="Output the config file to the history"\n \t help="Can be used to run pal_finder outside of Galaxy" />\n@@ -254,18 +266,76 @@\n <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats.out.re_match" />\n <output name="output_filtered_microsats" compare="re_match" file="illuminaPE_filtered_microsats_rankmotifs.out.re_match" />\n </test>\n- <!-- Test with Illumina input generating bad primer ranges\n- -->\n+ <!-- Test with Illumina input using subset of reads -->\n+ <test>\n+ <param name="platform_type" value="illumina" />\n+ <param name="filters" value="" />\n+ <param name="assembly" value="false" />\n+ <param name="use_all_re'..b'imer_opt_tm" value="68.0" />\n+ <param name="primer_pair_max_diff_tm" value="3.0" />\n+ <param name="report_bad_primer_ranges" value="true" />\n+ <expand macro="output_illumina_microsat_summary_bad_ranges" />\n+ <output name="output_pal_summary" compare="re_match" file="illuminaPE_microsats_bad_ranges.out.re_match" />\n+ <output name="output_bad_primer_read_ids" file="illuminaPE_bad_primer_read_ids.out" />\n+ </test>\n+ <!-- Test with bad n-mers specified -->\n+ <test expect_failure="true">\n+ <param name="platform_type" value="illumina" />\n+ <param name="filters" value="" />\n+ <param name="assembly" value="false" />\n+ <param name="min_2mer_repeats" value="8" />\n+ <param name="min_3mer_repeats" value="8" />\n+ <param name="min_4mer_repeats" value="0" />\n+ <param name="min_5mer_repeats" value="8" />\n+ <param name="min_6mer_repeats" value="8" />\n+ <param name="input_fastq_r1" value="illuminaPE_r1_no_microsats.fq" ftype="fastqsanger" />\n+ <param name="input_fastq_r2" value="illuminaPE_r2_no_microsats.fq" ftype="fastqsanger" />\n+ <assert_stderr>\n+\t<has_text text="Minimum number of 4-mers cannot be zero if number of 5-mers is non-zero" />\n+ </assert_stderr>\n </test>\n <!-- Test with 454 input -->\n <test>\n@@ -300,26 +370,49 @@\n \n -------------\n \n+.. class:: infomark\n+\n+**Known issues**\n+\n .. class:: warning\n \n-**Known problems**\n-\n-.. class:: infomark\n-\n-**Bad primer product size ranges**\n+**Low number of reads used for microsatellite detection/bad primer product size ranges**\n \n For some datasets pal_finder may generate \'bad\' product size ranges (where the\n lower limit exceeds the upper limit) for one or more reads, for input into\n-primer3_core.\n+primer3_core. In these cases primer3_core will terminate prematurely, which can\n+result in a substantially lower number of reads being used for microsatellite\n+detection and potentially sub-optimal primer design.\n+\n+The number of reads generating the bad size ranges are reported in the\n+*Summary of microsat types* output dataset as \'readsWithBadRanges\'. Ideally\n+the reported value should be zero.\n+\n+The conditions which cause this issue within pal_finder are still unclear,\n+however we believe it to be associated with short or low quality reads. If this\n+problem affects your data then:\n+\n+* Ensure that the input data are sufficiently trimmed and filtered (using\n+ e.g. the Trimmomatic tool) before rerunning pal_finder.\n \n-If this occurs then the tool will terminate with an error. A list of the reads\n-for which the bad ranges were generated can be found in the error message\n-which can be accessed via the \'bug\' icon from a failed dataset.\n+* A list of read IDs for which pal_finder generates bad product size ranges can\n+ be output by turning on *Output IDs for input reads which generate bad primer\n+ ranges*. This outputs an additional dataset with a list of read IDs which can\n+ be used to remove read pairs from the input Fastq files (using e.g. the *Filter\n+ sequences by ID* tool) before rerunning pal_finder.\n+\n+.. class:: warning\n \n-The conditions which cause this error are unclear. However we believe it to be\n-associated with short or low quality reads. It is recommended that the input\n-data are sufficiently trimmed and filtered (using e.g. the Trimmomatic tool)\n-before rerunning pal_finder.\n+**Pal_finder takes a long time to run for large input datasets**\n+\n+pal_finder was originally developed using MiSeq data, and is not optimised for\n+working with the larger Fastqs that are output from other platforms such as\n+HiSeq and NextSeq. As a consequence pal_finder may take a very long time to\n+complete when operating on larger datasets.\n+\n+If this is a problem then the tool can be run using a subset of the input reads\n+by unchecking the *Use all reads...* option and entering either an integer number\n+of reads to use, or a decimal fraction (e.g. 0.5 will select 50% of the reads).\n \n -------------\n \n' |
b |
diff -r 3f8bf1a0403b -r a3af1ff4cad1 pal_finder_wrapper_utils.sh --- a/pal_finder_wrapper_utils.sh Thu Mar 22 07:21:26 2018 -0400 +++ b/pal_finder_wrapper_utils.sh Mon May 14 11:10:19 2018 -0400 |
[ |
@@ -38,17 +38,21 @@ # sequence ids where the PRIMER_PRODUCT_SIZE_RANGE has # upper limit which is smaller than lower limit local pr3in=$1 + local outfile=$2 local pattern="^(SEQUENCE_ID|PRIMER_PRODUCT_SIZE_RANGE)" for line in $(grep -E "$pattern" $pr3in | sed 's/ /^/' | sed 'N;s/\n/*/') do # Loop over pairs of SEQUENCE_ID and PRIMER_PRODUCT_SIZE_RANGE # keywords in the primer3 input if [ ! -z "$(echo $line | grep ^SEQUENCE_ID)" ] ; then - # Extract the values + # Lines look like: + # SEQUENCE_ID=(AT_1_16)(AT_1_16)M00879:99:000000000-AH9KG:1:2107:10006:2535*PRIMER_PRODUCT_SIZE_RANGE=194-329^59-194 local size_range=$(echo $line | cut -d'*' -f2 | cut -d'=' -f2 | tr '^' ' ') local seq_id=$(echo $line | cut -d'*' -f1 | cut -d'=' -f2) - else - local size_range=$(echo $line | cut -d'*' -f1 | cut -d'=' -f2) + elif [ ! -z "$(echo $line | grep ^PRIMER_PRODUCT_SIZE_RANGE)" ] ; then + # Lines look like: + # PRIMER_PRODUCT_SIZE_RANGE=194-329^59-194*SEQUENCE_ID=(AT_1_16)(AT_1_16)M00879:99:000000000-AH9KG:1:2107:10006:2535 + local size_range=$(echo $line | cut -d'*' -f1 | cut -d'=' -f2 | tr '^' ' ') local seq_id=$(echo $line | cut -d'*' -f2 | cut -d'=' -f2) fi seq_id=$(echo $seq_id | cut -d')' -f3) @@ -58,14 +62,14 @@ for range in $(echo $size_range) ; do local lower=$(echo $range | cut -d'-' -f1) local upper=$(echo $range | cut -d'-' -f2) - if [ $lower -gt $upper ] ; then + if [ "$lower" -gt "$upper" ] ; then bad_range=yes break fi done # Report if the range is wrong if [ ! -z "$bad_range" ] ; then - echo "$seq_id ($size_range)" + echo "${seq_id}"$'\t'"(${size_range})" >>$outfile fi done } |
b |
diff -r 3f8bf1a0403b -r a3af1ff4cad1 test-data/illuminaPE_bad_primer_read_ids.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_bad_primer_read_ids.out Mon May 14 11:10:19 2018 -0400 |
b |
@@ -0,0 +1,1 @@ +M00879:99:000000000-AH9KG:1:2107:14372:5471 (74-71 77-74) |
b |
diff -r 3f8bf1a0403b -r a3af1ff4cad1 test-data/illuminaPE_microsats_bad_ranges.out.re_match --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_microsats_bad_ranges.out.re_match Mon May 14 11:10:19 2018 -0400 |
b |
@@ -0,0 +1,7 @@ +readPairID\ Motifs\(bases\)\ Bases\ in\ all\ Motifs\ Possible\ Extended\ Possible\ Spanning\ Primers\ found\ \(1\=y\,0\=n\)\ F\ Primer\ Name\ Forward\ Primer\ R\ Primer\ Name\ Reverse\ Primer\ Amplicon\ Motifs\ Number\ motif\ bases\ in\ amplicon\ Primers\ on\ sep\ reads\ Extend\ with\ primers\ Spand\ with\ primers\ Occurances\ of\ Forward\ Primer\ in\ Reads\ Occurances\ of\ Reverse\ Primer\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs +M00879\:99\:000000000\-AH9KG\:1\:2107\:10006\:2535\ AT\(16\)\ AT\(16\)\ \ 32\ AT\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ +M00879\:99\:000000000\-AH9KG\:1\:2107\:10032\:7900\ .*\ \ 164\ \ \ 1\ test\_.*\ (CGAAAGATGCTATAGAAGCGATGGGG|TATCTATCTATCAATCCGCTCCCC)\ test\_.*\ (GGACATCGAGATAGAAAGGGGACCG|TGATTGGACATCGAGATAGAAAGGG)\ .*\ \ 80\ 1\ \ \ .*\ .*\ 1\ 1 +M00879\:99\:000000000\-AH9KG\:1\:2107\:10061\:6317\ .*\ \ 76\ \ \ 1\ test\_.*\ GAGAGAGTACATAGATATCTCACGGGGCG\ test\_.*\ GCAACGGCACAGATCTCTTCTACGG\ .*\ \ 22\ 1\ \ \ 1\ 1\ 1\ 1 +M00879\:99\:000000000\-AH9KG\:1\:2107\:10072\:8112\ .*\ \ 44\ \ \ 1\ test\_.*\ AGTTTGTTACAGGGCATGACAACGG\ test\_.*\ TCCTGTTATCTTCTTGTTGCTTGGC\ .*\ \ 22\ 1\ \ \ 1\ 1\ 1\ 1 +M00879\:99\:000000000\-AH9KG\:1\:2107\:10084\:6474\ .*\ \ 100\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ +M00879\:99\:000000000\-AH9KG\:1\:2107\:14372\:5471\ .*\ \ 68\ .*\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ |
b |
diff -r 3f8bf1a0403b -r a3af1ff4cad1 test-data/illuminaPE_microsats_subset.out.re_match --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_microsats_subset.out.re_match Mon May 14 11:10:19 2018 -0400 |
b |
@@ -0,0 +1,6 @@ +readPairID\ Motifs\(bases\)\ Bases\ in\ all\ Motifs\ Possible\ Extended\ Possible\ Spanning\ Primers\ found\ \(1\=y\,0\=n\)\ F\ Primer\ Name\ Forward\ Primer\ R\ Primer\ Name\ Reverse\ Primer\ Amplicon\ Motifs\ Number\ motif\ bases\ in\ amplicon\ Primers\ on\ sep\ reads\ Extend\ with\ primers\ Spand\ with\ primers\ Occurances\ of\ Forward\ Primer\ in\ Reads\ Occurances\ of\ Reverse\ Primer\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ Reads\ Occurances\ of\ Amplifiable\ Primer\ Pair\ in\ PALs +ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:17449\:1584\ (AC|TG)\(36\)\ \ 36\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ +ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:5626\:1554\ AT\(14\)\ (AC|TG)\(16\)\ (AC|TG)\(16\)\ AT\(12\)\ \ 58\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ +ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:5879\:1238\ AT\(12\)\ \ 12\ \ \ 0\ \ \ \ \ \ \ \ \ \ \ \ \ +ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8157\:1636\ (AC|TG)\(12\)\ \ 12\ \ \ 1\ test\_.*\ AAGTACAGTGGGGAGGCTGG\ test\_.*\ TTTTCTACACAGCTCAAGTAGCCC\ (AC|TG)\(12\)\ \ 12\ 1\ \ \ 1\ 1\ 1\ 1 +ILLUMINA\-545855\:49\:FC61RLR\:2\:1\:8899\:1514\ (AC|TG)\(12\)\ (AC|TG)\(12\)\ \ 24\ \ \ 1\ test\_.*\ TCTTTATCTAAACACATCCTGAAATACC\ test\_.*\ AAACGCAATTATTTTGAGATGTCC\ (AC|TG)\(12\)\ (AC|TG)\(12\)\ \ 24\ 1\ \ \ 1\ 2\ 1\ 1 |
b |
diff -r 3f8bf1a0403b -r a3af1ff4cad1 test-data/illuminaPE_r1_bad_ranges.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_r1_bad_ranges.fq Mon May 14 11:10:19 2018 -0400 |
b |
@@ -0,0 +1,24 @@ +@M00879:99:000000000-AH9KG:1:2107:10006:2535 1:N:0:TAAGGCGA+TAGATCGC +ATATATATATATATATGTAGTATAATCTCACTGGAAAGGAGACGAAAACGAGCAGCTCCGAGCTTTCGACTTTATTTCAAGTCATCTTCAGGGCAACTGACAAATTTTTGTGTAGCAATAGTATATAGACACCAGACGAGATTCCTGACCTCACATCTGGGAGG ++ +CCCCCFGGGGGGGGGGGGGGGFCGGGGGGFGCG@FFGCDGGGGGGGGEGGGDGGFDCFGGGCDGGGGGGGGGGGGGGGGDGFGFGFGFFGGG@FEGGDEFGGGGGEFGGGGGGGFGFGGGGGGAFE?9FGGGGGGGG@F7F7AFDGGFFGFFGGG9EE9D8?>8 +@M00879:99:000000000-AH9KG:1:2107:10032:7900 1:N:0:TAAGGCGA+TAGATCGC +ATCTATGTATTTATCTATCTCCTCTCTCTCTCTCTCTCTCTCTCTCTCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCTATCAATCCGCTCCCCTTTCTATCTCGATGTCCAATCATTATACACACACCTACACGAAAGATGCTATAGAAGCGATGGGGGACTATAGGTGTATAGCAACTCTATACATCAACCAGTCTCTGCGCTCGTCCCCCTGTCG ++ +CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGEGGGGGGGGGGGGGGGFGGGCFGG@FGGEFGGDGGGGGGGGGGGGGGA9EFFC,@@F@FG8FGG9?EGG,,4:?@FFAGG@EFF<B,CFBEGGD:EFGD7F+>+,4,@,9E9,@ECEEEFC+8+4>B88;=E3,,2,@A68,7=@DDGED=A8=A8FGE+@DDD,++0@+=0+<3<>095?** +@M00879:99:000000000-AH9KG:1:2107:10061:6317 1:N:0:TAAGGCGA+TAGATCGC +TATATATATATATGAGAAAACTGGTGTCGGTGAGGACTCATCAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGTACATAGATATCTCACGGGGCGAGCTGCCGCTGCTGCTGCTGCCGCGGAGGGTGCAGAATAAGACGAGGAGGTGGAGAGAGGTGTGGAAAAAGTGCCGTAGAAGAGATCTGTGCCGTCGCACAGCTCTCGTGGTGCACCTATGGGAGAGGCGG ++ +CCCC@FFFFFGGFGGGGGGGG8EFG?FFGGEGGGFGGGGGGGGGGGGGGDFGGGGGGGGGGGGGGGGGGGGFG@CFFGGGGGGGGECGGGGGGGF,9,,C,E,?,,9,5<,,:=+78B+++@,,,+++6+,8,,A55A,,+++@+87:*3*@3D79;,,,451***>B4<**6,=***4=;8*+++*/*21+0**3//CEG86+A9:1<++2**)*)2)**+629*9*715:***00*:*755*0*.00 +@M00879:99:000000000-AH9KG:1:2107:10072:8112 1:N:0:TAAGGCGA+TAGATCGC +GTGTGTGTGTGTGTGTGTGTGTTGGTTGTGAAAGTTATCTCAGCTTGTTATATGGAATGACGACTGTCAAGCTTACCAGTTTCTTACATGGGATGACAAAGGCTAGGCTTACCAACTTGTTACATGTAATGACATCGGTAAAGCTTATCAGTTTGTTACAGGGCATGACAACGGTCAAGCTTACCAGCATGATACATGTAGTGACGAAGGCCAAGCCACAAGAAGATAACAGGCAC ++ +CCCCCFGGGGFFFGGFFGFGGGGGGGGGGDFGGGCGFGGGG,FFGGFFFCDCDEFFCFGGFGCEGDCG<DGGFA@FCFCFDGFGCEFFGG,CEECFGFGFG,BAFFGGGGGGGAFEC,EFDGGGFGGGGGFGFGGGGEEFGBAFDFCGCAFGGGGEEFFGCFG+9EFGGGD88BECEGGG?EGGGG8,@EF,,@DDFA@3@:DA9CEEDE6AFGGF7D+5CEF591C:>7>CD*;F +@M00879:99:000000000-AH9KG:1:2107:14372:5471 1:N:0:TAAGGCGA+TAGATCGC +GAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAG ++ +CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG +@M00879:99:000000000-AH9KG:1:2107:10084:6474 1:N:0:TAAGGCGA+TAGATCGC +CAATTACCTAAGTATCTGAATAGTCATTTCCCTATTAGGGGTGATCATAGTCGAACGGTAGCACTCCCGCTGGTCACGCTCTGGACCAGAGTTCAAATCTTGGCTGGTCGAGGTTTACGTGTTTTATATCTATCTATCTATCTATCTATCTATCTATCTATATATATATATATATACATATATAT ++ +CCCCCGGGGGGGFGGGGGGGGGGFGGGGGGGGGCFGGGGGGFGGGGGGGGGGGGGGG@FGGGGGFGGGGGGDGGGGGGGGGGGGGGGGGGFECCEEEFGGGGGGGG>FGG>C+BE,BF:FGFFGGGGGGGGGGGGGGGGGFGGGGGGGFGGGGFDEEGGGGFGGGCFGGGGGGGGFDFDCFCD;E |
b |
diff -r 3f8bf1a0403b -r a3af1ff4cad1 test-data/illuminaPE_r1_no_microsats.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_r1_no_microsats.fq Mon May 14 11:10:19 2018 -0400 |
b |
@@ -0,0 +1,32 @@ +@ILLUMINA-545855:49:FC61RLR:2:1:10979:1695 1:N:0:TCCTGA +TACTGTTTAGAATAGACTGTTCTCCCACTATATTTTGCATTGGTGCATACTCAGCTTTAGTAATAAGTGTGATTCTGGTAGAGAGAGAGAGAGATACCAACCTCTTCTTCCCACTA ++ +IIIIIHIIIGHHIIDGHIIIIIIHIIIIIIIIIIIIHGIIIIHGIIIIHHHIHHGHIIHHEHHIGHGDHDHHHGGHEGAHGDGEHEGEGBGCEDBDDGEEFDEECGDEGDEDEDE8 +@ILLUMINA-545855:49:FC61RLR:2:1:19063:1614 1:N:0:TCCTGA +TATATATATATATACACATATATATATATATTTTTTACATTATTTCACTTCGCCCAAACTAGAGAGTCTAACAAAGTACAACCCAGCATATTAAAGTTCATCTCAGTTTTGTTCTG ++ +IIIIIIIGIEIIIIGIIIHHIHIGIGIIIIIHIIIIHGBEIIIIHIFHHIHFIIIIFHFIHIIHFF>EEEBDGDD;BD8DDBBDDBGHGHHHHEFE=DBCDEEEBEBEGHGAFH@E +@ILLUMINA-545855:49:FC61RLR:2:1:1978:1220 1:N:0:TCCTGA +TCATAAGAATGAGCAGTAAACAAAGGCAAAGGGGAGATAACACACACACACAAAATAAAAAAACATCAATTTCTAATACACGCCTTTATTATAAAGAAATAAATCACTGAAAAACA ++ +DDDDDBDBDDB7?BBB5BB>1?<?88>DDD?4(6367;>?2<164=;>8<0DDD>>A6A?>;;6>+6>><>4-8484888&@D@@DD<D;D>>?<9DD;6DDC<DD;<88@##### +@ILLUMINA-545855:49:FC61RLR:2:1:5879:1238 1:N:0:TCCTGA +TCCCCACCCTGTCATGGTTCTATGTTTTTGTTTTTGTTTTTGTTTTTATGGTTTCCGTATTCCACATTAAAACCTTATGTAACGTACGGGCCAATAAATAGTTACTCGCCATATCC ++ +#################################################################################################################### +@ILLUMINA-545855:49:FC61RLR:2:1:6204:1090 1:N:0:TCCTGA +TGCTTTGGTTCTAAGAGAAAAACAATTATTATAAATGTTTATAATTGATGATAAGCATTTTTGTACAAAGCCAAGACCATTCTGAATGAAGCACCCAAAAAGCCCGGAGGCAACAA ++ +#################################################################################################################### +@ILLUMINA-545855:49:FC61RLR:2:1:8044:1926 1:N:0:TCCTGA +TAGATTTTTTTTTTTATATATATATAAATATAGATGTACATATATTTATATAAATATAAAAGCACAGCATCCTCCTGTCTCTCCTCCTGATTTATTATGGTTAAAGCTTGTGACAG ++ +HHHHHHHHHHHHHHHFHFGFHEFFDF92=@=>;;;=HHHGHHHHHHHHHHHHHHHHGHHHFHHHHFHFDHHHHHHHHHHHHHHHHGHHGGHHHHHHHHHHEHHHHGEHHHEFE>EB +@ILLUMINA-545855:49:FC61RLR:2:1:8157:1636 1:N:0:TCCTGA +TACTAGTCTAATAATTGCAGGCAGCTGAACTAGATAGGTCCTAAAGTACAGTGGGGAGGCTGGTGTGTGTGTGTGCATGGGATTGTCAGCCTTACCATCAGTCCTGATTTGTAGGT ++ +HHHHHHHHHHHHHFHHHHHGHHHHHHGHHHHGHHHHHH>HHGGGGFFHHHHGHGHHFHHHHGGBGDCAC>CBDBDDCFGED@BDDB@BBBBECCD@<CC>C=?9==@CECB=BBB8 +@ILLUMINA-545855:49:FC61RLR:2:1:8899:1514 1:N:0:TCCTGA +TCTTTATCTAAACACATCCTGAAATACCATCTGTTACACACACACACAGCAGTGGAAGTATAAAAAAAAATCTGGACATCTCAAAATAATTGCGTTTCTGAAGTGTTACATTTTTC ++ +IIIIIHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGIIIIIGIIIIIIIIIIIIIIHHGIIIIHIHHHHIHHHHHHHGHHHHHGFHEHEHHHHHHHII> |
b |
diff -r 3f8bf1a0403b -r a3af1ff4cad1 test-data/illuminaPE_r2_bad_ranges.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_r2_bad_ranges.fq Mon May 14 11:10:19 2018 -0400 |
b |
@@ -0,0 +1,24 @@ +@M00879:99:000000000-AH9KG:1:2107:10006:2535 2:N:0:TAAGGCGA+TAGATCGC +CCTCCCAGATGTGAGGTCAGGAATCTCGTCTGGTGTCTATATACTATTGCTACACAAAAATTTGTCAGTTGCCCTGAAGATGACTTGAAATAAAGTCGAAAGCTCGGAGCTGCTCGTTTTCGTCTCCTTTCCAGTGAGATTATACTACATATATATATATATAT ++ +CCCCCGGGGGFGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG8FFFGGGGGGFGGGGGGGCEFFGGGGGFGGGGGGGGGGGGGGGFGFCEGGFGG8EFEFDFGEFGFFGGCFGGFAFF,9AFDGGGGGGGGGGGGGGEADDFGFEA +@M00879:99:000000000-AH9KG:1:2107:10032:7900 2:N:0:TAAGGCGA+TAGATCGC +CTACAGGGGGCCGAGCACAGAGACTGGTTGATGTATAGAGTTGCTATACTCCTATAGTCCCCCATCGCTTCTATAGCATCTTTCGTGTAGGTGTGTCTATAATGATTGGACATCGAGATAGAAAGGGGACCGGATTGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGATAGAGAGAGAGAGAGAGAGAGAGAGCGAGGAGATAGATAAATACATCGAT ++ +CCCCCGGGGDGGGEGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFGGGGGGGGGGGGGGGGGGGCGFGGGGGGGGGGGGGGGGGGGGGGCGGGGGGGGGFFGGGFGDGGGFGFGGGGGGGGGGEGFFGFFCGFGGGCFG9FFGGGGFCFGGGGGGGGG8DAFGFFGFGGDGDFGFCFFD72DAFF>?BCFFF5>9A>DFFFFFF*):494?>BFEE4<*9*@:=?*-(- +@M00879:99:000000000-AH9KG:1:2107:10061:6317 2:N:0:TAAGGCGA+TAGATCGC +GCCTCGGGACCTTGGCACGCCGCCTCTCCCATAGATGCACCATGAGATCCGTGCAACGGCACAGATCTCTTCTACGGAACTTTTGCCACACCTCACTTCACCTTCTTGCCTTATTCTGCACCACCTGCAGCAGCAGCAGCAGCAGCAGCTCGCCCCGAGTTACATCTATGTACTCTCTCCCTCTCTCTCTCTCTCTCTCTCTCCCTCTCTCTCTCCCTCCCTCTCCTCTCACCCCACACTCACACCCGC ++ +CCCCCFGGGGGGGFFGGGGGGGGGGGGGGGGGGFGGGFEGGGGEGFGGGGGGGGGGGEGGGGGGGGGCGFFGFGGFG@FGGFEF9FGGGCGGEFEFGGGDGG@FE?EEFGGGGG,EE,EFGGFGGFDG,@FFFFG8D8=E8>EEEGC=D=D6CEGC61C=8:ECFG8AA9<2CCC<C?C+=0==DAF9C7;;@0;0@09*@*9*)3;):/;.7.>*7):(54>3-(0(*(0*-(511(6(/(6)6/(,( +@M00879:99:000000000-AH9KG:1:2107:10072:8112 2:N:0:TAAGGCGA+TAGATCGC +GTTCCTGTTATCTTCTTGTTGCTTGGCCTTCGTCACTACATGTAACATGCTGGTAAGCTTGACCGTTGTCATTCCCTGTAACAAACTGATAAGCTTTACCGATGTCATTACATGTAACCAGTTGGTAAGCCTAGCCTTTGTCATCCCATGTAAGAAACTGGTAAGCTTGACAGTCGTCATTCCATATAACAAGCTGAGATAACTTTCACAACCAACACACACACACACACACACACCTGTCTCCTATAC ++ +CCCCCGGGGGGGGGGGGFFGFGGGG7FFGGGGGGGGGGGGGFGFFFAEGGGCFFG@AGGGC,EFGGGGGGGG@EEGGGGFGGFGGGFGGFGCFGGGGGGGGCEEGGCFEFGDEFGGFG,CFFFEGGGDG9EFFFGGGGFAFGGGGG84E=EFGGG;AGDDFFFGGFGG8=DAFGFG=D88FFG@9D@@FDD+;D56D?FFFFFFD=7*;2:)=855)=DF=>=AAD==@DDA)=@@5)):)3;9A***9 +@M00879:99:000000000-AH9KG:1:2107:14372:5471 2:N:0:TAAGGCGA+TAGATCGC +CTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTCTC ++ +ACCCCFGE<FFFGGGGCFFFEF@EGGGGGGGGGGG +@M00879:99:000000000-AH9KG:1:2107:10084:6474 2:N:0:TAAGGCGA+TAGATCGC +ATATATATGTATATATATATATATATAGATAGATAGATAGATAGATAGATAGATAGATATAAAACACGTAAACCTCGACCAGCCAAGATTTGAACTCTGGTCCAGAGCGTGACCAGCGGGAGTGCTACCGTTCGACTATGATCACCCCTAATAGGGAACTGACTATTCAGATACTTAGGTAATTG ++ +CCCCCGGGGGGGGGGGGGGGGGGGCFGFGGGCGGGFGFGFGCFGGFGGGGGGGGGGGGGGGGGGGGFCFFFGGGG<FEFGGFCEFGGGGGGFFAGFGGCCFGGEDGFDGGGGGGGGGG7@FG=AEFGGGGDGC8EGGGGFFEFEGGGGGGGDCF8@FE+==AF9=FFGGFGDGFDA=?DDG+3?9 |
b |
diff -r 3f8bf1a0403b -r a3af1ff4cad1 test-data/illuminaPE_r2_no_microsats.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/illuminaPE_r2_no_microsats.fq Mon May 14 11:10:19 2018 -0400 |
b |
@@ -0,0 +1,32 @@ +@ILLUMINA-545855:49:FC61RLR:2:1:10979:1695 2:N:0:TCCTGA +TACTGTTTAGAAAGCCTGTTCCAGAACTTGATCACTGTCACAGAAAATCTTTCTTACTATCCAGACTGAAGCTACCCTGGTGCAGCTTTGTGCTGTTACCTTGAGTCATGTCATCA ++ +IIIIIHIIIIIHIIIIHIHIIIIIIIHIIIHIIIHIHGIIIIIIHEHHHHHHHHIIHIIHFHHEHHIGHHGHGIHGHHFGAGIEHHGEGHBFIFDBHHHGDFHBDBHGGFGD8EB@ +@ILLUMINA-545855:49:FC61RLR:2:1:19063:1614 2:N:0:TCCTGA +TATATATATATATAAACATATATATATATATTTTTTTCTCATTTCAGAACAAAAGTGAGATGAACTTTAATATGGTGGGGTGTATTTTGAGAGACTCTCTAGTTTGGGAGGAGTGA ++ +DDDDDDDDDDDD:D@D+DDDDDDDDDDDD6DDDDD>A@:5>@########################################################################## +@ILLUMINA-545855:49:FC61RLR:2:1:1978:1220 2:N:0:TCCTGA +TCCTCTGACTAGGCAACAACAGCTTTTTTGCTCCTGGGCAGAGGTGTTCCGAGTGTATATTTTTTATAATTACGGCGCGCATTGGAAATTGATGTTATTTTATTTTGCGTGTGTGT ++ +B?/?################################################################################################################ +@ILLUMINA-545855:49:FC61RLR:2:1:5879:1238 2:N:0:TCCTGA +TCCCCACCCTGTCATGGTTCTATGTATATATATATAGCCATGTGTGTGGTACCAGGGATAGGTACCTGGGATTGGGGCAGTGACACTTTAGTGCCCCGTACACTACATGATGTTTT ++ +HHHHHHHHHHHHHHHHHEHHGGGGFDHGHGHHHHHHFHHHHHGHEHEHEBEHEFB8EEEFEDAGEDBCEBBB@>BEC>@B@DCBBBECBB<AECBBBC>BA>B<;BA@A@###### +@ILLUMINA-545855:49:FC61RLR:2:1:6204:1090 2:N:0:TCCTGA +TGCTTTGGTTCTAAGAGAAAAACAAGTGATGCACAAGCAATTCCTCGCCACCACCCAACTGATGCCCAGCCACCCCCCCAAGCAGTGAAAGAGAGAGAGAGATGAACCCCCTTCAA ++ +HHHDBHHHEFGHHHHEHFHEHDDDDD@HHEHHEEHEFFEEEGDEGGGGEGEB>EBC>@@@@@@BB@FEBFBBB@A>AAA<;>A>;3>=??>>BB>?>@?1?>:9*@########## +@ILLUMINA-545855:49:FC61RLR:2:1:8044:1926 2:N:0:TCCTGA +TCAGGCAAGGTCACTGCCACCACTGGGGAGTGCCTGTTTCTGAAGGGCCCAGCCAACTCTGTCACAAGCTTTAACCATAATAAATCAGGAGGAGAGACAGGAGGATGCTGTGCTTT ++ +IIIIIIIIIIHIIIIIIIIIIIIIIGIIIIFIIIIIGHHIIIIIGGEIHGHGHHHGGCHHCGGGGGGHGHGEGEGEGGGDGBECCBGGEDGB;8?BBBBDABC@:3==;=:<30:6 +@ILLUMINA-545855:49:FC61RLR:2:1:8157:1636 2:N:0:TCCTGA +TAAACAACCAAATGAAACCATCTTTTCTACACAGCTCAAGTAGCCCTACATACAACACAAGCCACCTACAAATCAGGACTGATGGTAAGGCTGACAATCCAATCCACCACAACAAC ++ +HFHHHHHHHHHHHHDHGHHDHHHHHHHHHHHHHHHHHHGGGHHGHHHHHHHHHHHGHHHHH@HHHFHGHFHHEHHHHHHDHBFEBBHFFDHE>EFHBEFD################ +@ILLUMINA-545855:49:FC61RLR:2:1:8899:1514 2:N:0:TCCTGA +TATCATTGAAATTTTTATAAAAACTGTGAAGAGAAAAATGTAACACTTCAGAAACGCAATTATTTTGAGATGTCCAGATTTTTTTTTATACTTCCACTGCTGTGTGTGTGTGTAAC ++ +IG+GGGIIIIIIIIIIDIIIIIIGHHIIHIIIIIEGHIHIHIIHIIIIIIIIIIIIIIHIIDIIIIIEDIIIDIHGHFIIIIIIIIIHIFFBHGIGBGGHBDBFEGEGCGEIEDEB |