Mercurial > repos > iuc > fasta_regex_finder

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastaregexfinder.py	Wed Jan 25 14:03:40 2023 +0000
@@ -0,0 +1,281 @@
+#!/usr/bin/env python
+
+import argparse
+import operator
+import re
+import sys
+
+VERSION = "0.1.1"
+
+parser = argparse.ArgumentParser(
+    description="""
+
+DESCRIPTION
+
+    Search a fasta file for matches to a regex and return a bed file with the
+    coordinates of the match and the matched sequence itself.
+
+    Output bed file has columns:
+    1. Name of fasta sequence (e.g. chromosome)
+    2. Start of the match
+    3. End of the match
+    4. ID of the match
+    5. Length of the match
+    6. Strand
+    7. Matched sequence as it appears on the forward strand
+
+    For matches on the reverse strand it is reported the start and end position on the
+    forward strand and the matched string on the forward strand (so the G4 'GGGAGGGT'
+    present on the reverse strand is reported as ACCCTCCC).
+
+    Note: Fasta sequences (chroms) are read in memory one at a time along with the
+    matches for that chromosome.
+    The order of the output is: chroms as they are found in the inut fasta, matches
+    sorted within chroms by positions.
+
+EXAMPLE:
+    ## Test data:
+    echo '>mychr' > /tmp/mychr.fa
+    echo 'ACTGnACTGnACTGnTGAC' >> /tmp/mychr.fa
+
+    fastaRegexFinder.py -f /tmp/mychr.fa -r 'ACTG'
+        mychr	0	4	mychr_0_4_for	4	+	ACTG
+        mychr	5	9	mychr_5_9_for	4	+	ACTG
+        mychr	10	14	mychr_10_14_for	4	+	ACTG
+
+    fastaRegexFinder.py -f /tmp/mychr.fa -r 'ACTG' --maxstr 3
+        mychr	0	4	mychr_0_4_for	4	+	ACT[3,4]
+        mychr	5	9	mychr_5_9_for	4	+	ACT[3,4]
+        mychr	10	14	mychr_10_14_for	4	+	ACT[3,4]
+
+    less /tmp/mychr.fa | fastaRegexFinder.py -f - -r 'A\\w\\wGn'
+        mychr	0	5	mychr_0_5_for	5	+	ACTGn
+        mychr	5	10	mychr_5_10_for	5	+	ACTGn
+        mychr	10	15	mychr_10_15_for	5	+	ACTGn
+
+DOWNLOAD
+    fastaRegexFinder.py is hosted at https://github.com/dariober/bioinformatics-cafe/tree/master/fastaRegexFinder
+
+    """,
+    formatter_class=argparse.RawTextHelpFormatter,
+)
+
+parser.add_argument(
+    "--fasta",
+    "-f",
+    type=str,
+    help="Input fasta file to search. Use '-' to read the file from stdin.",
+    required=True,
+)
+
+parser.add_argument(
+    "--regex",
+    "-r",
+    type=str,
+    help="""Regex to be searched in the fasta input.
+Matches to the reverse complement will have - strand.
+The default regex is '([gG]{3,}\\w{1,7}){3,}[gG]{3,}' which searches
+for G-quadruplexes.""",
+    default="([gG]{3,}\\w{1,7}){3,}[gG]{3,}",
+)
+
+parser.add_argument(
+    "--matchcase",
+    "-m",
+    action="store_true",
+    help="""Match case while searching for matches. Default is
+to ignore case (I.e. 'ACTG' will match 'actg').
+                   """,
+)
+
+parser.add_argument(
+    "--noreverse",
+    action="store_true",
+    help="""Do not search the reverse complement of the input fasta.
+Use this flag to search protein sequences.""",
+)
+
+parser.add_argument(
+    "--maxstr",
+    type=int,
+    required=False,
+    default=10000,
+    help="""Maximum length of the match to report in the 7th column of the output.
+Default is to report up to 10000nt.
+Truncated matches are reported as <ACTG...ACTG>[<maxstr>,<tot length>]
+                   """,
+)
+
+parser.add_argument(
+    "--seqnames",
+    "-s",
+    type=str,
+    nargs="+",
+    default=[None],
+    required=False,
+    help="""List of fasta sequences in --fasta to
+search. E.g. use --seqnames chr1 chr2 chrM to search only these crhomosomes.
+Default is to search all the sequences in input.
+                   """,
+)
+parser.add_argument(
+    "--quiet",
+    "-q",
+    action="store_true",
+    help="""Do not print progress report (i.e. sequence names as they are scanned).
+                   """,
+)
+
+
+parser.add_argument("--version", "-v", action="version", version="%(prog)s " + VERSION)
+
+
+args = parser.parse_args()
+
+" --------------------------[ Check and parse arguments ]---------------------- "
+
+if args.matchcase:
+    flag = 0
+else:
+    flag = re.IGNORECASE
+
+" ------------------------------[  Functions ]--------------------------------- "
+
+
+def sort_table(table, cols):
+    """Code to sort list of lists
+    see http://www.saltycrane.com/blog/2007/12/how-to-sort-table-by-columns-in-python/
+
+    sort a table by multiple columns
+        table: a list of lists (or tuple of tuples) where each inner list
+               represents a row
+        cols:  a list (or tuple) specifying the column numbers to sort by
+               e.g. (1,0) would sort by column 1, then by column 0
+    """
+    for col in reversed(cols):
+        table = sorted(table, key=operator.itemgetter(col))
+    return table
+
+
+def trimMatch(x, n):
+    """Trim the string x to be at most length n. Trimmed matches will be reported
+    with the syntax ACTG[a,b] where Ns are the beginning of x, a is the length of
+    the trimmed strng (e.g 4 here) and b is the full length of the match
+    EXAMPLE:
+        trimMatch('ACTGNNNN', 4)
+        >>>'ACTG[4,8]'
+        trimMatch('ACTGNNNN', 8)
+        >>>'ACTGNNNN'
+    """
+    if len(x) > n and n is not None:
+        m = x[0:n] + "[" + str(n) + "," + str(len(x)) + "]"
+    else:
+        m = x
+    return m
+
+
+def revcomp(x):
+    """Reverse complement string x. Ambiguity codes are handled and case conserved.
+
+    Test
+    x= 'ACGTRYSWKMBDHVNacgtryswkmbdhvn'
+    revcomp(x)
+    """
+    compdict = {
+        "A": "T",
+        "C": "G",
+        "G": "C",
+        "T": "A",
+        "R": "Y",
+        "Y": "R",
+        "S": "W",
+        "W": "S",
+        "K": "M",
+        "M": "K",
+        "B": "V",
+        "D": "H",
+        "H": "D",
+        "V": "B",
+        "N": "N",
+        "a": "t",
+        "c": "g",
+        "g": "c",
+        "t": "a",
+        "r": "y",
+        "y": "r",
+        "s": "w",
+        "w": "s",
+        "k": "m",
+        "m": "k",
+        "b": "v",
+        "d": "h",
+        "h": "d",
+        "v": "b",
+        "n": "n",
+    }
+    xrc = []
+    for n in x:
+        xrc.append(compdict[n])
+    xrc = "".join(xrc)[::-1]
+    return xrc
+
+
+# -----------------------------------------------------------------------------
+
+psq_re_f = re.compile(args.regex, flags=flag)
+# psq_re_r= re.compile(regexrev)
+
+if args.fasta != "-":
+    ref_seq_fh = open(args.fasta)
+else:
+    ref_seq_fh = sys.stdin
+
+ref_seq = []
+line = (ref_seq_fh.readline()).strip()
+chr = re.sub("^>", "", line)
+line = (ref_seq_fh.readline()).strip()
+gquad_list = []
+while True:
+    if not args.quiet:
+        sys.stderr.write("Processing %s\n" % (chr))
+    while line.startswith(">") is False:
+        ref_seq.append(line)
+        line = (ref_seq_fh.readline()).strip()
+        if line == "":
+            break
+    ref_seq = "".join(ref_seq)
+    if args.seqnames == [None] or chr in args.seqnames:
+        for m in re.finditer(psq_re_f, ref_seq):
+            matchstr = trimMatch(m.group(0), args.maxstr)
+            quad_id = str(chr) + "_" + str(m.start()) + "_" + str(m.end()) + "_for"
+            gquad_list.append(
+                [chr, m.start(), m.end(), quad_id, len(m.group(0)), "+", matchstr]
+            )
+        if args.noreverse is False:
+            ref_seq = revcomp(ref_seq)
+            seqlen = len(ref_seq)
+            for m in re.finditer(psq_re_f, ref_seq):
+                matchstr = trimMatch(revcomp(m.group(0)), args.maxstr)
+                mstart = seqlen - m.end()
+                mend = seqlen - m.start()
+                quad_id = str(chr) + "_" + str(mstart) + "_" + str(mend) + "_rev"
+                gquad_list.append(
+                    [chr, mstart, mend, quad_id, len(m.group(0)), "-", matchstr]
+                )
+        gquad_sorted = sort_table(gquad_list, (1, 2, 3))
+        gquad_list = []
+        for xline in gquad_sorted:
+            xline = "\t".join([str(x) for x in xline])
+            print(xline)
+    chr = re.sub("^>", "", line)
+    ref_seq = []
+    line = (ref_seq_fh.readline()).strip()
+    if line == "":
+        break
+
+# gquad_sorted= sort_table(gquad_list, (0,1,2,3))
+#
+# for line in gquad_sorted:
+#    line= '\t'.join([str(x) for x in line])
+#    print(line)
+sys.exit()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastaregexfinder.xml	Wed Jan 25 14:03:40 2023 +0000
@@ -0,0 +1,162 @@
+<tool id="fasta_regex_finder" name="Fasta regular expression finder" version="0.1.0">
+    <description>
+        Search in fasta for regexp match
+    </description>
+    <requirements>
+        <requirement type="package" version="3.8">python</requirement>
+    </requirements>
+    <version_command>python '$__tool_directory__/fastaregexfinder.py' --version</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+python '$__tool_directory__/fastaregexfinder.py'
+--fasta '$input'
+--regex '$regex'
+#if $settings.advanced == "advanced"
+    $settings.matchcase
+    $settings.noreverse
+    --maxstr $settings.maxstr
+    #if $settings.seqnames != ""
+        --seqnames $settings.seqnames
+    #end if
+#end if
+--quiet
+> '$output'
+    ]]></command>
+    <inputs>
+        <param type="data" name="input" format="fasta" />
+        <param name="regex" size="30" type="text" value="([gG]{3,}\w{1,7}){3,}[gG]{3,}" label="Regular expression" help="(--regex)">
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+                <mapping initial="none">
+                    <add source="&apos;" target="__sq__"/>
+                </mapping>
+            </sanitizer>
+        </param>
+        <conditional name="settings">
+            <param name="advanced" type="select" label="Specify advanced parameters">
+                <option value="simple" selected="true">No, use program defaults.</option>
+                <option value="advanced">Yes, see full parameter list.</option>
+            </param>
+            <when value="simple">
+            </when>
+            <when value="advanced">
+                <param name="matchcase" type="boolean" label="Match case" truevalue="--matchcase" falsevalue="" help="(--matchcase)" />
+                <param name="noreverse" type="boolean" label="Do not search the reverse complement" truevalue="--noreverse" falsevalue="" help="(--noreverse)" />
+                <param name="maxstr" type="integer" label="Maximum length of the match to report" value="10000" min="1" help="(--maxstr)" />
+                <param name="seqnames" size="30" type="text" value="" label="Space separated list of fasta sequences to search" help="--seqnames"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output" format="bed" from_work_dir="TestSeqGroup-G4.bed" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="TestSeqGroup-G4.fasta"/>
+            <output name="output" file="TestSeqGroup-G4.bed"/>
+        </test>
+        <test>
+            <param name="input" value="test.fas"/>
+            <param name="regex" value="ACTG"/>
+            <output name="output" file="test-1.bed"/>
+        </test>
+        <test>
+            <param name="input" value="test.fas"/>
+            <param name="regex" value="ACTG"/>
+            <param name="advanced" value="advanced"/>
+            <param name="matchcase" value="--matchcase"/>
+            <output name="output" file="test-2.bed"/>
+        </test>
+        <test>
+            <param name="input" value="test.fas"/>
+            <param name="regex" value="ACTG"/>
+            <param name="advanced" value="advanced"/>
+            <param name="noreverse" value="--noreverse"/>
+            <output name="output" file="test-3.bed"/>
+        </test>
+        <test>
+            <param name="input" value="test.fas"/>
+            <param name="regex" value="ACTG"/>
+            <param name="advanced" value="advanced"/>
+            <param name="maxstr" value="3"/>
+            <output name="output" file="test-4.bed"/>
+        </test>
+        <test>
+            <param name="input" value="TestSeqGroup-G4.fasta"/>
+            <param name="advanced" value="advanced"/>
+            <param name="seqnames" value="HJ24-Shp2_oncogenicProtein2 HJ24-Shp2_oncogenicProtein"/>
+            <output name="output" file="TestSeqGroup-G4-sub.bed"/>
+        </test>
+</tests>
+    <help><![CDATA[
+DESCRIPTION
+
+Search a fasta file for matches to a regular expression and return a bed file with the
+coordinates of the match and the matched sequence itself.
+
+Output bed file has columns:
+
+1. Name of fasta sequence (e.g. chromosome)
+2. Start of the match
+3. End of the match
+4. ID of the match
+5. Length of the match
+6. Strand
+7. Matched sequence as it appears on the forward strand
+
+For matches on the reverse strand it is reported the start and end position on the
+forward strand and the matched string on the forward strand (so the G4 'GGGAGGGT'
+present on the reverse strand is reported as ACCCTCCC).
+
+
+Note: Fasta sequences (chroms) are read in memory one at a time along with the
+matches for that chromosome.
+The order of the output is: chroms as they are found in the inut fasta, matches
+sorted within chroms by positions.
+
+ARGUMENTS:
+
+- regex Regex to be searched in the fasta input. Matches to the reverse complement will have - strand. The default regex is '([gG]{3,}\w{1,7}){3,}[gG]{3,}' which searches for G-quadruplexes.
+- matchcase Match case while searching for matches. Default is to ignore case (I.e. 'ACTG' will match 'actg').
+- noreverse           Do not search the reverse complement of the input fasta. Use this flag to search protein sequences.
+- maxstr Maximum length of the match to report in the 7th column of the output. Default is to report up to 10000nt. Truncated matches are reported as <ACTG...ACTG>[<maxstr>,<tot length>]
+- seqnames List of fasta sequences in the input to search. E.g. use --seqnames chr1 chr2 chrM to search only these crhomosomes. Default is to search all the sequences in input.
+
+EXAMPLE:
+
+Test data::
+>mychr
+ACTGnACTGnACTGnTGAC
+
+Example1 regex=ACTG::
+
+        mychr	0	4	mychr_0_4_for	4	+	ACTG
+        mychr	5	9	mychr_5_9_for	4	+	ACTG
+        mychr	10	14	mychr_10_14_for	4	+	ACTG
+
+Example2 regex=ACTG maxstr=3::
+
+        mychr	0	4	mychr_0_4_for	4	+	ACT[3,4]
+        mychr	5	9	mychr_5_9_for	4	+	ACT[3,4]
+        mychr	10	14	mychr_10_14_for	4	+	ACT[3,4]
+
+Example3 regex=A\w\wG::
+
+        mychr	0	5	mychr_0_5_for	5	+	ACTGn
+        mychr	5	10	mychr_5_10_for	5	+	ACTGn
+        mychr	10	15	mychr_10_15_for	5	+	ACTGn
+
+   ]]></help>
+    <citations>
+        <citation type="bibtex">
+@misc{githubfastaRegexFinder,
+  author = {Dario Beraldi},
+  year = {2017},
+  title = {fastaRegexFinder},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  url = {https://github.com/dariober/bioinformatics-cafe/tree/master/fastaRegexFinder},
+}</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TestSeqGroup-G4-sub.bed	Wed Jan 25 14:03:40 2023 +0000
@@ -0,0 +1,2 @@
+HJ24-Shp2_oncogenicProtein	17	58	HJ24-Shp2_oncogenicProtein_17_58_for	41	+	GGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGG
+HJ24-Shp2_oncogenicProtein2	17	58	HJ24-Shp2_oncogenicProtein2_17_58_for	41	+	GGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TestSeqGroup-G4.bed	Wed Jan 25 14:03:40 2023 +0000
@@ -0,0 +1,3 @@
+HJ24-Shp2_oncogenicProtein	17	58	HJ24-Shp2_oncogenicProtein_17_58_for	41	+	GGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGG
+HJ24-Shp2_oncogenicProtein2	17	58	HJ24-Shp2_oncogenicProtein2_17_58_for	41	+	GGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGG
+HJ24-Shp2_oncogenicProtein3	17	58	HJ24-Shp2_oncogenicProtein3_17_58_for	41	+	GGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TestSeqGroup-G4.fasta	Wed Jan 25 14:03:40 2023 +0000
@@ -0,0 +1,56 @@
+>PA#2/8
+ATACCAGCTTATTCAATTAGCAACATGAGGGGGATAGAGGGGGTGGGTTCTCTCGGCTACAATCGTAATCAGTTAG
+>PA#14/89
+ATACCAGCTTATTCAATTGGGCACCACGGGAGTCGGCCACATTTGGAGTTGTTTTTGCACAATCGTAATCAGTTAG
+>PA-C10
+ATACCAGCTTATTCAATTGCGCACCACGGGAGTTGGCCACATTTGGAGTTGTTTTTGCACAATCGTAATCAGTTAG
+>PA#4/22
+ATACCAGCTTATTCAATTGCAGTACTGATGAGTGTAGCCGTATGATTATCGTTTGTGGACAATCGTAATCAGTTAG
+>PA#4/34
+ATACCAGCTTATTCAATTCCCCAACGAGTCGATATGTAGCCCACACTCTGATTCGTCCACAATCGTAATCAGTTAG
+>PA#2/11
+ATACCAGCTTATTCAATTGGAGACGACAAACTATTACGTACTACGGCATGCACTTGGTACAATCGTAATCAGTTAG
+>PA-C8
+ATACCAGCTTATTCAATTAGGCCAGATGAGGGGTGCCCATGGCGGGGTGGCTGCTCCAACAATCGTAATCAGTTAG
+>PA#14/82
+ATACCAGCTTATTCAATTCCACAACCGAACTCGTAAGACGTATGTAGCCGCCAACTGTACAATCGTAATCAGTTAG
+>PA#2/3
+ATACCAGCTTATTCAATTCGACAAGTGGGCATTACGATTCTAGCCCTGATTATGTTCCACAATCGTAATCAGTTAG
+>PA-C9
+ATACCAGCTTATTCAATTACCGAGGAGATAACGTTGTAGCCGTCCATCATCTGATTCGACAATCGTAATCAGTTAG
+>PA#2/6
+ATACCAGCTTATTCAATTACCGATCACTAGCCGACTAATTGGTTTCCGATCGCAGTCCACAATCGTAATCAGTTAG
+>PA-C11
+ATACCAGCTTATTCAATTCGATGGAGCTGATGATTGTTGCCGATCTGACTGTTGTTCCACAATCGTAATCAGTTAG
+>PA-C13
+ATACCAGCTTATTCAATTCCCCTAACGTTACTGGATGTAGTCCGACTAACTTATGCGTACAATCGTAATCAGTTAG
+>PA-C42
+ATACCAGCTTATTCAATTGCAGATTACGCCTTGTAGCCCGCACTGATCTCGATGTTTGGACAATCGTAATCAGTTAG
+>PA-C16
+ATACCAGCTTATTCAATTCCCACGAGTGTAGCCGATTCTTCTGTACTCTTGTCCTCGTACAATCGTAATCAGTTAG
+>PA-C15
+ATACCAGCTTATTCAATTACGTGTTGTAGCCGACCCCTGTTGATTGTTTTCCTGTACCACAATCGTAATCAGTTAG
+>EA#14.3
+ATACCAGCTTATTCAATTTGAGGCGGGTGGGTGGGTTGAATACGCTGATTACCCCATCGGAGAACGTTAAGGCGCTTCAGATAGTAAGTGCAATCT
+>EA#9.4
+ATACCAGCTTATTCAATTGCTGCGAGGTGGGTGGGTGGGAGCAATTGATCCTCGCTTAGCTTCTACGGTGGGCTATCTAGATAGTAAGTGCAATCT
+>EA#5.10
+ATACCAGCTTATTCAATTCACCACACCTGCACCCCTGACTTCCCACTTATATCTACTACTCCGTCTCAAGCCCGTTTGAGATAGTAAGTGCAATCT
+>EA#14.5
+ATACCAGCTTATTCAATTCCGAGTTTGGGTGGGAGTGGTGGGTTCGGAATTGTTAGTTATTTGGGTTTATGCGAGGTGAGATAGTAAGTGCAATCT
+>EA#14.8
+ATACCAGCTTATTCAATTGACGGGGTGTTGTCGTATGCTGTAGAAGCCGTAATTTTTTTTGTTTTCCCTGCCCACCTAGATAGTAAGTGCAATCT
+>EA#14.4
+ATACCAGCTTATTCAATTCCACAGGTTGTATGGGGAATAAGGTGGGTGCGCGAGATAGTAAGTGCAATCT
+>EA#11.5
+ATACCAGCTTATTCAATTCCCACACCCTAACCGTAGAGCTAAGCTTTTCTTACTACTGACAGTGCTTTACCGTTTGCAAGATAGTAAGTGCAATCT
+>HJ24-Shp2_oncogenicProtein
+AGCGTCGAATACCACACGGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGGCTAATGGAGCTCGTGGTCAT
+>HJ24-Shp2_oncogenicProtein2
+AGCGTCGAATACCACACGGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGGCTAATGGAGCTCGTGGTCAT
+>HJ24-Shp2_oncogenicProtein3
+AGCGTCGAATACCACACGGGGGTTTTGGTGGGGGGGGCTGGGTTGTCTTGGGGGTGGGCTAATGGAGCTCGTGGTCAT
+>RT6-HIVRT
+ATCCGCCTGATTAGCGATACTCAGGCGTTAGGGAAGGGCGTCGAAAGCAGGGTGGGACTTGAGCAAAATCACCTGCAGGGG
+>AptG4-HumanRNaseH1
+CGGTCGCTCCGTGTGGCTTGGGTTGGGTGTGGCAGTGAC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-1.bed	Wed Jan 25 14:03:40 2023 +0000
@@ -0,0 +1,3 @@
+mychr	0	4	mychr_0_4_for	4	+	ACTG
+mychr	5	9	mychr_5_9_for	4	+	actg
+mychr	10	14	mychr_10_14_rev	4	-	CAGT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-2.bed	Wed Jan 25 14:03:40 2023 +0000
@@ -0,0 +1,2 @@
+mychr	0	4	mychr_0_4_for	4	+	ACTG
+mychr	10	14	mychr_10_14_rev	4	-	CAGT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-3.bed	Wed Jan 25 14:03:40 2023 +0000
@@ -0,0 +1,2 @@
+mychr	0	4	mychr_0_4_for	4	+	ACTG
+mychr	5	9	mychr_5_9_for	4	+	actg
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-4.bed	Wed Jan 25 14:03:40 2023 +0000
@@ -0,0 +1,3 @@
+mychr	0	4	mychr_0_4_for	4	+	ACT[3,4]
+mychr	5	9	mychr_5_9_for	4	+	act[3,4]
+mychr	10	14	mychr_10_14_rev	4	-	CAG[3,4]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.fas	Wed Jan 25 14:03:40 2023 +0000
@@ -0,0 +1,2 @@
+>mychr
+ACTGnactgnCAGTnTGAC