changeset 7:a7da50a23270 draft

Uploaded
author greg
date Tue, 24 Nov 2015 08:14:42 -0500
parents fa85ca6c9cf8
children e80af9b30cbb
files genetrack_util.py
diffstat 1 files changed, 5 insertions(+), 52 deletions(-) [+]
line wrap: on
line diff
--- a/genetrack_util.py	Sat Nov 21 08:57:37 2015 -0500
+++ b/genetrack_util.py	Tue Nov 24 08:14:42 2015 -0500
@@ -9,11 +9,6 @@
 GFF_EXT = 'gff'
 SCIDX_EXT = 'scidx'
 
-ROMAN = ['0', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X',
-         'XI', 'XII', 'XIII', 'XIV', 'XV', 'XVI', 'XVII', 'XVIII', 'XIX', 'XX',
-         'XXI', 'XXII', 'XXIII', 'XXIV', 'XXV', 'XXVI', 'XXVII', 'XXVIII', 'XXIX',
-         'XXX']
-
 
 def noop(data):
     return data
@@ -27,39 +22,9 @@
     return re.sub(r'chr(\d([^\d]|$))', r'chr0\1', data)
 
 
-def roman_to_numeric(data):
-    def convert(match):
-        """
-        Converts a single roman numeral to a number
-        """
-        numeral = match.group(1)
-        numeral = numeral.upper()
-        if numeral not in ROMAN:
-            # Unable to convert detected Roman numeral
-            return match.group(0)
-        return 'chr'+str(ROMAN.index(numeral))+(match.group(2) or '')
-    r = re.compile('chr([IVX]+)([^IVX]|$)', flags=re.IGNORECASE)
-    data = r.sub(convert, data)
-    return data
-
-
-def numeric_to_roman(data):
-    def convert(match):
-        """
-        Converts a number to a roman numeral
-        """
-        number = int(match.group(1))
-        if number >= len(ROMAN):
-            # Number is out of range to convert to a Roman numeral
-            return match.group(0)
-        return 'chr'+ROMAN[number]+(match.group(2) or '')
-    r = re.compile('chr(\d+)([^\d]|$)')
-    data = r.sub(convert,  data)
-    return data
-
-FORMATS = ['zeropad', 'numeric', 'roman']
-IN_CONVERT = {'zeropad': zeropad_to_numeric, 'roman': roman_to_numeric, 'numeric': noop}
-OUT_CONVERT = {'zeropad': numeric_to_zeropad, 'roman': numeric_to_roman, 'numeric': noop}
+FORMATS = ['zeropad', 'numeric']
+IN_CONVERT = {'zeropad': zeropad_to_numeric, 'numeric': noop}
+OUT_CONVERT = {'zeropad': numeric_to_zeropad, 'numeric': noop}
 
 
 def conversion_functions(in_fmt, out_fmt):
@@ -69,19 +34,7 @@
     return [IN_CONVERT[in_fmt], OUT_CONVERT[out_fmt]]
 
 
-def autodetect_format(data):
-    if re.search('chr0\d', data):
-        fmt = 'zeropad'
-    elif re.search('chr[IVXivx]', data):
-        fmt = 'roman'
-    else:
-        fmt = 'numeric'
-    return fmt
-
-
 def convert_data(data, in_fmt, out_fmt):
-    if in_fmt == 'autodetect':
-        in_fmt = autodetect_format(data)
     for fn in conversion_functions(in_fmt, out_fmt):
         data = fn(data)
     return data
@@ -425,12 +378,12 @@
             write(cname, '-', peak)
 
 
-def sort_chromosome_reads_by_index( input_path ):
+def sort_chromosome_reads_by_index(input_path):
     """
     Return a gff file with chromosome reads sorted by index.
     """
     # Will this sort produce different results across platforms?
-    output_path = tempfile.NamedTemporaryFile( delete=False ).name
+    output_path = tempfile.NamedTemporaryFile(delete=False).name
     command = 'sort -k 1,1 -k 4,4n "%s" > "%s"' % (input_path, output_path)
     p = subprocess.Popen(command, shell=True)
     p.wait()