genetrack: genetrack_util.py comparison

comparison genetrack_util.py @ 7:a7da50a23270 draft

Uploaded

author	greg
date	Tue, 24 Nov 2015 08:14:42 -0500
parents	a952b6740fb9
children

comparison

equal deleted inserted replaced

-:fa85ca6c9cf8
+:a7da50a23270
 import tempfile
 GFF_EXT = 'gff'
 SCIDX_EXT = 'scidx'
-ROMAN = ['0', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X',
-'XI', 'XII', 'XIII', 'XIV', 'XV', 'XVI', 'XVII', 'XVIII', 'XIX', 'XX',
-'XXI', 'XXII', 'XXIII', 'XXIV', 'XXV', 'XXVI', 'XXVII', 'XXVIII', 'XXIX',
-'XXX']
 def noop(data):
 return data
 def numeric_to_zeropad(data):
 return re.sub(r'chr(\d([^\d]|$))', r'chr0\1', data)
-def roman_to_numeric(data):
+FORMATS = ['zeropad', 'numeric']
-def convert(match):
+IN_CONVERT = {'zeropad': zeropad_to_numeric, 'numeric': noop}
-"""
+OUT_CONVERT = {'zeropad': numeric_to_zeropad, 'numeric': noop}
-Converts a single roman numeral to a number
-"""
-numeral = match.group(1)
-numeral = numeral.upper()
-if numeral not in ROMAN:
-# Unable to convert detected Roman numeral
-return match.group(0)
-return 'chr'+str(ROMAN.index(numeral))+(match.group(2) or '')
-r = re.compile('chr([IVX]+)([^IVX]|$)', flags=re.IGNORECASE)
-data = r.sub(convert, data)
-return data
-def numeric_to_roman(data):
-def convert(match):
-"""
-Converts a number to a roman numeral
-"""
-number = int(match.group(1))
-if number >= len(ROMAN):
-# Number is out of range to convert to a Roman numeral
-return match.group(0)
-return 'chr'+ROMAN[number]+(match.group(2) or '')
-r = re.compile('chr(\d+)([^\d]|$)')
-data = r.sub(convert,  data)
-return data
-FORMATS = ['zeropad', 'numeric', 'roman']
-IN_CONVERT = {'zeropad': zeropad_to_numeric, 'roman': roman_to_numeric, 'numeric': noop}
-OUT_CONVERT = {'zeropad': numeric_to_zeropad, 'roman': numeric_to_roman, 'numeric': noop}
 def conversion_functions(in_fmt, out_fmt):
 """
 Returns the proper list of functions to apply to perform a conversion
 """
 return [IN_CONVERT[in_fmt], OUT_CONVERT[out_fmt]]
-def autodetect_format(data):
-if re.search('chr0\d', data):
-fmt = 'zeropad'
-elif re.search('chr[IVXivx]', data):
-fmt = 'roman'
-else:
-fmt = 'numeric'
-return fmt
 def convert_data(data, in_fmt, out_fmt):
-if in_fmt == 'autodetect':
-in_fmt = autodetect_format(data)
 for fn in conversion_functions(in_fmt, out_fmt):
 data = fn(data)
 return data
 for peak in reverse_peaks:
 if process_bounds[0] < peak.index < process_bounds[1]:
 write(cname, '-', peak)
-def sort_chromosome_reads_by_index( input_path ):
+def sort_chromosome_reads_by_index(input_path):
 """
 Return a gff file with chromosome reads sorted by index.
 """
 # Will this sort produce different results across platforms?
-output_path = tempfile.NamedTemporaryFile( delete=False ).name
+output_path = tempfile.NamedTemporaryFile(delete=False).name
 command = 'sort -k 1,1 -k 4,4n "%s" > "%s"' % (input_path, output_path)
 p = subprocess.Popen(command, shell=True)
 p.wait()
 return output_path

Mercurial > repos > greg > genetrack

comparison genetrack_util.py @ 7:a7da50a23270 draft