Mercurial > repos > greg > extract_genomic_dna
changeset 16:030691e5cc86 draft
Uploaded
author | greg |
---|---|
date | Thu, 03 Mar 2016 14:52:26 -0500 |
parents | e70f5ca82b63 |
children | 24c055f48065 |
files | extract_genomic_dna_utils.py |
diffstat | 1 files changed, 24 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/extract_genomic_dna_utils.py Wed Mar 02 16:30:29 2016 -0500
+++ b/extract_genomic_dna_utils.py Thu Mar 03 14:52:26 2016 -0500
@@ -292,17 +292,34 @@
         stop_err('Error running faToTwoBit. ' + str(e))
 
 
-def get_description_field_delimiter(description_field_delimiter):
-    # Convert a word to an appropriate character.
-    if description_field_delimiter == 'underscore':
+def get_bedtools_getfasta_default_header(chrom, start, end, strand, includes_strand_col):
+    """
+    Return a fasta header that is the default produced by the bedtools
+    getfasta tool, assuming "force strandedness". This will produce a
+    header with this format: <chrom>:<start>-<end>(strand). If the input
+    data includes a strand column and the strand is '+' or '-', then use it.
+    If the input data includes a strand column and the value of strand is
+    anything but '+' or '-', set strand to '.' in the header. If the input
+    data does not include a strand column, set strand to '.' in the header.
+    """
+    if includes_strand_col and strand in ['+', '-']:
+        strand_val = strand
+    else:
+        strand_val = '.'
+    return '%s:%s-%s(%s)' % (chrom, start, end, strand_val)
+
+
+def get_fasta_header_delimiter(delimiter):
+    # Return a specified fasta header delimiter.
+    if delimiter == 'underscore':
         return '_'
-    if description_field_delimiter == 'semicolon':
+    if delimiter == 'semicolon':
         return ';'
-    if description_field_delimiter == 'comma':
+    if delimiter == 'comma':
         return ','
-    if description_field_delimiter == 'tilda':
+    if delimiter == 'tilda':
         return '~'
-    if description_field_delimiter == 'vetical_bar':
+    if delimiter == 'vetical_bar':
         return '|'
     # Set the default to underscore.
     return '_'