reprof.py @ 14:8c83f1fb30f6 (draft, default, tip)
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/reprof commit 98288bb0ee8760018924574b81c8759bac55b529
| field | value |
|---|---|
| author | iuc |
| date | Fri, 11 Dec 2015 14:26:29 -0500 |
| parents | fb0936cf5bef |
| children | |
#!/usr/bin/env python
import re
import os
import argparse
import subprocess
import tempfile
from Bio import SeqIO
from BCBio import GFF
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.SeqFeature import SeqFeature, FeatureLocation


def run_reprof(query_path, modeldir):
    outtmp = tempfile.NamedTemporaryFile(delete=False)
    cmd = [
        'reprof',
        '-i', query_path,
        '--modeldir=%s' % modeldir,
        '-o', outtmp.name
    ]
    subprocess.check_call(cmd)
    outtmp.seek(0)
    data = outtmp.read()
    outtmp.close()
    os.unlink(outtmp.name)
    return data


def process_reprof_report(data):
    KEYS = ['idx', 'AA', 'PHEL', 'RI_S', 'pH', 'pE', 'pL', 'PACC', 'PREL',
            'P10', 'RI_A', 'Pbe', 'Pbie']
    data_tracks = {k: [] for k in KEYS}
    for line in data.split('\n'):
        if line.startswith('#') or line.startswith('No') or len(line.strip()) == 0:
            continue

        for idx, (key, value) in enumerate(zip(KEYS, line.strip().split('\t'))):
            # numerical columns
            if idx not in (1, 2, 11, 12):
                value = int(value)
            data_tracks[key].append(value)
    return data_tracks


def storeWigData(idx, data, id, path):
    with open(path, 'a') as handle:
        handle.write('variableStep chrom=%s\n' % id)
        for (pos, val) in zip(idx, data):
            handle.write('%s %s\n' % (pos, val))


def storeGff3Data(path, id, positions, values, decodeMap):
    merged = ''.join(values)
    # http://stackoverflow.com/a/19683549
    regions = [(x[0][0].upper(), len(x[0])) for x in re.findall('((.)(\\2*))', merged)]

    location = 1
    rec = SeqRecord(Seq("ACTG"), id=id)
    for (region_char, region_length) in regions:
        # If we don't wish to decode this region, skip it.
        if region_char not in decodeMap:
            location += region_length
            continue

        region_info = decodeMap[region_char]
        # Create a feature representing this region
        region_feat = SeqFeature(
            FeatureLocation(location - 1, location - 1 + region_length),
            type=region_info['type'],
            strand=0,
            qualifiers={k: v for (k, v) in region_info.iteritems() if k != 'type'}
        )
        # Update our start location
        location += region_length
        rec.features.append(region_feat)

    with open(path, 'a') as handle:
        GFF.write([rec], handle)


def storeClassData(path, id, phel):
    h = float(phel.count('H')) / float(len(phel))
    e = float(phel.count('E')) / float(len(phel))

    if h > .45 and e < .05:
        classification = 'all-alpha'
    elif h < .05 and e > .45:
        classification = 'all-beta'
    elif h > .3 and e > .2:
        classification = 'alpha-beta'
    else:
        classification = 'mixed'

    with open(path, 'a') as handle:
        handle.write('# id\t% alpha\t% beta\tclass\n')
        handle.write('{0}\t{1:.2f}\t{2:.2f}\t{3}'.format(id, h, e, classification))


def main(fasta, modeldir):
    for record in SeqIO.parse(fasta, 'fasta'):
        tmp = tempfile.NamedTemporaryFile(delete=False)
        SeqIO.write([record], tmp, 'fasta')
        tmp.close()

        # Run reprof
        data = process_reprof_report(run_reprof(tmp.name, modeldir))

        for col in ('RI_S', 'P10', 'RI_A', 'PACC', 'PREL', 'pH', 'pE', 'pL'):
            storeWigData(data['idx'], data[col], record.id, col + '.wig')

        eco = ['{ECO:0000255|reprof_1.0.1}']
        storeGff3Data(
            'secondary_structure.gff3', record.id, data['idx'], data['PHEL'],
            {
                'H': {
                    'type': 'peptide_helix',
                    'description': ['Helix'],
                    'evidence': eco,
                },
                'E': {
                    'type': 'beta_strand',
                    'description': ['Extended/Sheet'],
                    'evidence': eco,
                },
                'L': {
                    'type': 'loop',
                    'description': ['Loop'],
                    'evidence': eco,
                }
            }
        )

        storeGff3Data(
            'solvent_accessibility.gff3', record.id, data['idx'], data['Pbe'],
            {
                'B': {
                    'type': 'experimental_result_region',
                    'description': ['Buried'],
                    'evidence': eco,
                },
                'E': {
                    'type': 'experimental_result_region',
                    'description': ['Exposed'],
                    'evidence': eco,
                },
            }
        )

        storeClassData('report.tsv', record.id, data['PHEL'])


if __name__ == '__main__':
    # Parse command-line arguments and hand off to main()
    parser = argparse.ArgumentParser(description='Wrapper for reprof')
    parser.add_argument('fasta', type=file, help='Fasta Input')
    parser.add_argument('modeldir')
    args = parser.parse_args()
    main(**vars(args))
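A minimal sketch of how `process_reprof_report` consumes the tab-separated reprof report. The column order is taken from `KEYS` in the wrapper, but the residue values below are synthetic, and the import assumes this file is saved as `reprof.py` on the Python path with Biopython and bcbio-gff installed (all assumptions, purely for illustration):

```python
# Hedged sketch: feed process_reprof_report a tiny synthetic two-residue report.
# Column order follows KEYS in the wrapper; every value here is made up.
from reprof import process_reprof_report  # assumes this wrapper is importable as reprof.py

sample = '\n'.join([
    '# comment lines are skipped',
    'No\tAA\tPHEL\tRI_S\tpH\tpE\tpL\tPACC\tPREL\tP10\tRI_A\tPbe\tPbie',  # header, skipped ("No" prefix)
    '1\tM\tL\t5\t10\t10\t80\t120\t55\t7\t3\te\te',
    '2\tA\tH\t7\t70\t10\t20\t30\t14\t2\t5\tb\tb',
])

tracks = process_reprof_report(sample)
print(tracks['PHEL'])  # ['L', 'H']  -- kept as strings
print(tracks['pH'])    # [10, 70]    -- converted to int
```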

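For reference, the run-length grouping used in `storeGff3Data` (the Stack Overflow back-reference trick linked in the code) collapses a per-residue state string into `(state, length)` regions before they are turned into GFF3 features. A small illustrative snippet, using a made-up PHEL string:

```python
import re

# Made-up per-residue secondary-structure string (H = helix, E = strand, L = loop).
merged = 'HHHLLEEEEL'

# '((.)(\2*))' matches each run of identical characters; the first element of
# each findall tuple is the full run, so we keep its character and its length.
regions = [(x[0][0].upper(), len(x[0])) for x in re.findall('((.)(\\2*))', merged)]
print(regions)  # [('H', 3), ('L', 2), ('E', 4), ('L', 1)]
```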