view reprof.py @ 3:8a1cd8a32a72 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/reprof commit 746aa691891448484ef9f6417c708b6c7e8f60b0
| author | iuc |
|---|---|
| date | Wed, 02 Dec 2015 16:34:05 -0500 |
| parents | 141da185be70 |
| children | fb0936cf5bef |
#!/usr/bin/env python
import re
import os
import argparse
import subprocess
import tempfile
from Bio import SeqIO
from BCBio import GFF
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.SeqFeature import SeqFeature, FeatureLocation


def run_reprof(query_path, modeldir):
    # Run the reprof binary on a FASTA query and return the raw report text.
    outtmp = tempfile.NamedTemporaryFile(delete=False)
    cmd = [
        'reprof',
        '-i', query_path,
        '--modeldir=%s' % modeldir,
        '-o', outtmp.name
    ]
    subprocess.check_call(cmd)
    outtmp.seek(0)
    data = outtmp.read()
    outtmp.close()
    os.unlink(outtmp.name)
    return data


def process_reprof_report(data):
    # Parse the tab-separated reprof report into one list per column.
    KEYS = ['idx', 'AA', 'PHEL', 'RI_S', 'pH', 'pE', 'pL', 'PACC', 'PREL',
            'P10', 'RI_A', 'Pbe', 'Pbie']
    data_tracks = {k: [] for k in KEYS}
    for line in data.split('\n'):
        # Skip comments, the column-header line ("No ...") and blank lines.
        if line.startswith('#') or line.startswith('No') or len(line.strip()) == 0:
            continue

        for idx, (key, value) in enumerate(zip(KEYS, line.strip().split('\t'))):
            # All columns except AA, PHEL, Pbe and Pbie are integer-valued
            if idx not in (1, 2, 11, 12):
                value = int(value)
            data_tracks[key].append(value)
    return data_tracks


def storeWigData(idx, data, id, path):
    # Append one per-residue score track in variableStep wiggle format.
    with open(path, 'a') as handle:
        handle.write('variableStep chrom=%s\n' % id)
        for (pos, val) in zip(idx, data):
            handle.write('%s %s\n' % (pos, val))


def storeGff3Data(path, id, positions, values, decodeMap):
    # Collapse runs of identical state characters (e.g. 'HHHEEELL') into
    # regions and write one GFF3 feature per region listed in decodeMap.
    merged = ''.join(values)
    # http://stackoverflow.com/a/19683549
    regions = [(x[0][0].upper(), len(x[0])) for x in re.findall('((.)(\\2*))', merged)]

    location = 1
    # Placeholder sequence; only the feature coordinates matter for the GFF3 output.
    rec = SeqRecord(Seq("ACTG"), id=id)
    for (region_char, region_length) in regions:
        # If we don't wish to decode this region, skip it.
        if region_char not in decodeMap:
            location += region_length
            continue

        region_info = decodeMap[region_char]
        # Create a feature representing this region
        region_feat = SeqFeature(
            FeatureLocation(location - 1, location - 1 + region_length),
            type=region_info['type'],
            strand=0,
            qualifiers={k: v for (k, v) in region_info.iteritems() if k != 'type'}
        )
        # Update our start location
        location += region_length
        rec.features.append(region_feat)

    with open(path, 'a') as handle:
        GFF.write([rec], handle)


def storeClassData(path, id, phel):
    # Classify the protein by helix/strand content and append the call to a TSV report.
    h = float(phel.count('H')) / float(len(phel))
    e = float(phel.count('E')) / float(len(phel))
    if h > .45 and e < .05:
        classification = 'all-alpha'
    elif h < .05 and e > .45:
        classification = 'all-beta'
    elif h > .3 and e > .2:
        classification = 'alpha-beta'
    else:
        classification = 'mixed'

    with open(path, 'a') as handle:
        handle.write("{0}\t{1}\n".format(id, classification))


def main(fasta, modeldir):
    for record in SeqIO.parse(fasta, 'fasta'):
        # Write each sequence to its own temporary FASTA so reprof sees one query at a time.
        tmp = tempfile.NamedTemporaryFile(delete=False)
        SeqIO.write([record], tmp, 'fasta')
        tmp.close()

        # Run reprof
        data = process_reprof_report(run_reprof(tmp.name, modeldir))
        for col in ('RI_S', 'P10', 'RI_A', 'PACC', 'PREL', 'pH', 'pE', 'pL'):
            storeWigData(data['idx'], data[col], record.id, col + '.wig')

        eco = ['{ECO:0000255|reprof_1.0.1}']
        storeGff3Data(
            'secondary_structure.gff3', record.id, data['idx'], data['PHEL'],
            {
                'H': {
                    'type': 'peptide_helix',
                    'description': ['Helix'],
                    'evidence': eco,
                },
                'E': {
                    'type': 'beta_strand',
                    'description': ['Extended/Sheet'],
                    'evidence': eco,
                },
                'L': {
                    'type': 'loop',
                    'description': ['Loop'],
                    'evidence': eco,
                }
            }
        )

        storeGff3Data(
            'solvent_accessibility.gff3', record.id, data['idx'], data['Pbe'],
            {
                'B': {
                    'type': 'experimental_result_region',
                    'description': ['Buried'],
                    'evidence': eco,
                },
                'E': {
                    'type': 'experimental_result_region',
                    'description': ['Exposed'],
                    'evidence': eco,
                },
            }
        )

        storeClassData('report.tsv', record.id, data['PHEL'])


if __name__ == '__main__':
    # Parse command-line arguments and run the wrapper on every input sequence.
    parser = argparse.ArgumentParser(description='Wrapper for reprof')
    parser.add_argument('fasta', type=file, help='Fasta Input')
    parser.add_argument('modeldir')
    args = parser.parse_args()
    main(**vars(args))
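
For orientation, a minimal usage sketch follows. The FASTA filename and model directory below are placeholders rather than paths shipped with the tool; the script as written targets Python 2 (it relies on `dict.iteritems()` and the `file` built-in) and assumes Biopython, bcbio-gff and the reprof binary with its model directory are installed and on PATH.

# Hypothetical command-line invocation (placeholder paths):
#
#     python reprof.py query.fasta /path/to/reprof/models
#
# Programmatic equivalent, reusing main() from this module; assumes this
# file is importable as reprof.py and that 'query.fasta' exists:
from reprof import main

with open('query.fasta') as fasta_handle:          # placeholder input FASTA
    main(fasta_handle, '/path/to/reprof/models')   # placeholder model directory
# Outputs are appended in the working directory: one <column>.wig track per
# score column, secondary_structure.gff3, solvent_accessibility.gff3, and
# report.tsv with a per-sequence fold-class call.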
