Mercurial > repos > iuc > reprof
diff reprof.py @ 0:7c33ed152672 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/reprof commit cc30e84dcc309a93f49f75250aa3344f926cee5e-dirty
| author | iuc |
|---|---|
| date | Mon, 30 Nov 2015 20:10:58 -0500 |
| parents | |
| children | 141da185be70 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/reprof.py Mon Nov 30 20:10:58 2015 -0500 @@ -0,0 +1,134 @@ +#!/usr/bin/env python +import re +import os +import argparse +import subprocess +import tempfile +from Bio import SeqIO +from BCBio import GFF +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord +from Bio.SeqFeature import SeqFeature, FeatureLocation + +def run_reprof(query_path, modeldir): + outtmp = tempfile.NamedTemporaryFile(delete=False) + cmd = [ + './reprof/scripts/reprof', + '-i', query_path, + '--modeldir=%s' % modeldir, + '-o', outtmp.name + ] + subprocess.check_call(cmd) + outtmp.seek(0) + data = outtmp.read() + outtmp.close() + os.unlink(outtmp.name) + return data + +def process_reprof_report(data): + KEYS = ['idx', 'AA', 'PHEL', 'RI_S', 'pH', 'pE', 'pL', 'PACC', 'PREL', 'P10', 'RI_A', 'Pbe', 'Pbie'] + data_tracks = {k: [] for k in KEYS} + + for line in data.split('\n'): + if line.startswith('#') or line.startswith('No') or len(line.strip()) == 0: + continue + + for idx, (key, value) in enumerate(zip(KEYS, line.strip().split('\t'))): + # numerical columns + if idx not in (1, 2, 11, 12): + value = int(value) + + data_tracks[key].append(value) + return data_tracks + +def storeWigData(idx, data, id, path): + with open(path, 'a') as handle: + handle.write('variableStep chrom=%s\n' % id) + for (pos, val) in zip(idx, data): + handle.write('%s %s\n' % (pos, val)) + +def storeGff3Data(path, id, positions, values, decodeMap): + merged = ''.join(values) + # http://stackoverflow.com/a/19683549 + regions = [(x[0][0].upper(), len(x[0])) for x in re.findall('((.)(\\2*))', merged)] + + location = 1 + + rec = SeqRecord(Seq("ACTG"), id=id) + for (region_char, region_length) in regions: + # If we don't wish to decode this region, skip it. + if region_char not in decodeMap: + location += region_length + continue + + region_info = decodeMap[region_char] + # Create a feature representing this region + region_feat = SeqFeature( + FeatureLocation(location - 1, location - 1 + region_length), + type=region_info['type'], strand=0, + qualifiers={k: v for (k, v) in region_info.iteritems() if k != 'type'} + ) + # Update our start location + location += region_length + rec.features.append(region_feat) + + with open(path, 'a') as handle: + GFF.write([rec], handle) + +def main(fasta, modeldir): + for record in SeqIO.parse(fasta, 'fasta'): + tmp = tempfile.NamedTemporaryFile(delete=False) + SeqIO.write([record], tmp, 'fasta') + tmp.close() + + # Run reprof + data = process_reprof_report(run_reprof(tmp.name, modeldir)) + for col in ('RI_S', 'P10', 'RI_A', 'PACC', 'PREL', 'pH', 'pE', 'pL'): + storeWigData(data['idx'], data[col], record.id, col + '.wig') + + storeGff3Data( + 'secondary_structure.gff3', record.id, data['idx'], data['PHEL'], + { + 'H': { + 'type': 'peptide_helix', + 'label': ['Helix'], + 'evidence': ['ECO:0000255'] + }, + 'E': { + 'type': 'beta_strand', + 'label': ['Extended/Sheet'], + 'evidence': ['ECO:0000255'] + }, + 'L': { + 'type': 'loop', + 'label': ['Loop'], + 'evidence': ['ECO:0000255'] + } + } + ) + + storeGff3Data( + 'solvent_accessibility.gff3', record.id, data['idx'], data['Pbe'], + { + 'B': { + 'type': 'experimental_result_region', + 'label': ['Buried'], + 'evidence': ['ECO:0000255'] + }, + 'E': { + 'type': 'experimental_result_region', + 'label': ['Exposed'], + 'evidence': ['ECO:0000255'] + }, + } + ) + + +if __name__ == '__main__': + # Grab all of the filters from our plugin loader + parser = argparse.ArgumentParser(description='Wrapper for reprof') + parser.add_argument('fasta', type=file, help='Fasta Input') + parser.add_argument('modeldir') + args = parser.parse_args() + + main(**vars(args))
