Mercurial > repos > galaxyp > gffcompare_to_bed
comparison gffcompare_to_bed.py @ 0:a873420ef833 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/gffcompare_to_bed commit 321b217382f6be33bd77c7dbb51c8caf5fa50afe
| author | galaxyp |
|---|---|
| date | Thu, 11 Jan 2018 11:16:32 -0500 |
| parents | |
| children | 2e24ff66228f |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a873420ef833 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 # | |
| 4 #------------------------------------------------------------------------------ | |
| 5 # University of Minnesota | |
| 6 # Copyright 2017, Regents of the University of Minnesota | |
| 7 #------------------------------------------------------------------------------ | |
| 8 # Author: | |
| 9 # | |
| 10 # James E Johnson | |
| 11 # | |
| 12 #------------------------------------------------------------------------------ | |
| 13 """ | |
| 14 | |
| 15 import argparse | |
| 16 import sys | |
| 17 | |
| 18 | |
class BedEntry(object):
    """One 12-column BED record that renders itself as a tab-separated line.

    Args:
        chrom: Sequence name, stored as given.
        chromStart: Start coordinate; coerced with ``int()``.
        chromEnd: End coordinate; coerced with ``int()``.
        name: Feature name, stored as given.
        score: Coerced with ``int()``; ``None`` becomes 0.
        strand: A value whose string form starts with ``-`` becomes ``'-'``;
            every other value (including ``None`` and ``'.'``) becomes
            ``'+'``.
        thickStart: Coerced with ``int()``; a falsy value falls back to
            ``chromStart``.
        thickEnd: Coerced with ``int()``; a falsy value falls back to
            ``chromEnd``.
        itemRgb: Stringified; ``None`` becomes ``'100,100,100'``.
        blockCount: Coerced with ``int()``.
        blockSizes: Comma-separated string or list of values convertible
            to int. Any other type is stored unchanged.
        blockStarts: Same accepted forms as ``blockSizes``.

    Raises:
        TypeError: If ``chromStart``, ``chromEnd`` or ``blockCount`` is None.
        ValueError: If a numeric field cannot be parsed as an integer.
    """

    # Text types accepted for comma-separated lists. ``unicode`` only exists
    # on Python 2, so probe for it once instead of naming it in every check
    # (naming it unconditionally raises NameError on Python 3).
    try:
        _string_types = (str, unicode)  # noqa: F821
    except NameError:
        _string_types = (str,)

    def __init__(self, chrom=None, chromStart=None, chromEnd=None,
                 name=None, score=None, strand=None,
                 thickStart=None, thickEnd=None, itemRgb=None,
                 blockCount=None, blockSizes=None, blockStarts=None):
        self.chrom = chrom
        self.chromStart = int(chromStart)
        self.chromEnd = int(chromEnd)
        self.name = name
        self.score = int(score) if score is not None else 0
        self.strand = '-' if str(strand).startswith('-') else '+'
        self.thickStart = int(thickStart) if thickStart else self.chromStart
        self.thickEnd = int(thickEnd) if thickEnd else self.chromEnd
        self.itemRgb = str(itemRgb) if itemRgb is not None else r'100,100,100'
        self.blockCount = int(blockCount)
        # Both block columns go through the same coercion, so each is
        # checked against its own value rather than its sibling's.
        self.blockSizes = self._to_int_list(blockSizes)
        self.blockStarts = self._to_int_list(blockStarts)

    @classmethod
    def _to_int_list(cls, value):
        """Return ``value`` as a list of ints when it is a string or a list.

        Blank items in a comma-separated string are skipped so that a
        trailing comma (``"10,20,"``) does not fail on ``int('')``.
        Values of any other type are returned unchanged.
        """
        if isinstance(value, cls._string_types):
            return [int(x) for x in value.split(',') if x.strip()]
        if isinstance(value, list):
            return [int(x) for x in value]
        return value

    def __str__(self):
        """Return the record as twelve tab-separated BED fields."""
        return '%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%s\t%s' % (
            self.chrom, self.chromStart, self.chromEnd,
            self.name, self.score, self.strand,
            self.thickStart, self.thickEnd, str(self.itemRgb),
            self.blockCount,
            ','.join([str(x) for x in self.blockSizes]),
            ','.join([str(x) for x in self.blockStarts]))
| 54 | |
| 55 | |
def _parse_gtf_attributes(attributes):
    """Parse a GTF attribute column into a ``{key: value}`` dict.

    Each ``;``-separated item is split at its first space into key and
    value; surrounding double quotes are removed from the value. Empty
    items (trailing ``"; "`` or doubled semicolons) are skipped, and a key
    without a value maps to ``''``.

    NOTE: a semicolon inside a quoted value still splits the item.
    """
    parsed = {}
    for item in attributes.split(';'):
        item = item.strip()
        if not item:
            continue
        key, _, value = item.partition(' ')
        parsed[key] = value.strip().strip('"')
    return parsed


def _write_bed_entry(bed, output_wtr):
    """Write ``bed`` as one line to ``output_wtr``.

    A transcript that collected no exon lines keeps its single
    whole-transcript block and is reported with a block count of 1.
    Otherwise blocks are ordered by start offset so that exons listed in
    descending order still yield ascending BED blocks.
    """
    if bed.blockCount == 0:
        bed.blockCount = 1
    else:
        blocks = sorted(zip(bed.blockStarts, bed.blockSizes))
        bed.blockStarts = [block_start for block_start, _ in blocks]
        bed.blockSizes = [block_size for _, block_size in blocks]
    output_wtr.write("%s\n" % str(bed))


def _gtf_to_bed(input_rdr, output_wtr, class_codes=None, debug=False):
    """Convert transcript/exon GTF lines from ``input_rdr`` to BED lines.

    Lines starting with ``#`` and lines without exactly nine tab-separated
    fields are ignored. Each ``transcript`` line starts a new BED entry;
    following ``exon`` lines are added to it as blocks until the next
    transcript line. When ``class_codes`` is non-empty, transcripts whose
    ``class_code`` attribute is not listed are dropped together with
    their exons.
    """
    bed = None
    for line in input_rdr:
        if line.startswith('#'):
            continue
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) != 9:
            continue
        (seqname, source, feature, start, end,
         score, strand, frame, attributes) = fields
        if feature == 'transcript':
            # Attributes are only needed for transcript lines.
            attribute = _parse_gtf_attributes(attributes)
            if debug:
                sys.stderr.write("%s\t%s\n" % (
                    '\t'.join([seqname, source, feature,
                               start, end, score, strand, frame]),
                    attribute))
            # A new transcript closes the previous one.
            if bed is not None:
                _write_bed_entry(bed, output_wtr)
                bed = None
            class_code = attribute.get('class_code')
            if class_codes and class_code not in class_codes:
                # bed stays None, so this transcript's exons are skipped.
                continue
            chromStart = int(start) - 1
            chromEnd = int(end)
            cat = '_' + class_code if class_code and class_code != '=' else ''
            bed = BedEntry(chrom=seqname,
                           chromStart=chromStart, chromEnd=chromEnd,
                           name=attribute['transcript_id'] + cat,
                           strand=strand,
                           blockCount=0,
                           blockSizes=[chromEnd - chromStart],
                           blockStarts=[0])
        elif feature == 'exon' and bed is not None:
            chromStart = int(start) - 1
            chromEnd = int(end)
            if bed.blockCount == 0:
                # First exon replaces the whole-transcript placeholder.
                bed.blockSizes = []
                bed.blockStarts = []
            bed.blockSizes.append(chromEnd - chromStart)
            bed.blockStarts.append(chromStart - bed.chromStart)
            bed.blockCount += 1
    if bed is not None:
        _write_bed_entry(bed, output_wtr)


def __main__(argv=None):
    """Command-line entry point: GFFCompare annotated GTF in, BED out.

    Args:
        argv: Optional argument list; ``None`` makes argparse read
            ``sys.argv``.

    Positional arguments are the input GTF path and the output BED path,
    where ``-`` selects stdin/stdout. ``-C/--class_code`` may be repeated
    and may hold comma-separated codes; ``-d/--debug`` echoes each
    transcript line to stderr.
    """
    parser = argparse.ArgumentParser(
        description='Convert a GFFCompare annotated GTF file to BED')
    parser.add_argument(
        'input',
        help='GFFCompare annotated GTF file, (-) for stdin')
    parser.add_argument(
        'output',
        help='BED file, (-) for stdout')
    parser.add_argument(
        '-C', '--class_code', action='append', default=[],
        help='Restrict output to gffcompare class codes')
    parser.add_argument('-d', '--debug', action='store_true', help='Debug')
    args = parser.parse_args(argv)

    # Blank entries are dropped so that "-C ''" means "no restriction".
    class_codes = [c.strip() for codes in args.class_code
                   for c in codes.split(',') if c.strip()] or None

    input_rdr = sys.stdin if args.input == '-' else open(args.input, 'r')
    output_wtr = None
    try:
        output_wtr = (sys.stdout if args.output == '-'
                      else open(args.output, 'w'))
        _gtf_to_bed(input_rdr, output_wtr,
                    class_codes=class_codes, debug=args.debug)
    finally:
        # Close only the handles opened here, never the std streams.
        if input_rdr is not sys.stdin:
            input_rdr.close()
        if output_wtr is not None and output_wtr is not sys.stdout:
            output_wtr.close()
| 130 | |
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    __main__()
