Mercurial > repos > devteam > flanking_features
comparison flanking_features.py @ 3:94248d5b9b8b draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/flanking_features commit cae3e05d02e60f595bb8b6d77a84f030e9bd1689
| author | devteam |
|---|---|
| date | Thu, 22 Jun 2017 18:39:31 -0400 |
| parents | 850c05b9af00 |
| children |
comparison
equal
deleted
inserted
replaced
| 2:d94e778c3ad1 | 3:94248d5b9b8b |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 #By: Guruprasad Ananda | 2 # By: Guruprasad Ananda |
| 3 """ | 3 """ |
| 4 Fetch closest up/downstream interval from features corresponding to every interval in primary | 4 Fetch closest up/downstream interval from features corresponding to every interval in primary |
| 5 | 5 |
| 6 usage: %prog primary_file features_file out_file direction | 6 usage: %prog primary_file features_file out_file direction |
| 7 -1, --cols1=N,N,N,N: Columns for start, end, strand in first file | 7 -1, --cols1=N,N,N,N: Columns for start, end, strand in first file |
| 8 -2, --cols2=N,N,N,N: Columns for start, end, strand in second file | 8 -2, --cols2=N,N,N,N: Columns for start, end, strand in second file |
| 9 -G, --gff1: input 1 is GFF format, meaning start and end coordinates are 1-based, closed interval | 9 -G, --gff1: input 1 is GFF format, meaning start and end coordinates are 1-based, closed interval |
| 10 -H, --gff2: input 2 is GFF format, meaning start and end coordinates are 1-based, closed interval | 10 -H, --gff2: input 2 is GFF format, meaning start and end coordinates are 1-based, closed interval |
| 11 """ | 11 """ |
| 12 from __future__ import print_function | |
| 12 | 13 |
| 13 import fileinput | 14 import fileinput |
| 14 import sys | 15 import sys |
| 16 | |
| 15 from bx.cookbook import doc_optparse | 17 from bx.cookbook import doc_optparse |
| 16 from bx.intervals.io import Comment, GenomicInterval, Header, NiceReaderWrapper | 18 from bx.intervals.io import Comment, GenomicInterval, Header, NiceReaderWrapper |
| 17 from bx.intervals.operations import quicksect | 19 from bx.intervals.operations import quicksect |
| 18 from bx.tabular.io import ParseError | 20 from bx.tabular.io import ParseError |
| 19 from galaxy.tools.util.galaxyops import fail, parse_cols_arg, skipped | 21 from galaxy.tools.util.galaxyops import fail, parse_cols_arg, skipped |
| 22 | |
| 20 from utils.gff_util import convert_bed_coords_to_gff, GFFIntervalToBEDReaderWrapper | 23 from utils.gff_util import convert_bed_coords_to_gff, GFFIntervalToBEDReaderWrapper |
| 21 | 24 |
| 22 assert sys.version_info[:2] >= ( 2, 4 ) | 25 assert sys.version_info[:2] >= ( 2, 4 ) |
| 23 | 26 |
| 24 | 27 |
| 25 def get_closest_feature(node, direction, threshold_up, threshold_down, report_func_up, report_func_down): | 28 def get_closest_feature(node, direction, threshold_up, threshold_down, report_func_up, report_func_down): |
| 26 #direction=1 for +ve strand upstream and -ve strand downstream cases; and it is 0 for +ve strand downstream and -ve strand upstream cases | 29 # direction=1 for +ve strand upstream and -ve strand downstream cases; and it is 0 for +ve strand downstream and -ve strand upstream cases |
| 27 #threhold_Up is equal to the interval start for +ve strand, and interval end for -ve strand | 30 # threhold_Up is equal to the interval start for +ve strand, and interval end for -ve strand |
| 28 #threhold_down is equal to the interval end for +ve strand, and interval start for -ve strand | 31 # threhold_down is equal to the interval end for +ve strand, and interval start for -ve strand |
| 29 if direction == 1: | 32 if direction == 1: |
| 30 if node.maxend <= threshold_up: | 33 if node.maxend <= threshold_up: |
| 31 if node.end == node.maxend: | 34 if node.end == node.maxend: |
| 32 report_func_up(node) | 35 report_func_up(node) |
| 33 elif node.right and node.left: | 36 elif node.right and node.left: |
| 101 else: | 104 else: |
| 102 root = rightTree.chroms[chrom] # root node for the chrom tree | 105 root = rightTree.chroms[chrom] # root node for the chrom tree |
| 103 result_up = [] | 106 result_up = [] |
| 104 result_down = [] | 107 result_down = [] |
| 105 if (strand == '+' and up) or (strand == '-' and down): | 108 if (strand == '+' and up) or (strand == '-' and down): |
| 106 #upstream +ve strand and downstream -ve strand cases | 109 # upstream +ve strand and downstream -ve strand cases |
| 107 get_closest_feature(root, 1, start, None, lambda node: result_up.append( node ), None) | 110 get_closest_feature(root, 1, start, None, lambda node: result_up.append( node ), None) |
| 108 | 111 |
| 109 if (strand == '+' and down) or (strand == '-' and up): | 112 if (strand == '+' and down) or (strand == '-' and up): |
| 110 #downstream +ve strand and upstream -ve strand case | 113 # downstream +ve strand and upstream -ve strand case |
| 111 get_closest_feature(root, 0, None, end - 1, None, lambda node: result_down.append( node )) | 114 get_closest_feature(root, 0, None, end - 1, None, lambda node: result_down.append( node )) |
| 112 | 115 |
| 113 if result_up: | 116 if result_up: |
| 114 if len(result_up) > 1: # The results_up list has a list of intervals upstream to the given interval. | 117 if len(result_up) > 1: # The results_up list has a list of intervals upstream to the given interval. |
| 115 ends = [] | 118 ends = [] |
| 121 if not(either): | 124 if not(either): |
| 122 yield [ interval, result_up[res_ind].other ] | 125 yield [ interval, result_up[res_ind].other ] |
| 123 | 126 |
| 124 if result_down: | 127 if result_down: |
| 125 if not(either): | 128 if not(either): |
| 126 #The last element of result_down will be the closest element to the given interval | 129 # The last element of result_down will be the closest element to the given interval |
| 127 yield [ interval, result_down[-1].other ] | 130 yield [ interval, result_down[-1].other ] |
| 128 | 131 |
| 129 if either and (result_up or result_down): | 132 if either and (result_up or result_down): |
| 130 iter_val = [] | 133 iter_val = [] |
| 131 if result_up and result_down: | 134 if result_up and result_down: |
| 132 if abs(start - int(result_up[res_ind].end)) <= abs(end - int(result_down[-1].start)): | 135 if abs(start - int(result_up[res_ind].end)) <= abs(end - int(result_down[-1].start)): |
| 133 iter_val = [ interval, result_up[res_ind].other ] | 136 iter_val = [ interval, result_up[res_ind].other ] |
| 134 else: | 137 else: |
| 135 #The last element of result_down will be the closest element to the given interval | 138 # The last element of result_down will be the closest element to the given interval |
| 136 iter_val = [ interval, result_down[-1].other ] | 139 iter_val = [ interval, result_down[-1].other ] |
| 137 elif result_up: | 140 elif result_up: |
| 138 iter_val = [ interval, result_up[res_ind].other ] | 141 iter_val = [ interval, result_up[res_ind].other ] |
| 139 elif result_down: | 142 elif result_down: |
| 140 #The last element of result_down will be the closest element to the given interval | 143 # The last element of result_down will be the closest element to the given interval |
| 141 iter_val = [ interval, result_down[-1].other ] | 144 iter_val = [ interval, result_down[-1].other ] |
| 142 yield iter_val | 145 yield iter_val |
| 143 | 146 |
| 144 | 147 |
| 145 def main(): | 148 def main(): |
| 201 output_line_fields.extend( line.fields ) | 204 output_line_fields.extend( line.fields ) |
| 202 output_line_fields.extend( closest_feature.fields ) | 205 output_line_fields.extend( closest_feature.fields ) |
| 203 out_file.write( "%s\n" % ( "\t".join( output_line_fields ) ) ) | 206 out_file.write( "%s\n" % ( "\t".join( output_line_fields ) ) ) |
| 204 else: | 207 else: |
| 205 out_file.write( "%s\n" % result ) | 208 out_file.write( "%s\n" % result ) |
| 206 except ParseError, exc: | 209 except ParseError as exc: |
| 207 fail( "Invalid file format: %s" % str( exc ) ) | 210 fail( "Invalid file format: %s" % str( exc ) ) |
| 208 | 211 |
| 209 print "Direction: %s" % (direction) | 212 print("Direction: %s" % (direction)) |
| 210 if g1.skipped > 0: | 213 if g1.skipped > 0: |
| 211 print skipped( g1, filedesc=" of 1st dataset" ) | 214 print(skipped( g1, filedesc=" of 1st dataset" )) |
| 212 if g2.skipped > 0: | 215 if g2.skipped > 0: |
| 213 print skipped( g2, filedesc=" of 2nd dataset" ) | 216 print(skipped( g2, filedesc=" of 2nd dataset" )) |
| 217 | |
| 214 | 218 |
| 215 if __name__ == "__main__": | 219 if __name__ == "__main__": |
| 216 main() | 220 main() |
