Mercurial > repos > devteam > get_flanks
comparison get_flanks.py @ 0:0c66884f0cac
Imported from capsule None
| author | devteam |
|---|---|
| date | Tue, 01 Apr 2014 09:12:39 -0400 |
| parents | |
| children | 2fdec558c935 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:0c66884f0cac |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 #Done by: Guru | |
| 3 | |
| 4 """ | |
| 5 Get Flanking regions. | |
| 6 | |
| 7 usage: %prog input out_file size direction region | |
| 8 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file | |
| 9 -o, --off=N: Offset | |
| 10 """ | |
| 11 | |
| 12 import sys, re, os | |
| 13 from bx.cookbook import doc_optparse | |
| 14 from galaxy.tools.util.galaxyops import * | |
| 15 | |
| 16 def stop_err( msg ): | |
| 17 sys.stderr.write( msg ) | |
| 18 sys.exit() | |
| 19 | |
| 20 def main(): | |
| 21 try: | |
| 22 if int( sys.argv[3] ) < 0: | |
| 23 raise Exception | |
| 24 except: | |
| 25 stop_err( "Length of flanking region(s) must be a non-negative integer." ) | |
| 26 | |
| 27 # Parsing Command Line here | |
| 28 options, args = doc_optparse.parse( __doc__ ) | |
| 29 try: | |
| 30 chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols ) | |
| 31 inp_file, out_file, size, direction, region = args | |
| 32 if strand_col_1 <= 0: | |
| 33 strand = "+" #if strand is not defined, default it to + | |
| 34 except: | |
| 35 stop_err( "Metadata issue, correct the metadata attributes by clicking on the pencil icon in the history item." ) | |
| 36 try: | |
| 37 offset = int(options.off) | |
| 38 size = int(size) | |
| 39 except: | |
| 40 stop_err( "Invalid offset or length entered. Try again by entering valid integer values." ) | |
| 41 | |
| 42 fo = open(out_file,'w') | |
| 43 | |
| 44 skipped_lines = 0 | |
| 45 first_invalid_line = 0 | |
| 46 invalid_line = None | |
| 47 elems = [] | |
| 48 j=0 | |
| 49 for i, line in enumerate( file( inp_file ) ): | |
| 50 line = line.strip() | |
| 51 if line and (not line.startswith( '#' )) and line != '': | |
| 52 j+=1 | |
| 53 try: | |
| 54 elems = line.split('\t') | |
| 55 #if the start and/or end columns are not numbers, skip that line. | |
| 56 assert int(elems[start_col_1]) | |
| 57 assert int(elems[end_col_1]) | |
| 58 if strand_col_1 != -1: | |
| 59 strand = elems[strand_col_1] | |
| 60 #if the stand value is not + or -, skip that line. | |
| 61 assert strand in ['+', '-'] | |
| 62 if direction == 'Upstream': | |
| 63 if strand == '+': | |
| 64 if region == 'end': | |
| 65 elems[end_col_1] = str(int(elems[end_col_1]) + offset) | |
| 66 elems[start_col_1] = str( int(elems[end_col_1]) - size ) | |
| 67 else: | |
| 68 elems[end_col_1] = str(int(elems[start_col_1]) + offset) | |
| 69 elems[start_col_1] = str( int(elems[end_col_1]) - size ) | |
| 70 elif strand == '-': | |
| 71 if region == 'end': | |
| 72 elems[start_col_1] = str(int(elems[start_col_1]) - offset) | |
| 73 elems[end_col_1] = str(int(elems[start_col_1]) + size) | |
| 74 else: | |
| 75 elems[start_col_1] = str(int(elems[end_col_1]) - offset) | |
| 76 elems[end_col_1] = str(int(elems[start_col_1]) + size) | |
| 77 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
| 78 fo.write( "%s\n" % '\t'.join( elems ) ) | |
| 79 | |
| 80 elif direction == 'Downstream': | |
| 81 if strand == '-': | |
| 82 if region == 'start': | |
| 83 elems[end_col_1] = str(int(elems[end_col_1]) - offset) | |
| 84 elems[start_col_1] = str( int(elems[end_col_1]) - size ) | |
| 85 else: | |
| 86 elems[end_col_1] = str(int(elems[start_col_1]) - offset) | |
| 87 elems[start_col_1] = str( int(elems[end_col_1]) - size ) | |
| 88 elif strand == '+': | |
| 89 if region == 'start': | |
| 90 elems[start_col_1] = str(int(elems[start_col_1]) + offset) | |
| 91 elems[end_col_1] = str(int(elems[start_col_1]) + size) | |
| 92 else: | |
| 93 elems[start_col_1] = str(int(elems[end_col_1]) + offset) | |
| 94 elems[end_col_1] = str(int(elems[start_col_1]) + size) | |
| 95 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
| 96 fo.write( "%s\n" % '\t'.join( elems ) ) | |
| 97 | |
| 98 elif direction == 'Both': | |
| 99 if strand == '-': | |
| 100 if region == 'start': | |
| 101 start = str(int(elems[end_col_1]) - offset) | |
| 102 end1 = str(int(start) + size) | |
| 103 end2 = str(int(start) - size) | |
| 104 elems[start_col_1]=start | |
| 105 elems[end_col_1]=end1 | |
| 106 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
| 107 fo.write( "%s\n" % '\t'.join( elems ) ) | |
| 108 elems[start_col_1]=end2 | |
| 109 elems[end_col_1]=start | |
| 110 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
| 111 fo.write( "%s\n" % '\t'.join( elems ) ) | |
| 112 elif region == 'end': | |
| 113 start = str(int(elems[start_col_1]) - offset) | |
| 114 end1 = str(int(start) + size) | |
| 115 end2 = str(int(start) - size) | |
| 116 elems[start_col_1]=start | |
| 117 elems[end_col_1]=end1 | |
| 118 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
| 119 fo.write( "%s\n" % '\t'.join( elems ) ) | |
| 120 elems[start_col_1]=end2 | |
| 121 elems[end_col_1]=start | |
| 122 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
| 123 fo.write( "%s\n" % '\t'.join( elems ) ) | |
| 124 else: | |
| 125 start1 = str(int(elems[end_col_1]) - offset) | |
| 126 end1 = str(int(start1) + size) | |
| 127 start2 = str(int(elems[start_col_1]) - offset) | |
| 128 end2 = str(int(start2) - size) | |
| 129 elems[start_col_1]=start1 | |
| 130 elems[end_col_1]=end1 | |
| 131 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
| 132 fo.write( "%s\n" % '\t'.join( elems ) ) | |
| 133 elems[start_col_1]=end2 | |
| 134 elems[end_col_1]=start2 | |
| 135 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
| 136 fo.write( "%s\n" % '\t'.join( elems ) ) | |
| 137 elif strand == '+': | |
| 138 if region == 'start': | |
| 139 start = str(int(elems[start_col_1]) + offset) | |
| 140 end1 = str(int(start) - size) | |
| 141 end2 = str(int(start) + size) | |
| 142 elems[start_col_1]=end1 | |
| 143 elems[end_col_1]=start | |
| 144 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
| 145 fo.write( "%s\n" % '\t'.join( elems ) ) | |
| 146 elems[start_col_1]=start | |
| 147 elems[end_col_1]=end2 | |
| 148 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
| 149 fo.write( "%s\n" % '\t'.join( elems ) ) | |
| 150 elif region == 'end': | |
| 151 start = str(int(elems[end_col_1]) + offset) | |
| 152 end1 = str(int(start) - size) | |
| 153 end2 = str(int(start) + size) | |
| 154 elems[start_col_1]=end1 | |
| 155 elems[end_col_1]=start | |
| 156 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
| 157 fo.write( "%s\n" % '\t'.join( elems ) ) | |
| 158 elems[start_col_1]=start | |
| 159 elems[end_col_1]=end2 | |
| 160 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
| 161 fo.write( "%s\n" % '\t'.join( elems ) ) | |
| 162 else: | |
| 163 start1 = str(int(elems[start_col_1]) + offset) | |
| 164 end1 = str(int(start1) - size) | |
| 165 start2 = str(int(elems[end_col_1]) + offset) | |
| 166 end2 = str(int(start2) + size) | |
| 167 elems[start_col_1]=end1 | |
| 168 elems[end_col_1]=start1 | |
| 169 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
| 170 fo.write( "%s\n" % '\t'.join( elems ) ) | |
| 171 elems[start_col_1]=start2 | |
| 172 elems[end_col_1]=end2 | |
| 173 assert int(elems[start_col_1]) > 0 and int(elems[end_col_1]) > 0 | |
| 174 fo.write( "%s\n" % '\t'.join( elems ) ) | |
| 175 except: | |
| 176 skipped_lines += 1 | |
| 177 if not invalid_line: | |
| 178 first_invalid_line = i + 1 | |
| 179 invalid_line = line | |
| 180 fo.close() | |
| 181 | |
| 182 if skipped_lines == j: | |
| 183 stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." ) | |
| 184 if skipped_lines > 0: | |
| 185 print 'Skipped %d invalid lines starting with #%dL "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) | |
| 186 print 'Location: %s, Region: %s, Flank-length: %d, Offset: %d ' %( direction, region, size, offset ) | |
| 187 | |
| 188 if __name__ == "__main__": | |
| 189 main() |
