Mercurial > repos > devteam > get_flanks
annotate get_flanks.py @ 3:2fdec558c935 draft
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
| author | devteam |
|---|---|
| date | Tue, 13 Oct 2015 12:50:28 -0400 |
| parents | 0c66884f0cac |
| children | dd9315a56c09 |
| rev | line source |
|---|---|
| 0 | 1 #!/usr/bin/env python |
| 2 #Done by: Guru | |
| 3 | |
| 4 """ | |
| 5 Get Flanking regions. | |
| 6 | |
| 7 usage: %prog input out_file size direction region | |
| 8 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file | |
| 9 -o, --off=N: Offset | |
| 10 """ | |
| 11 | |
|
3
2fdec558c935
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
12 import sys |
| 0 | 13 from bx.cookbook import doc_optparse |
|
3
2fdec558c935
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
14 from galaxy.tools.util.galaxyops import parse_cols_arg |
|
2fdec558c935
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
15 |
| 0 | 16 |
def stop_err( msg ):
    """
    Report a fatal error and terminate the program.

    msg is written to standard error exactly as given (no newline is
    appended), then the interpreter exits by raising SystemExit.

    The exit status is 1 rather than the default 0 so that callers which
    judge success by the process return code see the failure as well as
    those that inspect standard error.
    """
    sys.stderr.write( msg )
    sys.exit( 1 )
| 20 | |
|
3
2fdec558c935
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
21 |
|
2fdec558c935
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
def _flank_intervals( start, end, strand, direction, region, size, offset ):
    """
    Compute the flanking interval(s) for one feature.

    start, end -- integer coordinates of the feature.
    strand     -- '+' or '-'; the caller is expected to have validated it
                  (any value other than '+' is treated as '-').
    direction  -- 'Upstream', 'Downstream' or 'Both'.
    region     -- 'start', 'end', or anything else for the whole feature.
    size       -- length of each flank.
    offset     -- shift applied to the anchor coordinate before the flank is
                  measured (added on '+', subtracted on '-').

    Returns a list of (flank_start, flank_end) tuples in the order they must
    be written: one tuple for 'Upstream'/'Downstream', two for 'Both', and an
    empty list for an unrecognised direction.
    """
    forward = strand == '+'

    if direction == 'Upstream':
        if forward:
            # The flank ends at the anchor and extends towards lower coordinates.
            anchor = ( end if region == 'end' else start ) + offset
            return [ ( anchor - size, anchor ) ]
        # On the minus strand upstream lies towards higher coordinates.
        anchor = ( start if region == 'end' else end ) - offset
        return [ ( anchor, anchor + size ) ]

    if direction == 'Downstream':
        if forward:
            anchor = ( start if region == 'start' else end ) + offset
            return [ ( anchor, anchor + size ) ]
        anchor = ( end if region == 'start' else start ) - offset
        return [ ( anchor - size, anchor ) ]

    if direction == 'Both':
        if forward:
            if region == 'start':
                low_anchor = high_anchor = start + offset
            elif region == 'end':
                low_anchor = high_anchor = end + offset
            else:
                low_anchor = start + offset
                high_anchor = end + offset
            # Lower-coordinate flank is emitted first on the plus strand.
            return [ ( low_anchor - size, low_anchor ), ( high_anchor, high_anchor + size ) ]
        if region == 'start':
            low_anchor = high_anchor = end - offset
        elif region == 'end':
            low_anchor = high_anchor = start - offset
        else:
            low_anchor = start - offset
            high_anchor = end - offset
        # Higher-coordinate flank is emitted first on the minus strand.
        return [ ( high_anchor, high_anchor + size ), ( low_anchor - size, low_anchor ) ]

    return []


def main():
    """
    Command-line entry point: write flanking regions of tabular intervals.

    Positional arguments (see the module usage string) are the input file,
    the output file, the flank size, the direction ('Upstream', 'Downstream'
    or 'Both') and the region ('start', 'end' or anything else for the whole
    feature).  Options --cols and --off give the column numbers and the
    offset.

    For every non-blank, non-comment input line the start/end columns are
    replaced by the flank coordinates and the line is written to the output
    file ('Both' writes two lines per feature).  Lines whose coordinates are
    not integers, whose strand is not '+' or '-', that lack the configured
    columns, or whose flank has a coordinate <= 0 are counted as skipped.

    Terminates through stop_err() when the size is negative or not an
    integer, when the column/argument parsing fails, when the offset is not
    an integer, or when no line could be processed.  A summary is printed to
    standard output on success.
    """
    # The size is validated straight from argv before option parsing so a bad
    # value yields a specific message.
    try:
        size_is_valid = int( sys.argv[3] ) >= 0
    except ( IndexError, ValueError ):
        size_is_valid = False
    if not size_is_valid:
        stop_err( "Length of flanking region(s) must be a non-negative integer." )

    # Parsing Command Line here
    options, args = doc_optparse.parse( __doc__ )
    try:
        # NOTE(review): parse_cols_arg presumably returns 0-based indices with
        # -1 for an unset column -- confirm against galaxyops.
        _chr_col, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
        inp_file, out_file, size, direction, region = args
    except Exception:
        # Any failure here is reported as a metadata problem, whatever the
        # helper raised.
        stop_err( "Metadata issue, correct the metadata attributes by clicking on the pencil icon in the history item." )
    try:
        offset = int( options.off )
        size = int( size )
    except ( TypeError, ValueError ):
        stop_err( "Invalid offset or length entered. Try again by entering valid integer values." )

    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None
    data_lines = 0  # non-blank, non-comment lines seen

    # Output is opened first, as before, so it is created even if the input
    # cannot be opened; both handles are closed on every exit path.
    with open( out_file, 'w' ) as fo, open( inp_file ) as fi:
        for i, line in enumerate( fi ):
            line = line.strip()
            if not line or line.startswith( '#' ):
                continue
            data_lines += 1
            try:
                elems = line.split( '\t' )
                # If the start and/or end columns are not numbers, skip that
                # line.  A coordinate of 0 is a number and is accepted.
                start = int( elems[start_col_1] )
                end = int( elems[end_col_1] )
                # If strand is not defined, default it to +.
                if strand_col_1 != -1:
                    strand = elems[strand_col_1]
                else:
                    strand = "+"
                # If the strand value is not + or -, skip that line.
                if strand not in ( '+', '-' ):
                    raise ValueError( "invalid strand" )
                for flank_start, flank_end in _flank_intervals( start, end, strand, direction, region, size, offset ):
                    # Flanks that reach coordinate 0 or below are rejected.
                    # With 'Both', a first flank already written stays in
                    # the output even if the second one is rejected.
                    if flank_start <= 0 or flank_end <= 0:
                        raise ValueError( "flank outside of valid coordinates" )
                    elems[start_col_1] = str( flank_start )
                    elems[end_col_1] = str( flank_end )
                    fo.write( "%s\n" % '\t'.join( elems ) )
            except ( ValueError, IndexError ):
                skipped_lines += 1
                if invalid_line is None:
                    first_invalid_line = i + 1
                    invalid_line = line

    # Also true for an input without any data line (0 == 0).
    if skipped_lines == data_lines:
        stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." )
    # Single parenthesised argument: prints identically on Python 2 and 3.
    if skipped_lines > 0:
        print( 'Skipped %d invalid lines starting with #%dL "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) )
    print( 'Location: %s, Region: %s, Flank-length: %d, Offset: %d ' % ( direction, region, size, offset ) )
|
2fdec558c935
planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents:
0
diff
changeset
|
189 |
# Script entry point: run the tool only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
