Mercurial > repos > yating-l > hubarchivecreator
comparison util/subtools.py @ 66:4ca7cbf2d9b8 draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 159730602ff500b59aefc7080fb49b726c88d655-dirty
| author | yating-l |
|---|---|
| date | Tue, 26 Sep 2017 16:44:58 -0400 |
| parents | b39dd0b5a166 |
| children | |
Comparison legend: equal, deleted, inserted, replaced
| 65:5a4206366b41 | 66:4ca7cbf2d9b8 |
|---|---|
| 11 import subprocess | 11 import subprocess |
| 12 import sys | 12 import sys |
| 13 import string | 13 import string |
| 14 import tempfile | 14 import tempfile |
| 15 | 15 |
| 16 | |
| 16 class PopenError(Exception): | 17 class PopenError(Exception): |
| 17 def __init__(self, cmd, error, return_code): | 18 def __init__(self, cmd, error, return_code): |
| 18 self.cmd = cmd | 19 self.cmd = cmd |
| 19 self.error = error | 20 self.error = error |
| 20 self.return_code = return_code | 21 self.return_code = return_code |
| 21 | 22 |
| 22 def __str__(self): | 23 def __str__(self): |
| 23 message = "The subprocess {0} has returned the error: {1}.".format(self.cmd, self.return_code) | 24 message = "The subprocess {0} has returned the error: {1}.".format( |
| 24 message = ','.join((message, "Its error message is: {0}".format(self.error))) | 25 self.cmd, self.return_code) |
| 26 message = ','.join( | |
| 27 (message, "Its error message is: {0}".format(self.error))) | |
| 25 return repr(message) | 28 return repr(message) |
| 29 | |
| 26 | 30 |
| 27 def _handleExceptionAndCheckCall(array_call, **kwargs): | 31 def _handleExceptionAndCheckCall(array_call, **kwargs): |
| 28 """ | 32 """ |
| 29 This class handle exceptions and call the tool. | 33 This class handle exceptions and call the tool. |
| 30 It maps the signature of subprocess.check_call: | 34 It maps the signature of subprocess.check_call: |
| 39 output = None | 43 output = None |
| 40 error = None | 44 error = None |
| 41 | 45 |
| 42 # TODO: Check the value of array_call and <=[0] | 46 # TODO: Check the value of array_call and <=[0] |
| 43 logging.debug("Calling {0}:".format(cmd)) | 47 logging.debug("Calling {0}:".format(cmd)) |
| 44 | 48 logging.debug("%s", array_call) |
| 45 logging.debug("---------") | 49 logging.debug("---------") |
| 46 | 50 |
| 47 # TODO: Use universal_newlines option from Popen? | 51 # TODO: Use universal_newlines option from Popen? |
| 48 try: | 52 try: |
| 49 p = subprocess.Popen(array_call, stdout=stdout, stderr=stderr, shell=shell) | 53 p = subprocess.Popen(array_call, stdout=stdout, |
| 54 stderr=stderr, shell=shell) | |
| 50 | 55 |
| 51 # TODO: Change this because of possible memory issues => https://docs.python.org/2/library/subprocess.html#subprocess.Popen.communicate | 56 # TODO: Change this because of possible memory issues => https://docs.python.org/2/library/subprocess.html#subprocess.Popen.communicate |
| 52 | 57 |
| 53 output, error = p.communicate() | 58 output, error = p.communicate() |
| 54 | 59 |
| 62 if p.returncode: | 67 if p.returncode: |
| 63 if stderr == subprocess.PIPE: | 68 if stderr == subprocess.PIPE: |
| 64 raise PopenError(cmd, error, p.returncode) | 69 raise PopenError(cmd, error, p.returncode) |
| 65 else: | 70 else: |
| 66 # TODO: To Handle properly with a design behind, if we received a option as a file for the error | 71 # TODO: To Handle properly with a design behind, if we received a option as a file for the error |
| 67 raise Exception("Error when calling {0}. Error as been logged in your file {1}. Error code: {2}"\ | 72 raise Exception("Error when calling {0}. Error as been logged in your file {1}. Error code: {2}" |
| 68 .format(cmd, stderr.name, p.returncode)) | 73 .format(cmd, stderr.name, p.returncode)) |
| 69 | 74 |
| 70 except OSError as e: | 75 except OSError as e: |
| 71 message = "The subprocess {0} has encountered an OSError: {1}".format(cmd, e.strerror) | 76 message = "The subprocess {0} has encountered an OSError: {1}".format( |
| 77 cmd, e.strerror) | |
| 72 if e.filename: | 78 if e.filename: |
| 73 message = '\n'.join((message, ", against this file: {0}".format(e.filename))) | 79 message = '\n'.join( |
| 80 (message, ", against this file: {0}".format(e.filename))) | |
| 74 logging.error(message) | 81 logging.error(message) |
| 75 sys.exit(-1) | 82 sys.exit(-1) |
| 76 except PopenError as p: | 83 except PopenError as p: |
| 77 message = "The subprocess {0} has returned the error: {1}.".format(p.cmd, p.return_code) | 84 message = "The subprocess {0} has returned the error: {1}.".format( |
| 78 message = '\n'.join((message, "Its error message is: {0}".format(p.error))) | 85 p.cmd, p.return_code) |
| 86 message = '\n'.join( | |
| 87 (message, "Its error message is: {0}".format(p.error))) | |
| 79 | 88 |
| 80 logging.exception(message) | 89 logging.exception(message) |
| 81 | 90 |
| 82 sys.exit(p.return_code) | 91 sys.exit(p.return_code) |
| 83 except Exception as e: | 92 except Exception as e: |
| 84 message = "The subprocess {0} has encountered an unknown error: {1}".format(cmd, e) | 93 message = "The subprocess {0} has encountered an unknown error: {1}".format( |
| 94 cmd, e) | |
| 85 logging.exception(message) | 95 logging.exception(message) |
| 86 | 96 |
| 87 sys.exit(-1) | 97 sys.exit(-1) |
| 88 return p | 98 return p |
| 99 | |
| 89 | 100 |
| 90 def twoBitInfo(two_bit_file_name, two_bit_info_file): | 101 def twoBitInfo(two_bit_file_name, two_bit_info_file): |
| 91 """ | 102 """ |
| 92 Call twoBitInfo and write the result into twoBit_info_file | 103 Call twoBitInfo and write the result into twoBit_info_file |
| 93 :param two_bit_file_name: | 104 :param two_bit_file_name: |
| 96 """ | 107 """ |
| 97 array_call = ['twoBitInfo', two_bit_file_name, two_bit_info_file] | 108 array_call = ['twoBitInfo', two_bit_file_name, two_bit_info_file] |
| 98 p = _handleExceptionAndCheckCall(array_call) | 109 p = _handleExceptionAndCheckCall(array_call) |
| 99 return p | 110 return p |
| 100 | 111 |
| 112 | |
| 101 def faToTwoBit(fasta_file_name, twoBitFile): | 113 def faToTwoBit(fasta_file_name, twoBitFile): |
| 102 """ | 114 """ |
| 103 This function call faToTwoBit UCSC tool, and return the twoBitFile | 115 This function call faToTwoBit UCSC tool, and return the twoBitFile |
| 104 :param fasta_file_name: | 116 :param fasta_file_name: |
| 105 :param mySpecieFolder: | 117 :param mySpecieFolder: |
| 109 array_call = ['faToTwoBit', fasta_file_name, twoBitFile] | 121 array_call = ['faToTwoBit', fasta_file_name, twoBitFile] |
| 110 _handleExceptionAndCheckCall(array_call) | 122 _handleExceptionAndCheckCall(array_call) |
| 111 | 123 |
| 112 return twoBitFile | 124 return twoBitFile |
| 113 | 125 |
| 126 | |
| 114 def gtfToGenePred(input_gtf_file_name, gene_pred_file_name): | 127 def gtfToGenePred(input_gtf_file_name, gene_pred_file_name): |
| 115 """ | 128 """ |
| 116 Call gtfToGenePred and write the result into gene_pred_file_name | 129 Call gtfToGenePred and write the result into gene_pred_file_name |
| 117 :param input_gtf_file_name: | 130 :param input_gtf_file_name: |
| 118 :param gene_pred_file_name: | 131 :param gene_pred_file_name: |
| 120 """ | 133 """ |
| 121 array_call = ['gtfToGenePred', input_gtf_file_name, gene_pred_file_name] | 134 array_call = ['gtfToGenePred', input_gtf_file_name, gene_pred_file_name] |
| 122 p = _handleExceptionAndCheckCall(array_call) | 135 p = _handleExceptionAndCheckCall(array_call) |
| 123 return p | 136 return p |
| 124 | 137 |
| 138 | |
| 125 def gff3ToGenePred(input_gff3_file_name, gene_pred_file_name): | 139 def gff3ToGenePred(input_gff3_file_name, gene_pred_file_name): |
| 126 """ | 140 """ |
| 127 Call gff3ToGenePred and write the result into gene_pred_file_name | 141 Call gff3ToGenePred and write the result into gene_pred_file_name |
| 128 :param input_gff3_file_name: | 142 :param input_gff3_file_name: |
| 129 :param gene_pred_file_name: | 143 :param gene_pred_file_name: |
| 130 :return: | 144 :return: |
| 131 """ | 145 """ |
| 132 valid_gff3_file = tempfile.NamedTemporaryFile(bufsize=0, suffix=".gff3") | 146 array_call = ['gff3ToGenePred', input_gff3_file_name, gene_pred_file_name] |
| 133 validateGff(input_gff3_file_name, valid_gff3_file.name) | 147 p = _handleExceptionAndCheckCall(array_call) |
| 134 array_call = ['gff3ToGenePred', valid_gff3_file.name, gene_pred_file_name] | 148 return p |
| 135 p = _handleExceptionAndCheckCall(array_call) | 149 |
| 136 return p | |
| 137 | 150 |
| 138 def genePredToBigGenePred(gene_pred_file_name, unsorted_bigGenePred_file_name): | 151 def genePredToBigGenePred(gene_pred_file_name, unsorted_bigGenePred_file_name): |
| 139 """ | 152 """ |
| 140 Call genePredToBigGenePred and write the result into unsorted_bigGenePred_file_name | 153 Call genePredToBigGenePred and write the result into unsorted_bigGenePred_file_name |
| 141 :param gene_pred_file_name: | 154 :param gene_pred_file_name: |
| 146 gene_pred_file_name, | 159 gene_pred_file_name, |
| 147 unsorted_bigGenePred_file_name] | 160 unsorted_bigGenePred_file_name] |
| 148 p = _handleExceptionAndCheckCall(array_call) | 161 p = _handleExceptionAndCheckCall(array_call) |
| 149 return p | 162 return p |
| 150 | 163 |
| 164 | |
| 151 def genePredToBed(gene_pred_file_name, unsorted_bed_file_name): | 165 def genePredToBed(gene_pred_file_name, unsorted_bed_file_name): |
| 152 """ | 166 """ |
| 153 Call genePredToBed and write the result into unsorted_bed_file_name | 167 Call genePredToBed and write the result into unsorted_bed_file_name |
| 154 :param gene_pred_file_name: | 168 :param gene_pred_file_name: |
| 155 :param unsorted_bed_file_name: | 169 :param unsorted_bed_file_name: |
| 157 """ | 171 """ |
| 158 array_call = ['genePredToBed', gene_pred_file_name, unsorted_bed_file_name] | 172 array_call = ['genePredToBed', gene_pred_file_name, unsorted_bed_file_name] |
| 159 p = _handleExceptionAndCheckCall(array_call) | 173 p = _handleExceptionAndCheckCall(array_call) |
| 160 return p | 174 return p |
| 161 | 175 |
| 176 | |
| 162 def sort(unsorted_bed_file_name, sorted_bed_file_name): | 177 def sort(unsorted_bed_file_name, sorted_bed_file_name): |
| 163 """ | 178 """ |
| 164 Call sort with -k1,1 -k2,2n on unsorted_bed_file_name and write the result into sorted_bed_file_name | 179 Call sort with -k1,1 -k2,2n on unsorted_bed_file_name and write the result into sorted_bed_file_name |
| 165 :param unsorted_bed_file_name: | 180 :param unsorted_bed_file_name: |
| 166 :param sorted_bed_file_name: | 181 :param sorted_bed_file_name: |
| 167 :return: | 182 :return: |
| 168 """ | 183 """ |
| 169 array_call = ['sort', '-k', '1,1', '-k', '2,2n', unsorted_bed_file_name, '-o', sorted_bed_file_name] | 184 array_call = ['sort', '-k', '1,1', '-k', '2,2n', |
| 170 p = _handleExceptionAndCheckCall(array_call) | 185 unsorted_bed_file_name, '-o', sorted_bed_file_name] |
| 171 return p | 186 p = _handleExceptionAndCheckCall(array_call) |
| 187 return p | |
| 188 | |
| 172 | 189 |
| 173 def sortChromSizes(two_bit_info_file_name, chrom_sizes_file_name): | 190 def sortChromSizes(two_bit_info_file_name, chrom_sizes_file_name): |
| 174 """ | 191 """ |
| 175 Call sort with -k2rn on two_bit_info_file_name and write the result into chrom_sizes_file_name | 192 Call sort with -k2rn on two_bit_info_file_name and write the result into chrom_sizes_file_name |
| 176 :param two_bit_info_file_name: | 193 :param two_bit_info_file_name: |
| 177 :param chrom_sizes_file_name: | 194 :param chrom_sizes_file_name: |
| 178 :return: | 195 :return: |
| 179 """ | 196 """ |
| 180 array_call = ['sort', '-k2rn', two_bit_info_file_name, '-o', chrom_sizes_file_name] | 197 array_call = ['sort', '-k2rn', two_bit_info_file_name, |
| 181 p = _handleExceptionAndCheckCall(array_call) | 198 '-o', chrom_sizes_file_name] |
| 182 return p | 199 p = _handleExceptionAndCheckCall(array_call) |
| 183 | 200 return p |
| 184 def bedToBigBed(sorted_bed_file_name, chrom_sizes_file_name, big_bed_file_name, | 201 |
| 185 typeOption=None, autoSql=None, tab=False, extraIndex=None): | 202 |
| 203 def bedToBigBed(sorted_bed_file_name, chrom_sizes_file_name, big_bed_file_name, options=None): | |
| 186 """ | 204 """ |
| 187 Call bedToBigBed on sorted_bed_file_name, using chrom_sizes_file_name and write the result into big_bed_file_name | 205 Call bedToBigBed on sorted_bed_file_name, using chrom_sizes_file_name and write the result into big_bed_file_name |
| 188 :param sorted_bed_file_name: | 206 :param sorted_bed_file_name: |
| 189 :param chrom_sizes_file_name: | 207 :param chrom_sizes_file_name: |
| 190 :param big_bed_file_name: | 208 :param big_bed_file_name: |
| 191 :return: | 209 :return: |
| 192 """ | 210 """ |
| 193 | 211 |
| 194 # TODO: Move this into the _handleExceptionAndCheckCall function | 212 array_call = ['bedToBigBed', sorted_bed_file_name, |
| 195 # Parse the array | 213 chrom_sizes_file_name, big_bed_file_name] |
| 196 logging.debug("sorted_bed_file_name: {0}".format(sorted_bed_file_name)) | 214 if options: |
| 197 logging.debug("chrom_sizes_file_name: {0}".format(chrom_sizes_file_name)) | 215 typeOption = options.get("typeOption") |
| 198 logging.debug("big_bed_file_name: {0}".format(big_bed_file_name)) | 216 autoSql = options.get("autoSql") |
| 199 logging.debug("typeOption: {0}".format(typeOption)) | 217 tab = options.get("tab") |
| 200 logging.debug("autoSql: {0}".format(autoSql)) | 218 extraIndex = options.get("extraIndex") |
| 201 logging.debug("tab option: {0}".format(tab)) | 219 if typeOption: |
| 202 | 220 typeOption = ''.join(['-type=', typeOption]) |
| 203 array_call = ['bedToBigBed', sorted_bed_file_name, chrom_sizes_file_name, big_bed_file_name] | 221 array_call.append(typeOption) |
| 204 if typeOption: | 222 if autoSql: |
| 205 typeOption = ''.join(['-type=', typeOption]) | 223 autoSql = ''.join(['-as=', autoSql]) |
| 206 array_call.append(typeOption) | 224 array_call.append(autoSql) |
| 207 if autoSql: | 225 if tab: |
| 208 autoSql = ''.join(['-as=', autoSql]) | 226 array_call.append('-tab') |
| 209 array_call.append(autoSql) | 227 if extraIndex: |
| 210 if tab: | 228 index = ''.join(['-extraIndex=', extraIndex]) |
| 211 array_call.append('-tab') | 229 array_call.append(index) |
| 212 if extraIndex: | |
| 213 index = ''.join(['-extraIndex=', extraIndex]) | |
| 214 array_call.append(index) | |
| 215 | |
| 216 p = _handleExceptionAndCheckCall(array_call) | 230 p = _handleExceptionAndCheckCall(array_call) |
| 217 return p | 231 return p |
| 218 | 232 |
| 219 def sortBam(input_bam_file_name, output_sorted_bam_name): | 233 def sortBam(input_bam_file_name, output_sorted_bam_name): |
| 220 """ | 234 """ |
| 221 Call samtools on input_bam_file_name and output the result in output_sorted_bam_name | 235 Call samtools on input_bam_file_name and output the result in output_sorted_bam_name |
| 222 :param input_bam_file_name: | 236 :param input_bam_file_name: |
| 223 :param output_sorted_bam_name: | 237 :param output_sorted_bam_name: |
| 224 :return: | 238 :return: |
| 225 """ | 239 """ |
| 226 array_call = ['samtools', 'sort', input_bam_file_name, '-o', output_sorted_bam_name] | 240 array_call = ['samtools', 'sort', |
| 227 p = _handleExceptionAndCheckCall(array_call) | 241 input_bam_file_name, '-o', output_sorted_bam_name] |
| 228 return p | 242 p = _handleExceptionAndCheckCall(array_call) |
| 243 return p | |
| 244 | |
| 229 | 245 |
| 230 def createBamIndex(input_sorted_bam_file_name, output_name_index_name): | 246 def createBamIndex(input_sorted_bam_file_name, output_name_index_name): |
| 231 """ | 247 """ |
| 232 Call `samtools index` on imput_sorted_bam_file_name and output the result in output_name_index_name | 248 Call `samtools index` on imput_sorted_bam_file_name and output the result in output_name_index_name |
| 233 :param input_sorted_bam_file_name: | 249 :param input_sorted_bam_file_name: |
| 234 :param output_name_index_name: | 250 :param output_name_index_name: |
| 235 :return: | 251 :return: |
| 236 """ | 252 """ |
| 237 array_call = ['samtools', 'index', input_sorted_bam_file_name, output_name_index_name] | 253 array_call = ['samtools', 'index', |
| 238 p = _handleExceptionAndCheckCall(array_call) | 254 input_sorted_bam_file_name, output_name_index_name] |
| 239 return p | 255 p = _handleExceptionAndCheckCall(array_call) |
| 256 return p | |
| 257 | |
| 240 | 258 |
| 241 def pslToBigPsl(input_psl_file_name, output_bed12_file_name): | 259 def pslToBigPsl(input_psl_file_name, output_bed12_file_name): |
| 242 """ | 260 """ |
| 243 Call `pslToBigPsl` on input_psl_file_name and output the result in output_bed12_file_name | 261 Call `pslToBigPsl` on input_psl_file_name and output the result in output_bed12_file_name |
| 244 :param input_psl_file_name: Name of the psl input file | 262 :param input_psl_file_name: Name of the psl input file |
| 249 array_call = ['pslToBigPsl', input_psl_file_name, output_bed12_file_name] | 267 array_call = ['pslToBigPsl', input_psl_file_name, output_bed12_file_name] |
| 250 | 268 |
| 251 p = _handleExceptionAndCheckCall(array_call) | 269 p = _handleExceptionAndCheckCall(array_call) |
| 252 return p | 270 return p |
| 253 | 271 |
| 254 #santitize trackName. Because track name must begin with a letter and | 272 # santitize trackName. Because track name must begin with a letter and |
| 255 # contain only the following chars: [a-zA-Z0-9_]. | 273 # contain only the following chars: [a-zA-Z0-9_]. |
| 256 # See the "track" Common settings at: | 274 # See the "track" Common settings at: |
| 257 #https://genome.ucsc.edu/goldenpath/help/trackDb/trackDbHub.html#bigPsl_-_Pairwise_Alignments | 275 # https://genome.ucsc.edu/goldenpath/help/trackDb/trackDbHub.html#bigPsl_-_Pairwise_Alignments |
| 258 def fixName(filename): | 276 |
| 259 if filename == 'cytoBandIdeo': | 277 def validateFiles(input_file, chrom_sizes_file_name, file_type, options=None): |
| 260 return filename | 278 """ |
| 261 valid_chars = "_%s%s" % (string.ascii_letters, string.digits) | 279 Call validateFiles on input_file, using chrom_sizes_file_name and file_type |
| 262 sanitize_name = ''.join([c if c in valid_chars else '_' for c in filename]) | 280 :param input_file: |
| 263 sanitize_name = "gonramp_" + sanitize_name | 281 :param chrom_sizes_file_name: |
| 264 return sanitize_name | 282 :param file_type: |
| 265 | 283 :return: |
| 266 def validateGff(orig_gff3, valid_gff3): | 284 """ |
| 267 """ | 285 |
| 268 Remove extra meta line: ##gff-version 3 | 286 array_call = ['validateFiles', '-chromInfo=' + chrom_sizes_file_name, '-type='+ file_type, input_file] |
| 269 """ | 287 if options: |
| 270 valid = open(valid_gff3, 'w') | 288 tab = options.get("tab") |
| 271 num = 0 | 289 autoSql = options.get("autoSql") |
| 272 with open(orig_gff3, 'r') as f: | 290 logging.debug("tab: {0}".format(tab)) |
| 273 for line in f: | 291 logging.debug("autoSql: {0}".format(autoSql)) |
| 274 if '##gff-version 3' in line: | 292 if autoSql: |
| 275 if num == 0: | 293 autoSql = ''.join(['-as=', autoSql]) |
| 276 num += 1 | 294 array_call.append(autoSql) |
| 277 else: | 295 if tab: |
| 278 continue | 296 array_call.append('-tab') |
| 279 valid.write(line) | 297 p = _handleExceptionAndCheckCall(array_call) |
| 280 | 298 return p |
| 299 | |
| 300 def pslCheck(input_file, options=None): | |
| 301 """ | |
| 302 Call pslCheck on input_file | |
| 303 :param input_file: | |
| 304 :return: | |
| 305 """ | |
| 306 | |
| 307 array_call = ['pslCheck', input_file] | |
| 308 p = _handleExceptionAndCheckCall(array_call) | |
| 309 return p | |
| 310 | |
| 311 | |
| 312 |
