Mercurial > repos > iuc > meme_fimo
comparison fimo_wrapper.py @ 0:27c742760a84 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/meme commit e96df94dba60050fa28aaf55b5bb095717a5f260
| author | iuc |
|---|---|
| date | Tue, 22 Dec 2015 16:59:48 -0500 |
| parents | |
| children | 8d8905e040cf |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:27c742760a84 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import argparse | |
| 3 import os | |
| 4 import shutil | |
| 5 import string | |
| 6 import subprocess | |
| 7 import sys | |
| 8 import tempfile | |
| 9 | |
| 10 BUFFSIZE = 1048576 | |
| 11 # Translation table for reverse Complement, with ambiguity codes. | |
| 12 DNA_COMPLEMENT = string.maketrans("ACGTRYKMBDHVacgtrykmbdhv", "TGCAYRMKVHDBtgcayrmkvhdb") | |
| 13 | |
| 14 | |
| 15 def reverse(sequence): | |
| 16 # Reverse sequence string. | |
| 17 return sequence[::-1] | |
| 18 | |
| 19 | |
| 20 def dna_complement(sequence): | |
| 21 # Complement DNA sequence string. | |
| 22 return sequence.translate(DNA_COMPLEMENT) | |
| 23 | |
| 24 | |
| 25 def dna_reverse_complement(sequence): | |
| 26 # Returns the reverse complement of the sequence. | |
| 27 sequence = reverse(sequence) | |
| 28 return dna_complement(sequence) | |
| 29 | |
| 30 | |
| 31 def stop_err(msg): | |
| 32 sys.stderr.write(msg) | |
| 33 sys.exit(1) | |
| 34 | |
| 35 parser = argparse.ArgumentParser() | |
| 36 parser.add_argument('--input_motifs', dest='input_motifs', help='MEME output formatted files for input to fimo') | |
| 37 parser.add_argument('--input_fasta', dest='input_fasta', help='Fassta sequence file') | |
| 38 parser.add_argument('--options_type', dest='options_type', help='Basic or Advance options') | |
| 39 parser.add_argument('--input_psp', dest='input_psp', default=None, help='File containing position specific priors') | |
| 40 parser.add_argument('--input_prior_dist', dest='input_prior_dist', default=None, help='File containing binned distribution of priors') | |
| 41 parser.add_argument('--alpha', dest='alpha', type=float, default=1.0, help='The alpha parameter for calculating position specific priors') | |
| 42 parser.add_argument('--bgfile', dest='bgfile', default=None, help='Background file type, used only if not "default"') | |
| 43 parser.add_argument('--max_strand', action='store_true', help='If matches on both strands at a given position satisfy the output threshold, only report the match for the strand with the higher score') | |
| 44 parser.add_argument('--max_stored_scores', dest='max_stored_scores', type=int, help='Maximum score count to store') | |
| 45 parser.add_argument('--motif', dest='motifs', action='append', default=[], help='Specify motif by id') | |
| 46 parser.add_argument('--motif_pseudo', dest='motif_pseudo', type=float, default=0.1, help='Pseudocount to add to counts in motif matrix') | |
| 47 parser.add_argument('--no_qvalue', action='store_true', help='Do not compute a q-value for each p-value') | |
| 48 parser.add_argument('--norc', action='store_true', help='Do not score the reverse complement DNA strand') | |
| 49 parser.add_argument('--output_path', dest='output_path', help='Output files directory') | |
| 50 parser.add_argument('--parse_genomic_coord', action='store_true', help='Check each sequence header for UCSC style genomic coordinates') | |
| 51 parser.add_argument('--qv_thresh', action='store_true', help='Use q-values for the output threshold') | |
| 52 parser.add_argument('--thresh', dest='thresh', type=float, help='p-value threshold') | |
| 53 parser.add_argument('--gff_output', dest='gff_output', help='Gff output file') | |
| 54 parser.add_argument('--html_output', dest='html_output', help='HTML output file') | |
| 55 parser.add_argument('--interval_output', dest='interval_output', help='Interval output file') | |
| 56 parser.add_argument('--txt_output', dest='txt_output', help='Text output file') | |
| 57 parser.add_argument('--xml_output', dest='xml_output', help='XML output file') | |
| 58 args = parser.parse_args() | |
| 59 | |
| 60 fimo_cmd_list = ['fimo'] | |
| 61 if args.options_type == 'advanced': | |
| 62 fimo_cmd_list.append('--alpha %4f' % args.alpha) | |
| 63 if args.bgfile is not None: | |
| 64 fimo_cmd_list.append('--bgfile "%s"' % args.bgfile) | |
| 65 if args.max_strand: | |
| 66 fimo_cmd_list.append('--max-strand') | |
| 67 fimo_cmd_list.append('--max-stored-scores %d' % args.max_stored_scores) | |
| 68 if len(args.motifs) > 0: | |
| 69 for motif in args.motifs: | |
| 70 fimo_cmd_list.append('--motif "%s"' % motif) | |
| 71 fimo_cmd_list.append('--motif-pseudo %4f' % args.motif_pseudo) | |
| 72 if args.no_qvalue: | |
| 73 fimo_cmd_list.append('--no-qvalue') | |
| 74 if args.norc: | |
| 75 fimo_cmd_list.append('--norc') | |
| 76 if args.parse_genomic_coord: | |
| 77 fimo_cmd_list.append('--parse-genomic-coord') | |
| 78 if args.qv_thresh: | |
| 79 fimo_cmd_list.append('--qv-thresh') | |
| 80 fimo_cmd_list.append('--thresh %4f' % args.thresh) | |
| 81 if args.input_psp is not None: | |
| 82 fimo_cmd_list.append('--psp "%s"' % args.input_psp) | |
| 83 if args.input_prior_dist is not None: | |
| 84 fimo_cmd_list.append('--prior-dist "%s"' % args.input_prior_dist) | |
| 85 fimo_cmd_list.append('--o "%s"' % (args.output_path)) | |
| 86 fimo_cmd_list.append('--verbosity 1') | |
| 87 fimo_cmd_list.append(args.input_motifs) | |
| 88 fimo_cmd_list.append(args.input_fasta) | |
| 89 | |
| 90 fimo_cmd = ' '.join(fimo_cmd_list) | |
| 91 | |
| 92 try: | |
| 93 tmp_stderr = tempfile.NamedTemporaryFile() | |
| 94 proc = subprocess.Popen(args=fimo_cmd, shell=True, stderr=tmp_stderr) | |
| 95 returncode = proc.wait() | |
| 96 tmp_stderr.seek(0) | |
| 97 stderr = '' | |
| 98 try: | |
| 99 while True: | |
| 100 stderr += tmp_stderr.read(BUFFSIZE) | |
| 101 if not stderr or len(stderr) % BUFFSIZE != 0: | |
| 102 break | |
| 103 except OverflowError: | |
| 104 pass | |
| 105 if returncode != 0: | |
| 106 stop_err(stderr) | |
| 107 except Exception, e: | |
| 108 stop_err('Error running FIMO:\n%s' % str(e)) | |
| 109 | |
| 110 shutil.move(os.path.join(args.output_path, 'fimo.txt'), args.txt_output) | |
| 111 shutil.move(os.path.join(args.output_path, 'fimo.gff'), args.gff_output) | |
| 112 shutil.move(os.path.join(args.output_path, 'fimo.xml'), args.xml_output) | |
| 113 shutil.move(os.path.join(args.output_path, 'fimo.html'), args.html_output) | |
| 114 | |
| 115 out_file = open(args.interval_output, 'wb') | |
| 116 out_file.write("#%s\n" % "\t".join(("chr", "start", "end", "pattern name", "score", "strand", "matched sequence", "p-value", "q-value"))) | |
| 117 for line in open(args.txt_output): | |
| 118 if line.startswith('#'): | |
| 119 continue | |
| 120 fields = line.rstrip("\n\r").split("\t") | |
| 121 start, end = int(fields[2]), int(fields[3]) | |
| 122 sequence = fields[7] | |
| 123 if start > end: | |
| 124 # Flip start and end and set strand. | |
| 125 start, end = end, start | |
| 126 strand = "-" | |
| 127 # We want sequences relative to strand; FIMO always provides + stranded sequence. | |
| 128 sequence = dna_reverse_complement(sequence) | |
| 129 else: | |
| 130 strand = "+" | |
| 131 # Make 0-based start position. | |
| 132 start -= 1 | |
| 133 out_file.write("%s\n" % "\t".join([fields[1], str(start), str(end), fields[0], fields[4], strand, sequence, fields[5], fields[6]])) | |
| 134 out_file.close() |
