comparison cuffmerge_wrapper.py @ 0:e556721b9872 draft

Uploaded
author devteam
date Wed, 26 Nov 2014 13:58:56 -0500
parents
children 0ed8b7f6d506
comparison
equal deleted inserted replaced
-1:000000000000 0:e556721b9872
1 #!/usr/bin/env python
2
3 import optparse, os, shutil, subprocess, sys, tempfile
4
5 def stop_err( msg ):
6 sys.stderr.write( '%s\n' % msg )
7 sys.exit()
8
9 def __main__():
10 #Parse Command Line
11 parser = optparse.OptionParser()
12 parser.add_option( '-g', dest='ref_annotation', help='An optional "reference" annotation GTF. Each sample is matched against this file, and sample isoforms are tagged as overlapping, matching, or novel where appropriate. See the refmap and tmap output file descriptions below.' )
13 parser.add_option( '-s', dest='use_seq_data', action="store_true", help='Causes cuffmerge to look into for fasta files with the underlying genomic sequences (one file per contig) against which your reads were aligned for some optional classification functions. For example, Cufflinks transcripts consisting mostly of lower-case bases are classified as repeats. Note that <seq_dir> must contain one fasta file per reference chromosome, and each file must be named after the chromosome, and have a .fa or .fasta extension.')
14 parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' )
15
16 # Wrapper / Galaxy options.
17 parser.add_option( '', '--index', dest='index', help='The path of the reference genome' )
18 parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' )
19
20 # Outputs.
21 parser.add_option( '', '--merged-transcripts', dest='merged_transcripts' )
22 parser.add_option( '--min-isoform-fraction', dest='min_isoform_fraction' )
23
24 (options, args) = parser.parse_args()
25
26 # output version # of tool
27 try:
28 tmp = tempfile.NamedTemporaryFile().name
29 tmp_stdout = open( tmp, 'wb' )
30 proc = subprocess.Popen( args='cuffmerge -v 2>&1', shell=True, stdout=tmp_stdout )
31 tmp_stdout.close()
32 returncode = proc.wait()
33 stdout = None
34 for line in open( tmp_stdout.name, 'rb' ):
35 if line.lower().find( 'merge_cuff_asms v' ) >= 0:
36 stdout = line.strip()
37 break
38 if stdout:
39 sys.stdout.write( '%s\n' % stdout )
40 else:
41 raise Exception
42 except:
43 sys.stdout.write( 'Could not determine Cuffmerge version\n' )
44
45 # Set/link to sequence file.
46 if options.use_seq_data:
47 if options.ref_file:
48 # Sequence data from history.
49 # Create symbolic link to ref_file so that index will be created in working directory.
50 seq_path = "ref.fa"
51 os.symlink( options.ref_file, seq_path )
52 else:
53 if not os.path.exists( options.index ):
54 stop_err( 'Reference genome %s not present, request it by reporting this error.' % options.index )
55 seq_path = options.index
56
57 # Build command.
58
59 # Base.
60 cmd = "cuffmerge -o cm_output "
61
62 # Add options.
63 if options.num_threads:
64 cmd += ( " -p %i " % int ( options.num_threads ) )
65 if options.ref_annotation:
66 cmd += " -g %s " % options.ref_annotation
67 if options.use_seq_data:
68 cmd += " -s %s " % seq_path
69 if options.min_isoform_fraction:
70 cmd += " --min-isoform-fraction %s " % (options.min_isoform_fraction)
71 # Add input files to a file.
72 inputs_file_name = tempfile.NamedTemporaryFile( dir="." ).name
73 inputs_file = open( inputs_file_name, 'w' )
74 for arg in args:
75 inputs_file.write( arg + "\n" )
76 inputs_file.close()
77 cmd += inputs_file_name
78
79 # Debugging.
80 print cmd
81
82 # Run command.
83 try:
84 tmp_name = tempfile.NamedTemporaryFile( dir="." ).name
85 tmp_stderr = open( tmp_name, 'wb' )
86 proc = subprocess.Popen( args=cmd, shell=True, stderr=tmp_stderr.fileno() )
87 returncode = proc.wait()
88 tmp_stderr.close()
89
90 # Get stderr, allowing for case where it's very large.
91 tmp_stderr = open( tmp_name, 'rb' )
92 stderr = ''
93 buffsize = 1048576
94 try:
95 while True:
96 stderr += tmp_stderr.read( buffsize )
97 if not stderr or len( stderr ) % buffsize != 0:
98 break
99 except OverflowError:
100 pass
101 tmp_stderr.close()
102
103 # Error checking.
104 if returncode != 0:
105 raise Exception, stderr
106
107 if len( open( "cm_output/merged.gtf", 'rb' ).read().strip() ) == 0:
108 raise Exception, 'The output file is empty, there may be an error with your input file or settings.'
109
110 # Copy outputs.
111 shutil.copyfile( "cm_output/merged.gtf" , options.merged_transcripts )
112
113 except Exception, e:
114 stop_err( 'Error running cuffmerge. ' + str( e ) )
115
116 if __name__=="__main__": __main__()