annotate cuffmerge_wrapper.py @ 0:e556721b9872 draft

Uploaded
author devteam
date Wed, 26 Nov 2014 13:58:56 -0500
parents
children 0ed8b7f6d506
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e556721b9872 Uploaded
devteam
parents:
diff changeset
1 #!/usr/bin/env python
e556721b9872 Uploaded
devteam
parents:
diff changeset
2
e556721b9872 Uploaded
devteam
parents:
diff changeset
3 import optparse, os, shutil, subprocess, sys, tempfile
e556721b9872 Uploaded
devteam
parents:
diff changeset
4
e556721b9872 Uploaded
devteam
parents:
diff changeset
5 def stop_err( msg ):
e556721b9872 Uploaded
devteam
parents:
diff changeset
6 sys.stderr.write( '%s\n' % msg )
e556721b9872 Uploaded
devteam
parents:
diff changeset
7 sys.exit()
e556721b9872 Uploaded
devteam
parents:
diff changeset
8
e556721b9872 Uploaded
devteam
parents:
diff changeset
9 def __main__():
e556721b9872 Uploaded
devteam
parents:
diff changeset
10 #Parse Command Line
e556721b9872 Uploaded
devteam
parents:
diff changeset
11 parser = optparse.OptionParser()
e556721b9872 Uploaded
devteam
parents:
diff changeset
12 parser.add_option( '-g', dest='ref_annotation', help='An optional "reference" annotation GTF. Each sample is matched against this file, and sample isoforms are tagged as overlapping, matching, or novel where appropriate. See the refmap and tmap output file descriptions below.' )
e556721b9872 Uploaded
devteam
parents:
diff changeset
13 parser.add_option( '-s', dest='use_seq_data', action="store_true", help='Causes cuffmerge to look into for fasta files with the underlying genomic sequences (one file per contig) against which your reads were aligned for some optional classification functions. For example, Cufflinks transcripts consisting mostly of lower-case bases are classified as repeats. Note that <seq_dir> must contain one fasta file per reference chromosome, and each file must be named after the chromosome, and have a .fa or .fasta extension.')
e556721b9872 Uploaded
devteam
parents:
diff changeset
14 parser.add_option( '-p', '--num-threads', dest='num_threads', help='Use this many threads to align reads. The default is 1.' )
e556721b9872 Uploaded
devteam
parents:
diff changeset
15
e556721b9872 Uploaded
devteam
parents:
diff changeset
16 # Wrapper / Galaxy options.
e556721b9872 Uploaded
devteam
parents:
diff changeset
17 parser.add_option( '', '--index', dest='index', help='The path of the reference genome' )
e556721b9872 Uploaded
devteam
parents:
diff changeset
18 parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' )
e556721b9872 Uploaded
devteam
parents:
diff changeset
19
e556721b9872 Uploaded
devteam
parents:
diff changeset
20 # Outputs.
e556721b9872 Uploaded
devteam
parents:
diff changeset
21 parser.add_option( '', '--merged-transcripts', dest='merged_transcripts' )
e556721b9872 Uploaded
devteam
parents:
diff changeset
22 parser.add_option( '--min-isoform-fraction', dest='min_isoform_fraction' )
e556721b9872 Uploaded
devteam
parents:
diff changeset
23
e556721b9872 Uploaded
devteam
parents:
diff changeset
24 (options, args) = parser.parse_args()
e556721b9872 Uploaded
devteam
parents:
diff changeset
25
e556721b9872 Uploaded
devteam
parents:
diff changeset
26 # output version # of tool
e556721b9872 Uploaded
devteam
parents:
diff changeset
27 try:
e556721b9872 Uploaded
devteam
parents:
diff changeset
28 tmp = tempfile.NamedTemporaryFile().name
e556721b9872 Uploaded
devteam
parents:
diff changeset
29 tmp_stdout = open( tmp, 'wb' )
e556721b9872 Uploaded
devteam
parents:
diff changeset
30 proc = subprocess.Popen( args='cuffmerge -v 2>&1', shell=True, stdout=tmp_stdout )
e556721b9872 Uploaded
devteam
parents:
diff changeset
31 tmp_stdout.close()
e556721b9872 Uploaded
devteam
parents:
diff changeset
32 returncode = proc.wait()
e556721b9872 Uploaded
devteam
parents:
diff changeset
33 stdout = None
e556721b9872 Uploaded
devteam
parents:
diff changeset
34 for line in open( tmp_stdout.name, 'rb' ):
e556721b9872 Uploaded
devteam
parents:
diff changeset
35 if line.lower().find( 'merge_cuff_asms v' ) >= 0:
e556721b9872 Uploaded
devteam
parents:
diff changeset
36 stdout = line.strip()
e556721b9872 Uploaded
devteam
parents:
diff changeset
37 break
e556721b9872 Uploaded
devteam
parents:
diff changeset
38 if stdout:
e556721b9872 Uploaded
devteam
parents:
diff changeset
39 sys.stdout.write( '%s\n' % stdout )
e556721b9872 Uploaded
devteam
parents:
diff changeset
40 else:
e556721b9872 Uploaded
devteam
parents:
diff changeset
41 raise Exception
e556721b9872 Uploaded
devteam
parents:
diff changeset
42 except:
e556721b9872 Uploaded
devteam
parents:
diff changeset
43 sys.stdout.write( 'Could not determine Cuffmerge version\n' )
e556721b9872 Uploaded
devteam
parents:
diff changeset
44
e556721b9872 Uploaded
devteam
parents:
diff changeset
45 # Set/link to sequence file.
e556721b9872 Uploaded
devteam
parents:
diff changeset
46 if options.use_seq_data:
e556721b9872 Uploaded
devteam
parents:
diff changeset
47 if options.ref_file:
e556721b9872 Uploaded
devteam
parents:
diff changeset
48 # Sequence data from history.
e556721b9872 Uploaded
devteam
parents:
diff changeset
49 # Create symbolic link to ref_file so that index will be created in working directory.
e556721b9872 Uploaded
devteam
parents:
diff changeset
50 seq_path = "ref.fa"
e556721b9872 Uploaded
devteam
parents:
diff changeset
51 os.symlink( options.ref_file, seq_path )
e556721b9872 Uploaded
devteam
parents:
diff changeset
52 else:
e556721b9872 Uploaded
devteam
parents:
diff changeset
53 if not os.path.exists( options.index ):
e556721b9872 Uploaded
devteam
parents:
diff changeset
54 stop_err( 'Reference genome %s not present, request it by reporting this error.' % options.index )
e556721b9872 Uploaded
devteam
parents:
diff changeset
55 seq_path = options.index
e556721b9872 Uploaded
devteam
parents:
diff changeset
56
e556721b9872 Uploaded
devteam
parents:
diff changeset
57 # Build command.
e556721b9872 Uploaded
devteam
parents:
diff changeset
58
e556721b9872 Uploaded
devteam
parents:
diff changeset
59 # Base.
e556721b9872 Uploaded
devteam
parents:
diff changeset
60 cmd = "cuffmerge -o cm_output "
e556721b9872 Uploaded
devteam
parents:
diff changeset
61
e556721b9872 Uploaded
devteam
parents:
diff changeset
62 # Add options.
e556721b9872 Uploaded
devteam
parents:
diff changeset
63 if options.num_threads:
e556721b9872 Uploaded
devteam
parents:
diff changeset
64 cmd += ( " -p %i " % int ( options.num_threads ) )
e556721b9872 Uploaded
devteam
parents:
diff changeset
65 if options.ref_annotation:
e556721b9872 Uploaded
devteam
parents:
diff changeset
66 cmd += " -g %s " % options.ref_annotation
e556721b9872 Uploaded
devteam
parents:
diff changeset
67 if options.use_seq_data:
e556721b9872 Uploaded
devteam
parents:
diff changeset
68 cmd += " -s %s " % seq_path
e556721b9872 Uploaded
devteam
parents:
diff changeset
69 if options.min_isoform_fraction:
e556721b9872 Uploaded
devteam
parents:
diff changeset
70 cmd += " --min-isoform-fraction %s " % (options.min_isoform_fraction)
e556721b9872 Uploaded
devteam
parents:
diff changeset
71 # Add input files to a file.
e556721b9872 Uploaded
devteam
parents:
diff changeset
72 inputs_file_name = tempfile.NamedTemporaryFile( dir="." ).name
e556721b9872 Uploaded
devteam
parents:
diff changeset
73 inputs_file = open( inputs_file_name, 'w' )
e556721b9872 Uploaded
devteam
parents:
diff changeset
74 for arg in args:
e556721b9872 Uploaded
devteam
parents:
diff changeset
75 inputs_file.write( arg + "\n" )
e556721b9872 Uploaded
devteam
parents:
diff changeset
76 inputs_file.close()
e556721b9872 Uploaded
devteam
parents:
diff changeset
77 cmd += inputs_file_name
e556721b9872 Uploaded
devteam
parents:
diff changeset
78
e556721b9872 Uploaded
devteam
parents:
diff changeset
79 # Debugging.
e556721b9872 Uploaded
devteam
parents:
diff changeset
80 print cmd
e556721b9872 Uploaded
devteam
parents:
diff changeset
81
e556721b9872 Uploaded
devteam
parents:
diff changeset
82 # Run command.
e556721b9872 Uploaded
devteam
parents:
diff changeset
83 try:
e556721b9872 Uploaded
devteam
parents:
diff changeset
84 tmp_name = tempfile.NamedTemporaryFile( dir="." ).name
e556721b9872 Uploaded
devteam
parents:
diff changeset
85 tmp_stderr = open( tmp_name, 'wb' )
e556721b9872 Uploaded
devteam
parents:
diff changeset
86 proc = subprocess.Popen( args=cmd, shell=True, stderr=tmp_stderr.fileno() )
e556721b9872 Uploaded
devteam
parents:
diff changeset
87 returncode = proc.wait()
e556721b9872 Uploaded
devteam
parents:
diff changeset
88 tmp_stderr.close()
e556721b9872 Uploaded
devteam
parents:
diff changeset
89
e556721b9872 Uploaded
devteam
parents:
diff changeset
90 # Get stderr, allowing for case where it's very large.
e556721b9872 Uploaded
devteam
parents:
diff changeset
91 tmp_stderr = open( tmp_name, 'rb' )
e556721b9872 Uploaded
devteam
parents:
diff changeset
92 stderr = ''
e556721b9872 Uploaded
devteam
parents:
diff changeset
93 buffsize = 1048576
e556721b9872 Uploaded
devteam
parents:
diff changeset
94 try:
e556721b9872 Uploaded
devteam
parents:
diff changeset
95 while True:
e556721b9872 Uploaded
devteam
parents:
diff changeset
96 stderr += tmp_stderr.read( buffsize )
e556721b9872 Uploaded
devteam
parents:
diff changeset
97 if not stderr or len( stderr ) % buffsize != 0:
e556721b9872 Uploaded
devteam
parents:
diff changeset
98 break
e556721b9872 Uploaded
devteam
parents:
diff changeset
99 except OverflowError:
e556721b9872 Uploaded
devteam
parents:
diff changeset
100 pass
e556721b9872 Uploaded
devteam
parents:
diff changeset
101 tmp_stderr.close()
e556721b9872 Uploaded
devteam
parents:
diff changeset
102
e556721b9872 Uploaded
devteam
parents:
diff changeset
103 # Error checking.
e556721b9872 Uploaded
devteam
parents:
diff changeset
104 if returncode != 0:
e556721b9872 Uploaded
devteam
parents:
diff changeset
105 raise Exception, stderr
e556721b9872 Uploaded
devteam
parents:
diff changeset
106
e556721b9872 Uploaded
devteam
parents:
diff changeset
107 if len( open( "cm_output/merged.gtf", 'rb' ).read().strip() ) == 0:
e556721b9872 Uploaded
devteam
parents:
diff changeset
108 raise Exception, 'The output file is empty, there may be an error with your input file or settings.'
e556721b9872 Uploaded
devteam
parents:
diff changeset
109
e556721b9872 Uploaded
devteam
parents:
diff changeset
110 # Copy outputs.
e556721b9872 Uploaded
devteam
parents:
diff changeset
111 shutil.copyfile( "cm_output/merged.gtf" , options.merged_transcripts )
e556721b9872 Uploaded
devteam
parents:
diff changeset
112
e556721b9872 Uploaded
devteam
parents:
diff changeset
113 except Exception, e:
e556721b9872 Uploaded
devteam
parents:
diff changeset
114 stop_err( 'Error running cuffmerge. ' + str( e ) )
e556721b9872 Uploaded
devteam
parents:
diff changeset
115
e556721b9872 Uploaded
devteam
parents:
diff changeset
116 if __name__=="__main__": __main__()