annotate sam_pileup.py @ 1:efd77ef84903 draft

planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
author devteam
date Tue, 13 Oct 2015 12:30:34 -0400
parents cb3966035753
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
1 #!/usr/bin/env python
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
2
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
3 """
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
4 Creates a pileup file from a bam file and a reference.
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
5
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
6 usage: %prog [options]
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
7 -p, --input1=p: bam file
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
8 -o, --output1=o: Output pileup
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
9 -R, --ref=R: Reference file type
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
10 -n, --ownFile=n: User-supplied fasta reference file
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
11 -b, --bamIndex=b: BAM index file
1
efd77ef84903 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734
devteam
parents: 0
diff changeset
12 -g, --index=g: Path of the indexed reference genome
0
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
13 -s, --lastCol=s: Print the mapping quality as the last column
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
14 -i, --indels=i: Only output lines containing indels
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
15 -M, --mapCap=M: Cap mapping quality
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
16 -c, --consensus=c: Call the consensus sequence using MAQ consensus model
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
17 -T, --theta=T: Theta parameter (error dependency coefficient)
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
18 -N, --hapNum=N: Number of haplotypes in sample
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
19 -r, --fraction=r: Expected fraction of differences between a pair of haplotypes
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
20 -I, --phredProb=I: Phred probability of an indel in sequencing/prep
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
21
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
22 """
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
23
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
24 import os, shutil, subprocess, sys, tempfile
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
25 from galaxy import eggs
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
26 import pkg_resources; pkg_resources.require( "bx-python" )
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
27 from bx.cookbook import doc_optparse
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
28
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
def stop_err( msg ):
    """
    Report a fatal error and terminate the process.

    Writes msg followed by a newline to stderr, then raises SystemExit with
    exit status 1 so that callers inspecting the return code (and not only
    stderr) see the run as failed.
    """
    sys.stderr.write( '%s\n' % msg )
    # A bare sys.exit() would exit with status 0, i.e. "success".
    sys.exit( 1 )
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
32
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
def _read_all( path, chunk_size=1048576 ):
    """
    Return the full content of the file at path as a string.

    The file is read chunk_size characters at a time and reading stops on the
    first empty chunk, so a file whose size is an exact multiple of
    chunk_size cannot cause an endless loop.
    """
    handle = open( path, 'r' )
    chunks = []
    try:
        # try-except nested in try-finally keeps this valid on old Python 2
        try:
            while True:
                chunk = handle.read( chunk_size )
                if not chunk:
                    break
                chunks.append( chunk )
        except OverflowError:
            pass
    finally:
        handle.close()
    return ''.join( chunks )


def _run_command( cmd_args, cwd, stdout=None ):
    """
    Run cmd_args (a list, executed without a shell) inside directory cwd.

    stderr is captured in a temporary file created in cwd; stdout goes to the
    given file object, or is inherited when stdout is None.
    Returns a ( returncode, stderr_text ) tuple.
    """
    fd, err_path = tempfile.mkstemp( dir=cwd )
    err_handle = os.fdopen( fd, 'wb' )
    try:
        proc = subprocess.Popen( args=cmd_args, cwd=cwd, stdout=stdout, stderr=err_handle.fileno() )
        returncode = proc.wait()
    finally:
        err_handle.close()
    return returncode, _read_all( err_path )


def _samtools_version():
    """
    Return the first line of the output of a bare 'samtools' call that
    mentions 'version' (stripped), or None when no such line is printed.
    """
    # Anonymous temp file: nothing is left behind and no name is reused.
    out = tempfile.TemporaryFile( mode='w+' )
    try:
        proc = subprocess.Popen( args=[ 'samtools' ], stdout=out.fileno(), stderr=subprocess.STDOUT )
        proc.wait()
        out.seek( 0 )
        for line in out:
            if line.lower().find( 'version' ) >= 0:
                return line.strip()
    finally:
        out.close()
    return None


def __main__():
    """
    Create a pileup file from a BAM file and a reference using 'samtools pileup'.

    Options are parsed from the module docstring. The BAM file and its index
    are symlinked into a temporary working directory, the reference is indexed
    with 'samtools faidx' when it comes from the history, and the pileup is
    written to options.output1. Any failure is reported through stop_err();
    the working directory is always removed.
    """
    #Parse Command Line
    options, args = doc_optparse.parse( __doc__ )
    # output version # of tool
    try:
        version = _samtools_version()
    except Exception:
        version = None
    if version:
        sys.stdout.write( 'Samtools %s\n' % version )
    else:
        sys.stdout.write( 'Could not determine Samtools version\n' )
    #prepare working directory
    tmpDir = tempfile.mkdtemp()
    try:
        # have to nest try-except in try-finally to handle 2.4
        try:
            #link bam and bam index to working directory (can't move because need to leave original)
            bam_link = os.path.join( tmpDir, 'input.bam' )
            os.symlink( options.input1, bam_link )
            os.symlink( options.bamIndex, '%s.bai' % bam_link )
            #get parameters for pileup command
            pileup_opts = []
            if options.lastCol == 'yes':
                pileup_opts.append( '-s' )
            if options.indels == 'yes':
                pileup_opts.append( '-i' )
            pileup_opts.extend( [ '-M', str( options.mapCap ) ] )
            if options.consensus == 'yes':
                pileup_opts.extend( [ '-c',
                                      '-T', str( options.theta ),
                                      '-N', str( options.hapNum ),
                                      '-r', str( options.fraction ),
                                      '-I', str( options.phredProb ) ] )
            #index reference if necessary
            if options.ref == 'indexed':
                if not os.path.exists( "%s.fai" % options.index ):
                    raise Exception( "Indexed genome %s not present, request it by reporting this error." % options.index )
                reference = options.index
            elif options.ref == 'history':
                reference = os.path.join( tmpDir, 'reference.fa' )
                os.symlink( options.ownFile, reference )
                returncode, stderr = _run_command( [ 'samtools', 'faidx', reference ], tmpDir )
                #did index succeed?
                if returncode != 0:
                    raise Exception( 'Error creating index file\n' + stderr )
            else:
                # previously the unformatted command template was executed here
                raise Exception( 'Unknown reference source: %s' % options.ref )
            #perform pileup command, writing its stdout straight to the output file
            out_handle = open( options.output1, 'wb' )
            try:
                returncode, stderr = _run_command( [ 'samtools', 'pileup' ] + pileup_opts + [ '-f', reference, bam_link ],
                                                   tmpDir, stdout=out_handle )
            finally:
                out_handle.close()
            #did it succeed?
            if returncode != 0:
                raise Exception( stderr )
        except Exception:
            # sys.exc_info() instead of "except ..., e" / "except ... as e"
            # so the syntax is valid on every Python version
            stop_err( 'Error running Samtools pileup tool\n' + str( sys.exc_info()[1] ) )
    finally:
        #clean up temp files
        if os.path.exists( tmpDir ):
            shutil.rmtree( tmpDir )
    # check that there are results in the output file
    if os.path.getsize( options.output1 ) > 0:
        sys.stdout.write( 'Converted BAM to pileup' )
    else:
        stop_err( 'The output file is empty. Your input file may have had no matches, or there may be an error with your input file or settings.' )
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
145
cb3966035753 Uploaded tool tarball.
devteam
parents:
diff changeset
# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    __main__()