cufflinks: cufflinks_wrapper.py annotate

annotate cufflinks_wrapper.py @ 1:b9d29fdd1190 draft

planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734

author	devteam
date	Tue, 13 Oct 2015 12:37:52 -0400
parents	1fffcfe2fb35
children	a6f581469476

rev	line source
0 1fffcfe2fb35 Uploaded devteam parents: diff changeset	1 #!/usr/bin/env python
1fffcfe2fb35 Uploaded devteam parents: diff changeset	2
1 b9d29fdd1190 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734 devteam parents: 0 diff changeset	3 import optparse
b9d29fdd1190 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734 devteam parents: 0 diff changeset	4 import os
b9d29fdd1190 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734 devteam parents: 0 diff changeset	5 import shutil
b9d29fdd1190 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734 devteam parents: 0 diff changeset	6 import subprocess
b9d29fdd1190 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734 devteam parents: 0 diff changeset	7 import sys
b9d29fdd1190 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734 devteam parents: 0 diff changeset	8 import tempfile
0 1fffcfe2fb35 Uploaded devteam parents: diff changeset	9 from galaxy.datatypes.util.gff_util import parse_gff_attributes, gff_attributes_to_str
1fffcfe2fb35 Uploaded devteam parents: diff changeset	10
1 b9d29fdd1190 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734 devteam parents: 0 diff changeset	11
def stop_err(msg):
    """Abort the program, reporting *msg* followed by a newline.

    The text is handed to ``sys.exit``, which raises ``SystemExit`` carrying
    the message; the interpreter prints it to stderr and exits with a
    non-zero status when the exception is not caught.
    """
    message = "%s\n" % msg
    sys.exit(message)
b9d29fdd1190 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734 devteam parents: 0 diff changeset	14
b9d29fdd1190 planemo upload commit 33927a87ba2eee9bf0ecdd376a66241b17b3d734 devteam parents: 0 diff changeset	15
def _build_option_parser():
    """Return the optparse parser describing this wrapper's command line."""
    parser = optparse.OptionParser()
    add = parser.add_option

    add('-1', '--input', dest='input',
        help=' file of RNA-Seq read alignments in the SAM format. SAM is a '
             'standard short read alignment, that allows aligners to attach '
             'custom tags to individual alignments, and Cufflinks requires '
             'that the alignments you supply have some of these tags. Please '
             'see Input formats for more details.')
    add('-I', '--max-intron-length', dest='max_intron_len',
        help='The maximum intron length. Cufflinks will not report '
             'transcripts with introns longer than this, and will ignore SAM '
             'alignments with REF_SKIP CIGAR operations longer than this. '
             'The default is 300,000.')
    add('-F', '--min-isoform-fraction', dest='min_isoform_fraction',
        help='After calculating isoform abundance for a gene, Cufflinks '
             'filters out transcripts that it believes are very low '
             'abundance, because isoforms expressed at extremely low levels '
             'often cannot reliably be assembled, and may even be artifacts '
             'of incompletely spliced precursors of processed transcripts. '
             'This parameter is also used to filter out introns that have '
             'far fewer spliced alignments supporting them. The default is '
             '0.05, or 5% of the most abundant isoform (the major isoform) '
             'of the gene.')
    add('-j', '--pre-mrna-fraction', dest='pre_mrna_fraction',
        help='Some RNA-Seq protocols produce a significant amount of reads '
             'that originate from incompletely spliced transcripts, and '
             'these reads can confound the assembly of fully spliced mRNAs. '
             'Cufflinks uses this parameter to filter out alignments that '
             'lie within the intronic intervals implied by the spliced '
             'alignments. The minimum depth of coverage in the intronic '
             'region covered by the alignment is divided by the number of '
             'spliced reads, and if the result is lower than this parameter '
             'value, the intronic alignments are ignored. The default is 5%.')
    add('-p', '--num-threads', dest='num_threads',
        help='Use this many threads to align reads. The default is 1.')
    add('-G', '--GTF', dest='GTF',
        help='Tells Cufflinks to use the supplied reference annotation to '
             'estimate isoform expression. It will not assemble novel '
             'transcripts, and the program will ignore alignments not '
             'structurally compatible with any reference transcript.')
    add('--compatible-hits-norm', dest='compatible_hits_norm',
        action='store_true',
        help='Count hits compatible with reference RNAs only')
    add('-g', '--GTF-guide', dest='GTFguide',
        help='use reference transcript annotation to guide assembly')
    add('--3-overhang-tolerance', dest='three_overhang_tolerance',
        help='The number of bp allowed to overhang the 3prime end of a '
             'reference transcript when determining if an assembled '
             'transcript should be merged with it (ie, the assembled '
             'transcript is not novel). The default is 600 bp.')
    add('--intron-overhang-tolerance', dest='intron_overhang_tolerance',
        help='The number of bp allowed to enter the intron of a reference '
             'transcript when determining if an assembled transcript should '
             'be merged with it (ie, the assembled transcript is not novel). '
             'The default is 50 bp.')
    # NOTE(review): this option takes a value (no action="store_true") even
    # though it is only ever forwarded as a bare flag. Left unchanged because
    # switching it would alter the command-line contract; confirm against the
    # calling tool definition before changing.
    add('--no-faux-reads', dest='no_faux_reads',
        help='This option disables tiling of the reference transcripts with '
             'faux reads. Use this if you only want to use sequencing reads '
             'in assembly but do not want to output assembled transcripts '
             'that lay within reference transcripts. All reference '
             'transcripts in the input annotation will also be included in '
             'the output.')
    add('-u', '--multi-read-correct', dest='multi_read_correct',
        action='store_true',
        help='Tells Cufflinks to do an initial estimation procedure to more '
             'accurately weight reads mapping to multiple locations in the '
             'genome')

    # Normalization options.
    add('--no-effective-length-correction',
        dest='no_effective_length_correction', action='store_true')
    add('--no-length-correction', dest='no_length_correction',
        action='store_true')

    # Wrapper / Galaxy options.
    add('-A', '--assembled-isoforms-output',
        dest='assembled_isoforms_output_file',
        help='Assembled isoforms output file; format is GTF.')

    # Advanced options.
    add('--library-type', dest='library_type',
        help=' library prep used for input reads, default fr-unstranded')
    add('-M', '--mask-file', dest='mask_file',
        help='Tells Cufflinks to ignore all reads that could have come from '
             'transcripts in this GTF file. We recommend including any '
             'annotated rRNA, mitochondrial transcripts other abundant '
             'transcripts you wish to ignore in your analysis in this file. '
             'Due to variable efficiency of mRNA enrichment methods and rRNA '
             'depletion kits, masking these transcripts often improves the '
             'overall robustness of transcript abundance estimates.')
    # cufflinks: --frag-len-mean
    add('-m', '--inner-mean-dist', dest='inner_mean_dist',
        help='This is the expected (mean) inner distance between mate '
             'pairs. For, example, for paired end runs with fragments '
             'selected at 300bp, where each end is 50bp, you should set -m '
             'to be 200. The default is 45bp.')
    # cufflinks: --frag-len-std-dev
    add('-s', '--inner-dist-std-dev', dest='inner_dist_std_dev',
        help='The standard deviation for the distribution on inner '
             'distances between mate pairs. The default is 20bp.')
    add('--max-mle-iterations', dest='max_mle_iterations',
        help='Sets the number of iterations allowed during maximum '
             'likelihood estimation of abundances. Default: 5000')
    add('--junc-alpha', dest='junc_alpha',
        help='Alpha value for the binomial test used during false positive '
             'spliced alignment filtration. Default: 0.001')
    add('--small-anchor-fraction', dest='small_anchor_fraction',
        help='Spliced reads with less than this percent of their length on '
             'each side of the junction are considered suspicious and are '
             'candidates for filtering prior to assembly. Default: 0.09.')
    add('--overhang-tolerance', dest='overhang_tolerance',
        help='The number of bp allowed to enter the intron of a transcript '
             'when determining if a read or another transcript is mappable '
             'to/compatible with it. The default is 8 bp based on the '
             'default bowtie/TopHat parameters.')
    add('--max-bundle-length', dest='max_bundle_length',
        help='Maximum genomic length of a given bundle. '
             'Default: 3,500,000bp')
    add('--max-bundle-frags', dest='max_bundle_frags',
        help='Sets the maximum number of fragments a locus may have before '
             'being skipped. Skipped loci are listed in skipped.gtf. '
             'Default: 1,000,000')
    add('--min-intron-length', dest='min_intron_length',
        help='Minimal allowed intron size. Default: 50')
    add('--trim-3-avgcov-thresh', dest='trim_three_avgcov_thresh',
        help='Minimum average coverage required to attempt 3prime trimming. '
             'Default: 10')
    add('--trim-3-dropoff-frac', dest='trim_three_dropoff_frac',
        help='The fraction of average coverage below which to trim the '
             '3prime end of an assembled transcript. Default: 0.1')

    # Bias correction options.
    add('-b', dest='do_bias_correction', action='store_true',
        help='Providing Cufflinks with a multifasta file via this option '
             'instructs it to run our new bias detection and correction '
             'algorithm which can significantly improve accuracy of '
             'transcript abundance estimates.')
    add('--index', dest='index', help='The path of the reference genome')
    add('--ref_file', dest='ref_file',
        help='The reference dataset from the history')

    # Global model (for trackster).
    add('--global_model', dest='global_model_file',
        help='Global model used for computing on local data')
    return parser


def _report_version():
    """Write the Cufflinks version line to stdout, or a fallback notice."""
    version = None
    try:
        # Invoked without input, cufflinks prints its usage banner; both
        # streams are merged because the banner may arrive on stderr.
        proc = subprocess.Popen(['cufflinks', '--no-update-check'],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                universal_newlines=True)
        banner = proc.communicate()[0] or ''
        for line in banner.splitlines():
            if 'cufflinks v' in line.lower():
                version = line.strip()
                break
    except (OSError, ValueError):
        # Missing executable or undecodable output: version stays unknown.
        version = None
    if version:
        sys.stdout.write('%s\n' % version)
    else:
        sys.stdout.write('Could not determine Cufflinks version\n')


def _build_command(options, seq_path):
    """Translate parsed options into the cufflinks argument list."""
    # Base; always use quiet mode to avoid problems with storing log output.
    cmd = ['cufflinks', '-q', '--no-update-check']

    def add_int(flag, raw):
        if raw:
            cmd.extend([flag, '%i' % int(raw)])

    def add_float(flag, raw):
        if raw:
            cmd.extend([flag, '%f' % float(raw)])

    def add_text(flag, raw):
        if raw:
            cmd.extend([flag, raw])

    def add_flag(flag, enabled):
        if enabled:
            cmd.append(flag)

    add_int('-I', options.max_intron_len)
    add_float('-F', options.min_isoform_fraction)
    add_float('-j', options.pre_mrna_fraction)
    add_int('-p', options.num_threads)
    add_text('-G', options.GTF)
    add_flag('--compatible-hits-norm', options.compatible_hits_norm)
    if options.GTFguide:
        cmd.extend(['-g', options.GTFguide])
        # The tolerances only apply to guided assembly and may be omitted,
        # in which case cufflinks falls back to its own defaults.
        add_int('--3-overhang-tolerance', options.three_overhang_tolerance)
        add_int('--intron-overhang-tolerance',
                options.intron_overhang_tolerance)
    add_flag('--no-faux-reads', options.no_faux_reads)
    add_flag('-u', options.multi_read_correct)

    if options.library_type and options.library_type != 'auto':
        cmd.extend(['--library-type', options.library_type])
    add_text('--mask-file', options.mask_file)
    add_int('-m', options.inner_mean_dist)
    add_int('-s', options.inner_dist_std_dev)
    add_int('--max-mle-iterations', options.max_mle_iterations)
    add_float('--junc-alpha', options.junc_alpha)
    add_float('--small-anchor-fraction', options.small_anchor_fraction)
    add_int('--overhang-tolerance', options.overhang_tolerance)
    add_int('--max-bundle-length', options.max_bundle_length)
    add_int('--max-bundle-frags', options.max_bundle_frags)
    add_int('--min-intron-length', options.min_intron_length)
    add_int('--trim-3-avgcov-thresh', options.trim_three_avgcov_thresh)
    add_float('--trim-3-dropoff-frac', options.trim_three_dropoff_frac)

    if options.do_bias_correction:
        cmd.extend(['-b', seq_path])
    add_flag('--no-effective-length-correction',
             options.no_effective_length_correction)
    add_flag('--no-length-correction', options.no_length_correction)

    # Input alignments go last.
    cmd.append(options.input)
    return cmd


def _read_total_map_mass(stderr_path):
    """Return the first map mass / upper quartile value logged, or -1."""
    with open(stderr_path, 'r') as handle:
        for line in handle:
            lowered = line.lower()
            if 'map mass' in lowered or 'upper quartile' in lowered:
                try:
                    return float(line.split(':')[1].strip())
                except (IndexError, ValueError):
                    # Mentions the keyword but carries no "label: number".
                    continue
    return -1


def _rescale_transcripts(ratio, path='transcripts.gtf'):
    """Multiply FPKM, conf_lo and conf_hi in the GTF at *path* by *ratio*."""
    scratch = tempfile.NamedTemporaryFile(mode='w', dir='.', delete=False)
    try:
        with open(path, 'r') as source:
            for line in source:
                fields = line.rstrip('\r\n').split('\t')
                if line.startswith('#') or len(fields) < 9:
                    # Not a feature record; pass through untouched.
                    scratch.write(line)
                    continue
                attrs = parse_gff_attributes(fields[8])
                for key in ('FPKM', 'conf_lo', 'conf_hi'):
                    if key in attrs:
                        attrs[key] = str(float(attrs[key]) * ratio)
                fields[8] = gff_attributes_to_str(attrs, 'GTF')
                scratch.write('%s\n' % '\t'.join(fields))
        scratch.close()
        shutil.copyfile(scratch.name, path)
    finally:
        scratch.close()
        if os.path.exists(scratch.name):
            os.remove(scratch.name)


def _read_text(path):
    """Return the whole content of *path* as text ('' if unreadable)."""
    try:
        with open(path, 'rb') as handle:
            data = handle.read()
    except (IOError, OSError):
        return ''
    if not isinstance(data, str):
        # Python 3 yields bytes; decode leniently for error reporting.
        data = data.decode('utf-8', 'replace')
    return data


def __main__():
    """Run cufflinks as described by the command line and collect outputs.

    Steps: parse options, print the cufflinks version, optionally link the
    reference sequence for bias correction, run cufflinks with stderr
    captured to a file, read the total map mass from that log, optionally
    rescale FPKM values against a global model, copy ``transcripts.gtf`` to
    the requested output and write ``global_model.txt``.

    Any failure while running cufflinks or handling its output is reported
    through ``stop_err`` together with the captured stderr, which ends the
    process via ``SystemExit``.
    """
    # Parse Command Line
    parser = _build_option_parser()
    (options, args) = parser.parse_args()

    # output version # of tool
    _report_version()

    if not options.input:
        stop_err('No input alignments given; use -1/--input.')

    # If doing bias correction, set/link to sequence file.
    seq_path = None
    if options.do_bias_correction:
        if options.ref_file:
            # Sequence data from history.
            # Create symbolic link to ref_file so that index will be created
            # in working directory.
            seq_path = "ref.fa"
            os.symlink(options.ref_file, seq_path)
        else:
            if not options.index or not os.path.exists(options.index):
                stop_err('Reference genome %s not present, request it by reporting this error.' % options.index)
            seq_path = options.index

    # Build command as an argument list so that paths containing spaces or
    # shell metacharacters are passed through verbatim.
    cmd = _build_command(options, seq_path)

    # Debugging.
    sys.stdout.write('%s\n' % ' '.join(cmd))
    # Flush so our output is not interleaved after the child's.
    sys.stdout.flush()

    #
    # Run command and handle output.
    #
    stderr_path = None
    try:
        # Run command, capturing stderr in the working directory.
        tmp_stderr = tempfile.NamedTemporaryFile(dir=".", delete=False)
        stderr_path = tmp_stderr.name
        try:
            proc = subprocess.Popen(args=cmd, stderr=tmp_stderr)
            returncode = proc.wait()
        finally:
            tmp_stderr.close()

        # Error checking.
        if returncode != 0:
            raise Exception("return code = %i" % returncode)

        # Read standard error to get total map/upper quartile mass.
        total_map_mass = _read_total_map_mass(stderr_path)

        #
        # If there's a global model provided, use model's total map mass
        # to adjust FPKM + confidence intervals.
        #
        if options.global_model_file:
            # Global model is simply total map mass from original run.
            with open(options.global_model_file, 'r') as model:
                global_model_total_map_mass = float(model.readline())

            if total_map_mass < 0:
                raise Exception("could not determine total map mass from "
                                "cufflinks output; cannot apply global model")
            if global_model_total_map_mass <= 0:
                raise Exception("global model total map mass must be "
                                "positive, got %s"
                                % global_model_total_map_mass)

            # Ratio of this run's total map mass to the global model's map
            # mass is the factor used to adjust FPKM.
            _rescale_transcripts(total_map_mass / global_model_total_map_mass)

            # TODO: update expression files as well.

        # Set outputs. Transcript and gene expression handled by wrapper
        # directives.
        if options.assembled_isoforms_output_file:
            shutil.copyfile("transcripts.gtf",
                            options.assembled_isoforms_output_file)
        if total_map_mass > -1:
            with open("global_model.txt", 'w') as model_out:
                model_out.write("%f\n" % total_map_mass)
    except Exception as e:
        # Read stderr so that it can be reported:
        stderr = _read_text(stderr_path) if stderr_path else ''
        stop_err('Error running cufflinks.\n%s\n%s' % (str(e), stderr))
    finally:
        # The captured log is only needed while this function runs.
        if stderr_path and os.path.exists(stderr_path):
            os.remove(stderr_path)
1fffcfe2fb35 Uploaded devteam parents: diff changeset	253
# Run the wrapper only when executed as a script, not when imported.
if __name__ == '__main__':
    __main__()

Mercurial > repos > devteam > cufflinks

annotate cufflinks_wrapper.py @ 1:b9d29fdd1190 draft