Mercurial > repos > trinity_ctat > testing_how_to_do_this
changeset 3:e9a4d694b591 draft
Deleted selected files
| author | trinity_ctat |
|---|---|
| date | Tue, 29 Aug 2017 12:01:52 -0400 |
| parents | 7c272fa77a8c |
| children | f4a429f40bfe |
| files | trinity_2_40/trinity.xml trinity_2_40/trinity_wrapper.py |
| diffstat | 2 files changed, 0 insertions(+), 298 deletions(-) [+] |
line wrap: on
line diff
<!-- Source path: trinity_2_40/trinity.xml -->
<tool id="trinityrnaseq" name="Trinity" version="2.4.0">

    <!-- Originally written by Jeremy Goecks,
         later maintained by (in chronological order)
         bhaas, Ben Fulton, Cicada Dennis
    -->
    <description>De novo assembly of RNA-Seq data using Trinity 2.4.0</description>
    <requirements>
        <requirement type="package" version="2.4.0">trinity</requirement>
    </requirements>
    <!-- The command delegates to trinity_wrapper.py, which builds and runs the
         actual Trinity command line. Dataset and log paths are single-quoted so
         that unusual characters in a path cannot be split or interpreted by
         the shell. -->
    <command>
<![CDATA[
    python $__tool_directory__/trinity_wrapper.py --mem_per_cpu 31
    --CPU \${GALAXY_SLOTS:-4}
    #if str($inputs.paired_or_single) == "paired":
        --left '$inputs.left_input' --right '$inputs.right_input'
        #if $inputs.left_input.ext == 'fasta':
            --seqType fa
        #else:
            --seqType fq
        #end if
    #else:
        --single '$inputs.input'
        #if $inputs.input.ext == 'fasta':
            --seqType fa
        #else:
            --seqType fq
        #end if
    #end if
    ## direct to output
    --timing trinity_out_dir/Trinity.timing
    --user $__user_id__
    --fullpath /N/dc2/scratch/tstrnity/rerun
    --dir '$adv.rerundir'
    --log '$trinity_log'
]]>
    </command>
    <stdio>
        <exit_code range="1:" level="fatal" description="Program failed" />
        <exit_code range=":-1" level="fatal" description="DRM killed job" />
    </stdio>
    <inputs>
        <conditional name="inputs">
            <param name="paired_or_single" type="select" label="Paired or Single-end data?">
                <option value="paired">Paired</option>
                <option value="single">Single</option>
            </param>
            <when value="paired">
                <param format="fasta,fastq" name="left_input" type="data" label="Left/Forward strand reads" help=""/>
                <param format="fasta,fastq" name="right_input" type="data" label="Right/Reverse strand reads" help=""/>
            </when>
            <when value="single">
                <param format="fasta,fastq" name="input" type="data" label="Single-end reads" help=""/>
            </when>
        </conditional>
        <section name="adv" title="Allow Job Rerun" expanded="False">
            <param name="rerundir" type="text" size="10" label="To make a job rerunnable, you will need to specify a unique tag to label the job, with no spaces or weird characters." />
        </section>
    </inputs>
    <outputs>
        <data format="txt" name="trinity_log" label="${tool.name} on ${on_string}: log" />
        <data format="fasta" name="assembled_transcripts" label="${tool.name} on ${on_string}: Assembled Transcripts" from_work_dir="trinity_out_dir/Trinity.fasta"/>
    </outputs>
    <tests>
        <!-- Not testing with the following inputs anymore.
            <param name="left_input" value="FLI1.left.fq" />
            <param name="right_input" value="FLI1.right.fq" />
        -->
        <!-- NOTE(review): rerundir lives inside the "adv" section. Confirm that
             the dotted name "adv.rerundir" used below is resolved by the Galaxy
             test framework; nested parameters are normally addressed with "|"
             or with nested section tags. -->
        <test>
            <param name="paired_or_single" value="paired" />
            <param name="left_input" value="reads.left.simPE.fq" />
            <param name="right_input" value="reads.right.simPE.fq" />
            <param name="adv.rerundir" value="planemo_test_1" />
            <output name="trinity_log" >
                <assert_contents>
                    <has_line_matching expression=".+" />
                    <has_line line="Trinity exited with status 0" />
                </assert_contents>
            </output>
            <output name="assembled_transcripts" >
                <assert_contents>
                    <has_line_matching expression=".+" />
                    <has_line_matching expression=">TRINITY.+?len=.+?path=.+" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="paired_or_single" value="paired" />
            <param name="left_input" value="Sp.cat_ds_hs.left.fq" />
            <param name="right_input" value="Sp.cat_ds_hs.right.fq" />
            <param name="adv.rerundir" value="planemo_test_2" />
            <!-- Following are not being used in this version of trinity.xml -->
            <!--
            <param name="paired_or_single" value="paired" />
            <param name="left_input" file="cat_Sp.left.fq" />
            <param name="right_input" file="cat_Sp.right.fq" />
            <param name="JM" value="50G" />
            <param name="CPU" value="2" />
            <param name="library_type" value="None" />
            <param name="group_pairs_distance" value="500" />
            <param name="path_reinforcement_distance" value="75" />
            <param name="use_additional" value="no" />
            -->
            <output name="trinity_log" >
                <assert_contents>
                    <has_line_matching expression=".+" />
                    <has_line line="Trinity exited with status 0" />
                </assert_contents>
            </output>
            <output name="assembled_transcripts" >
                <assert_contents>
                    <has_line_matching expression=".+" />
                    <has_line_matching expression=">TRINITY.+?len=.+?path=.+" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <!-- NOTE(review): the example command in the help text below (240G of
         memory, 8 CPUs) does not match the command section above, which
         passes mem_per_cpu 31 and takes the CPU count from GALAXY_SLOTS
         (default 4). Confirm which values are intended before editing the
         user-facing text. -->
    <help>
This instance is running Trinity version 2.4.0 and uses the following command:

    Trinity --max_memory 240G --CPU 8 --seqType seq_type --single singlefile or --left left_file --right right_file

.. class:: infomark

Trinity_, developed at the Broad Institute and the Hebrew University of Jerusalem, represents a novel method for the efficient and robust de novo reconstruction of transcriptomes from RNA-seq data. Trinity combines three independent software modules: Inchworm, Chrysalis, and Butterfly, applied sequentially to process large volumes of RNA-seq reads. Trinity partitions the sequence data into many individual de Bruijn graphs, each representing the transcriptional complexity at a given gene or locus, and then processes each graph independently to extract full-length splicing isoforms and to tease apart transcripts derived from paralogous genes. For more information, visit Trinity's wiki page here_.

.. _Trinity: https://github.com/trinityrnaseq/trinityrnaseq/wiki
.. _here: https://github.com/trinityrnaseq/trinityrnaseq/wiki
    </help>

    <citations>
        <citation type="doi">10.1038/nbt.1883</citation>
    </citations>

</tool>
#!/usr/bin/env python
# Source path: trinity_2_40/trinity_wrapper.py
"""
trinity_wrapper.py

Wrapper around Trinity that allows an automatic rerun of failed jobs.
It takes the arguments for a typical Trinity run:

~ Required args ~
Input files - single or paired (left and right)
File type (fasta, fastq)
Max memory - either given directly, or derived from memory-per-CPU times
the CPU count (e.g. Galaxy slots)

~ Optional args ~
Output directory - this allows users to run the same job over in case it
walltime'd out or failed for recoverable reasons.

--
Created Tuesday, 7 March 2017.
Carrie Ganote

Licensed to Indiana University under Creative Commons 3.0
"""
import argparse
import errno
import logging as log
import os
import sys
from datetime import datetime

try:
    import subprocess32
except ImportError:
    # The backport is only needed on Python 2; the standard module offers the
    # same call() interface used here.
    import subprocess as subprocess32

TRINITY_OUT_DIR = "trinity_out_dir"

# Line Trinity prints on success; searched for in the log after each attempt.
SUCCESS_MARKER = "All commands completed successfully"

# Total number of times the Trinity command may be launched.
MAX_ATTEMPTS = 2

# CPU count assumed for the memory computation when --CPU is not supplied.
FALLBACK_CPUS = 2

# Characters replaced by "_" in a user-supplied rerun tag. "/" is included so
# a tag can never expand into nested directories.
_UNSAFE_DIR_CHARS = frozenset("\\`*{}[]()>#+-.!$&;| /")


def _build_parser():
    """Return the argument parser describing the wrapper's command line."""
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("-o", "--output", help="Name of output directory")
    parser.add_argument("-q", "--seqType", help="Type of reads; fa or fq")
    parser.add_argument("-m", "--max_memory", help="How much memory to allocate? Or maybe how many cpus?")
    parser.add_argument("-p", "--mem_per_cpu", help="Memory PER CPU, in GB, in case we want to multiply mem x cpu at runtime")
    parser.add_argument("-s", "--single", help="Single read file input")
    parser.add_argument("-l", "--left", help="Left read file from paired inputs")
    parser.add_argument("-r", "--right", help="Right read file from paired inputs")
    parser.add_argument("-v", "--verbose", help="Enable debugging messages to be displayed", action='store_true')
    parser.add_argument("-g", "--log", help="Log file")
    parser.add_argument("-t", "--timing", help="Timing file, if it exists", default=None)
    parser.add_argument("-d", "--dir", help="if supplying a rerunnable job, this is the (hopefully unique) name of the directory to run it in.")
    parser.add_argument("-u", "--user", help="Username to run job under")
    parser.add_argument("-f", "--fullpath", help="if supplying a rerunnable job, this is the full path (except the user and dir names) to run the job in.")
    parser.add_argument("-c", "--CPU", help="CPUs, either a hard coded number or from Galaxy slots")
    return parser


def _sanitize_dir_name(name):
    """Return *name* with every unsafe character replaced by an underscore."""
    return "".join("_" if ch in _UNSAFE_DIR_CHARS else ch for ch in name)


def _ensure_dir(path):
    """Create *path* if needed; return True only when it was newly created."""
    try:
        os.makedirs(path)
    except OSError as exc:
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            return False
        raise
    return True


def _build_trinity_command(opts):
    """Validate the parsed options and return the Trinity argument list.

    Raises:
        ValueError: if the read inputs, the sequence type, or a usable
            memory specification is missing.
    """
    cmd = ["Trinity"]

    # Reads: a left/right pair takes precedence over a single-end file.
    if opts.left and opts.right:
        cmd += ["--left", opts.left, "--right", opts.right]
    elif opts.single:
        cmd += ["--single", opts.single]
    else:
        raise ValueError("Need input files in order to run Trinity!")

    if not opts.seqType:
        raise ValueError("Please specify a file type for your reads!")
    cmd += ["--seqType", opts.seqType]

    # Memory: exactly one of --mem_per_cpu / --max_memory must be given.
    if opts.mem_per_cpu and opts.max_memory:
        raise ValueError("Please pick Memory per cpu, or max mem, but not both.")
    if opts.mem_per_cpu:
        cpus = int(opts.CPU) if opts.CPU else FALLBACK_CPUS
        cmd += ["--max_memory", "%dG" % (cpus * int(opts.mem_per_cpu))]
    elif opts.max_memory:
        cmd += ["--max_memory", opts.max_memory]
    else:
        raise ValueError("Please specify either Memory per cpu or max mem.")

    if opts.CPU:
        cmd += ["--CPU", opts.CPU]
    return cmd


def _prepare_rerun_dir(opts):
    """Set up and enter the persistent rerun directory, if one was requested.

    A rerun is requested when --dir, --user and --fullpath are all given.
    In that case the rerun directory and the job's own output directory are
    created, and the process changes into the rerun directory.

    Returns:
        The original working directory (where results must be linked back
        to), or None when no rerun directory was requested.
    """
    if not (opts.dir and opts.user and opts.fullpath):
        return None

    rerun_path = "%s/%s/%s" % (opts.fullpath, opts.user, _sanitize_dir_name(opts.dir))
    print("Rerunpath is  %s" % rerun_path)
    if _ensure_dir(rerun_path):
        print("Created dir  %s" % rerun_path)

    job_dir = os.getcwd()
    outdir = job_dir + "/" + TRINITY_OUT_DIR
    if _ensure_dir(outdir):
        print("Created dir  %s" % outdir)

    os.chdir(rerun_path)
    return job_dir


def _log_contains(path, marker):
    """Return True if any line of the file at *path* contains *marker*."""
    with open(path, "r") as handle:
        return any(marker in line for line in handle)


def _run_with_retries(cmd, out, log_path):
    """Run *cmd* up to MAX_ATTEMPTS times, logging each attempt to *out*.

    Retrying stops as soon as the command exits with status 0 or the
    success marker appears in the log. Returns the last exit status.
    """
    exit_code = 1
    for attempt in range(1, MAX_ATTEMPTS + 1):
        started = datetime.now().strftime("%d/%m/%y %H:%M")
        out.write("Beginning attempt %d of Trinity job at %s\n" % (attempt, started))
        # Flush before the child writes to the same file so the wrapper's own
        # lines land in the log in chronological order.
        out.flush()
        exit_code = subprocess32.call(cmd, shell=False, stdin=None, stdout=out, stderr=out)
        out.write("Trinity exited with status %d\n" % exit_code)
        out.flush()

        # Same convention as grep's exit status: 0 when found, 1 otherwise.
        found_code = 0 if _log_contains(log_path, SUCCESS_MARKER) else 1
        out.write("Finished and found the success command with grep code %d\n" % found_code)

        if exit_code == 0 or found_code == 0:
            break
    return exit_code


def _link_result_back(job_dir):
    """Symlink Trinity.fasta from the current directory into *job_dir*."""
    src = os.getcwd() + "/" + TRINITY_OUT_DIR + "/Trinity.fasta"
    dest = job_dir + "/" + TRINITY_OUT_DIR + "/Trinity.fasta"
    print("copying trinity outputs from %s to %s" % (src, dest))
    try:
        os.symlink(src, dest)
    except OSError as exc:
        if exc.errno != errno.EEXIST:
            raise
        # A stale entry is in the way; replace it with the fresh link.
        os.remove(dest)
        os.symlink(src, dest)


def main(*args):
    """Parse the command line, run Trinity with retries, and exit.

    Args:
        *args: the full argument vector including the program name, as in
            ``main(*sys.argv)``. When empty, ``sys.argv`` is used.

    Raises:
        ValueError: if required options are missing or contradictory.
            Validation happens before any directory or file is touched.
        SystemExit: always, on completion, carrying Trinity's exit status.
    """
    opts = _build_parser().parse_args(list(args[1:]) if args else None)

    if opts.verbose:
        log.basicConfig(format='%(message)s', level=log.DEBUG)

    cmd = _build_trinity_command(opts)
    if not opts.log:
        raise ValueError("Please specify a log file with --log.")

    # None means "no rerun directory"; otherwise the directory results must
    # be linked back into.
    job_dir = _prepare_rerun_dir(opts)

    print("About to write to %s" % opts.log)
    with open(opts.log, 'w') as out:
        out.write("Command is:\n%s\n" % (" ".join(cmd)))
        exit_code = _run_with_retries(cmd, out, opts.log)

        if exit_code == 0:
            if job_dir is not None:
                _link_result_back(job_dir)

            # Append the timing file to the log when one was requested.
            if opts.timing is not None:
                try:
                    with open(opts.timing, 'r') as handle:
                        for line in handle:
                            out.write(line)
                except (OSError, IOError) as e:
                    print("Oops, no timing file found?  %s" % e)

    sys.exit(exit_code)


if __name__ == "__main__":
    main(*sys.argv)
