Mercurial > repos > trinity_ctat > testing_how_to_do_this

--- a/trinity_2_40/trinity.xml	Tue Aug 29 12:00:16 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,136 +0,0 @@
-<tool id="trinityrnaseq" name="Trinity" version="2.4.0">
-
-    <!-- Originally written by Jeremy Goecks,
-        later maintained by (in chronological order)
-            bhaas, Ben Fulton, Cicada Dennis
-    -->
-    <description>De novo assembly of RNA-Seq data using Trinity 2.4.0</description>
-    <requirements>
-        <requirement type="package" version="2.4.0">trinity</requirement>
-    </requirements>
-    <command>
-      <![CDATA[
-      python $__tool_directory__/trinity_wrapper.py --mem_per_cpu 31
-      --CPU \${GALAXY_SLOTS:-4}
-      #if str($inputs.paired_or_single) == "paired":
-       --left $inputs.left_input --right $inputs.right_input
-       #if $inputs.left_input.ext == 'fasta':
-        --seqType fa
-       #else:
-        --seqType fq
-       #end if
-      #else:
-       --single $inputs.input
-       #if $inputs.input.ext == 'fasta':
-        --seqType fa
-       #else:
-        --seqType fq
-       #end if
-      #end if
-      ## direct to output
-      --timing trinity_out_dir/Trinity.timing
-      --user $__user_id__
-      --fullpath /N/dc2/scratch/tstrnity/rerun
-      --dir '$adv.rerundir'
-      --log $trinity_log
-
- ]]>
-    </command>
-    <stdio>
-      <exit_code range="1:"   level="fatal"   description="Program failed" />
-      <exit_code range=":-1"   level="fatal"   description="DRM killed job" />
-    </stdio>
-    <inputs>
-      <conditional name="inputs">
-	<param name="paired_or_single" type="select" label="Paired or Single-end data?">
-          <option value="paired">Paired</option>
-          <option value="single">Single</option>
-        </param>
-        <when value="paired">
-          <param format="fasta,fastq" name="left_input" type="data" label="Left/Forward strand reads" help=""/>
-          <param format="fasta,fastq" name="right_input" type="data" label="Right/Reverse strand reads" help=""/>
-        </when>
-        <when value="single">
-          <param format="fasta,fastq" name="input" type="data" label="Single-end reads" help=""/>
-        </when>
-      </conditional>
-      <section name="adv" title="Allow Job Rerun" expanded="False">
-	<param name="rerundir" type="text" size="10" label="To make a job rerunnable, you will need to specify a unique tag to label the job, with no spaces or weird characters." />
-    </section>
-    </inputs>
-    <outputs>
-      <data format="txt" name="trinity_log" label="${tool.name} on ${on_string}: log" />
-      <data format="fasta" name="assembled_transcripts" label="${tool.name} on ${on_string}: Assembled Transcripts" from_work_dir="trinity_out_dir/Trinity.fasta"/>
-    </outputs>
-    <tests>
-            <!-- Not testing with the following inputs anymore.
-            <param name="left_input" value="FLI1.left.fq" />
-            <param name="right_input" value="FLI1.right.fq" />
-            -->
-        <test>
-	    <param name="paired_or_single" value="paired" />
-            <param name="left_input" value="reads.left.simPE.fq" />
-            <param name="right_input" value="reads.right.simPE.fq" />
-            <param name="adv.rerundir" value="planemo_test_1" />
-	    <output name="trinity_log" >
-                <assert_contents>
-                    <has_line_matching expression=".+" />
-                    <has_line line="Trinity exited with status 0" />
-                </assert_contents>
-            </output>
-	    <output name="assembled_transcripts" >
-                <assert_contents>
-                    <has_line_matching expression=".+" />
-                    <has_line_matching expression=">TRINITY.+?len=.+?path=.+" />
-                </assert_contents>
-            </output>
-        </test>
-        <test>
-  	    <param name="paired_or_single" value="paired" />
-            <param name="left_input" value="Sp.cat_ds_hs.left.fq" />
-            <param name="right_input" value="Sp.cat_ds_hs.right.fq" />
-            <param name="adv.rerundir" value="planemo_test_2" />
-            <!-- Following are not being used in this version of trinity.xml -->
-            <!--
- 	    <param name="paired_or_single" value="paired" />
-            <param name="left_input" file="cat_Sp.left.fq" />
-            <param name="right_input" file="cat_Sp.right.fq" />
-            <param name="JM" value="50G" />
-            <param name="CPU" value="2" />
-            <param name="library_type" value="None" />
-            <param name="group_pairs_distance" value="500" />
-            <param name="path_reinforcement_distance" value="75" />
-            <param name="use_additional" value="no" />
-            -->
-  	    <output name="trinity_log" >
-                <assert_contents>
-                    <has_line_matching expression=".+" />
-                    <has_line line="Trinity exited with status 0" />
-                </assert_contents>
-            </output>
-  	    <output name="assembled_transcripts" >
-                <assert_contents>
-                    <has_line_matching expression=".+" />
-                    <has_line_matching expression=">TRINITY.+?len=.+?path=.+" />
-                </assert_contents>
-            </output>
-        </test>
-    </tests>
-    <help>
-This instance is running Trinity version 2.4.0 and uses the following command:
-
-	 Trinity --max_memory 240G --CPU 8 --seqType seq_type --single singlefile or --left left_file --right right_file
-
-.. class:: infomark
-
-Trinity_, developed at the Broad Institute and the Hebrew University of Jerusalem, represents a novel method for the efficient and robust de novo reconstruction of transcriptomes from RNA-seq data. Trinity combines three independent software modules: Inchworm, Chrysalis, and Butterfly, applied sequentially to process large volumes of RNA-seq reads. Trinity partitions the sequence data into many individual de Bruijn graphs, each representing the transcriptional complexity at a given gene or locus, and then processes each graph independently to extract full-length splicing isoforms and to tease apart transcripts derived from paralogous genes. For more information, visit Trinity's wiki page here_.
-
-.. _Trinity: https://github.com/trinityrnaseq/trinityrnaseq/wiki
-.. _here: https://github.com/trinityrnaseq/trinityrnaseq/wiki
-    </help>
-
-        <citations>
-            <citation type="doi">10.1038/nbt.1883</citation>
-        </citations>
-
-</tool>
--- a/trinity_2_40/trinity_wrapper.py	Tue Aug 29 12:00:16 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,162 +0,0 @@
-#!/usr/bin/env python
-
-'''
-trinity_wrapper.py
-This program is used as a wrapper for Trinity to allow an automatic rerun of failed jobs. It takes arguments for a typical Trinity run:
-~ Required args ~
-Input files - single or paired (left and right)
-File type (fasta, fastq)
-Max memory - passed directly (--max_memory) or derived at runtime as --mem_per_cpu x --CPU (Galaxy slots)
-
-~ Optional args ~
-Output directory - this allows users to run the same job over in case it walltime'd out or failed for recoverable reasons.
-
- --
-Created Tuesday, 7 March 2017.
-Carrie Ganote
-
-Licensed to Indiana University under Creative Commons 3.0
-'''
-import subprocess32
-import argparse
-import logging as log
-import sys
-import os
-import errno
-from datetime import datetime
-
-TRINITY_OUT_DIR = "trinity_out_dir"
-
-def main(*args):
-    parser = argparse.ArgumentParser(description="")
-    parser.add_argument("-o","--output", help="Name of output directory")
-    parser.add_argument("-q","--seqType", help="Type of reads; fa or fq")
-    parser.add_argument("-m","--max_memory", help="How much memory to allocate? Or maybe how many cpus?")
-    parser.add_argument("-p","--mem_per_cpu", help="Memory PER CPU, in GB, in case we want to multiply mem x cpu at runtime")
-    parser.add_argument("-s","--single", help="Single read file input")
-    parser.add_argument("-l","--left", help="Left read file from paired inputs")
-    parser.add_argument("-r","--right", help="Right read file from paired inputs")
-    parser.add_argument("-v","--verbose", help="Enable debugging messages to be displayed", action='store_true')
-    parser.add_argument("-g","--log", help="Log file")
-    parser.add_argument("-t","--timing", help="Timing file, if it exists", default=None)
-    parser.add_argument("-d","--dir", help="if supplying a rerunnable job, this is the (hopefully unique) name of the directory to run it in.")
-    parser.add_argument("-u","--user", help="Username to run job under")
-    parser.add_argument("-f","--fullpath", help="if supplying a rerunnable job, this is the full path (except the user and dir names) to run the job in.")
-    parser.add_argument("-c","--CPU", help="CPUs, either a hard coded number or from Galaxy slots")
-#    parser.add_argument("-","--", help="")
-    args = parser.parse_args()
-
-    if args.verbose:
-        log.basicConfig(format='%(message)s',level=log.DEBUG)
-    cmd = ["Trinity"]
-
-    ### Add rerun ability ###########################################
-    # Either False, or the original job working directory; when set, Trinity.fasta may later be symlinked back into it
-    copyback = False
-    if args.dir and args.user and args.fullpath:
-        cleandir = args.dir
-        chars = "\\`*_{}[]()>#+-.!$&;| "
-        for c in chars:
-            if c in cleandir:
-                cleandir = cleandir.replace(c, "_")
-        rerunPath = "%s/%s/%s" % (args.fullpath, args.user, cleandir)
-        print "Rerunpath is ",rerunPath
-        try:
-            os.makedirs(rerunPath)
-            print "Created dir ",rerunPath
-        except OSError as exc:
-            if exc.errno == errno.EEXIST and os.path.isdir(rerunPath):
-                pass
-            else:
-                raise
-        copyback = os.getcwd()
-        outdir = copyback + "/" + TRINITY_OUT_DIR
-        try:
-            os.makedirs(outdir)
-            print "Created dir ",outdir
-        except OSError as exc:
-            if exc.errno == errno.EEXIST and os.path.isdir(outdir):
-                pass
-            else:
-                raise
-        os.chdir(rerunPath)
-
-    ### Add information for reads ###################################
-    if args.left and args.right:
-        cmd += ["--left",args.left,"--right", args.right]
-    elif args.single:
-        cmd += ["--single",args.single]
-    else:
-        raise Exception ("Need input files in order to run Trinity!")
-
-    ### Add seqtype ##################################################
-    if args.seqType:
-        cmd += ["--seqType",args.seqType]
-    else:
-        raise Exception ("Please specify a file type for your reads!")
-
-    ### Memory and CPU management ####################################
-    if args.mem_per_cpu and not args.max_memory:
-        if args.CPU:
-            memry = int(args.CPU) * int(args.mem_per_cpu)
-            memstr = "%dG" % (memry)
-            cmd += ["--max_memory",memstr]
-        else:
-            memry = 2 * int(args.mem_per_cpu)
-            memstr = "%dG" % (memry)
-            cmd += ["--max_memory",memstr]
-    elif args.max_memory and not args.mem_per_cpu:
-        cmd += ["--max_memory",args.max_memory]
-    else:
-        raise Exception ("Please pick Memory per cpu, or max mem, but not both.")
-    if args.CPU:
-        cmd += ["--CPU", args.CPU]
-
-    ### Enough args, let's run it ####################################
-    print "About to write to %s" % args.log
-    out = open(args.log, 'w')
-    totalattempts = attempts = 2
-    ec = 1
-    finish = 1
-    out.write("Command is:\n%s\n" % (" ".join(cmd)))
-
-    ### There is definitely some value in running the job more than once, especially if it dies for stupid reasons.. ###
-    while ec != 0 and attempts > 0 and finish != 0:
-
-        dt = datetime.now()
-        dtstr = dt.strftime("%d/%m/%y %H:%M")
-        out.write("Beginning attempt %d of Trinity job at %s\n" % (totalattempts - attempts +1, dtstr) )
-        attempts -= 1
-        ec = subprocess32.call(cmd, shell=False, stdin=None, stdout=out, stderr=out, timeout=None)
-        out.write("Trinity exited with status %d\n" % ec)
-
-        greplog = open("greplog", 'w')
-        cmds = ["grep", 'All commands completed successfully', args.log]
-        finish = subprocess32.call(cmds,shell=False,  stdin=None, stdout=greplog, stderr=greplog, timeout=None)
-        greplog.close()
-        out.write("Finished and found the success command with grep code %d\n" % finish)
-
-    if ec == 0 and args.timing is not None:
-        if copyback is not False:
-            cwd = os.getcwd()
-            dest = copyback + "/" + TRINITY_OUT_DIR + "/Trinity.fasta"
-            src = cwd + "/" + TRINITY_OUT_DIR + "/Trinity.fasta"
-            print "copying trinity outputs from %s to %s" % (src, dest)
-            os.symlink(src, dest)
-
-        #copy the timing file into the log
-        try:
-            handle = open (args.timing, 'r')
-            for line in handle:
-                out.write(line)
-            handle.close()
-        except (OSError, IOError) as e:
-            print "Oops, no timing file found? ",e
-
-
-    out.close()
-    exit (ec)
-
-if __name__ == "__main__":
-    main(*sys.argv)
-