Mercurial > repos > greg > multigps

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/multigps_wrapper.py	Wed Dec 23 10:34:14 2015 -0500
@@ -0,0 +1,96 @@
+"""Wrapper script that runs MultiGPS and converts its events table to GFF.
+
+The script parses the tool options, writes a shell script that runs the
+MultiGPS jar in a per-job directory, executes it with ``sh``, and rewrites
+the resulting events table as GFF-style rows.
+
+NOTE(review): the option dump below ends in ``sys.exit(1)``, so everything
+after it is currently unreachable.  Remove that debugging block to enable
+the pipeline.
+"""
+import optparse
+import os
+import sys
+
+parser = optparse.OptionParser()
+parser.add_option('--threads', dest='threads', type="string", help='The number of threads to run')
+# nargs must be an int: optparse uses it to count and slice the arguments.
+parser.add_option('--input_files', dest='input_file', action='append', nargs=1, type="string", help='Input datasets')
+
+parser.add_option('--genome_info_file', dest='genome_info_file', type="string", help='Genome information file')
+parser.add_option('--replicate_available', dest='replicate_available', type="string", help='Are the selected inputs biological replicates')
+parser.add_option('--smoothing_factor', dest='smoothing_factor', type="int", help='Gaussian smoothing standard deviation')
+parser.add_option('--output_report', dest='output_report', type="string", help='MultiGPS output report')
+parser.add_option('--gff', dest='gff', type="string", help='...')
+parser.add_option('--gz', dest='gz', type="string", help='...')
+parser.add_option('--design_file', dest='design_file', type="string", help='...')
+
+options, args = parser.parse_args()
+
+# Debugging aid: dump the parsed command line to a log file, then stop.
+with open('/tmp/out.log', 'w') as dh:
+    dh.write('\n options: \n%s\n' % str(options))
+    dh.write('\n args: \n%s\n' % str(args))
+sys.stderr.write('Finished...')
+sys.exit(1)
+
+# ---- Unreachable until the debugging exit above is removed ----
+
+# NOTE(review): no option defines ``genome``; presumably it should be derived
+# from --genome_info_file or a new option -- confirm before enabling.
+genome = options.genome
+# smoothing_factor is parsed as an int; it is concatenated into a command.
+smooth = str(options.smoothing_factor)
+output_file = options.output_report
+output_file2 = options.gff
+output_file3 = options.gz
+design_file = options.design_file
+# --input_files stores its values under dest='input_file'.
+input_files = options.input_file or []
+if not input_files:
+    parser.error('At least one --input_files dataset is required')
+input_file = input_files[0]
+# The design file is only passed to MultiGPS when several inputs were given.
+design_arg = '--design %s' % design_file if len(input_files) > 1 else ''
+if design_file:
+    with open(design_file, 'r') as design_fh:
+        # Echo the design file to stdout; the newline mirrors ``print``.
+        sys.stdout.write(design_fh.read() + '\n')
+
+command_dir = "/home/galaxy/galaxy-dist/tools/pughlab/command_tmp/"
+# The job name is the last path component of the report path.
+tmp_name = os.path.basename(output_file)
+script_path = command_dir + tmp_name + ".pbs"
+
+# NOTE(review): paths are interpolated into shell commands unquoted; values
+# containing spaces or shell metacharacters will break or alter the commands.
+# NOTE(review): ``--threads 4`` is hard-coded; options.threads is not used.
+with open(script_path, "w") as f:
+    f.write("cd " + command_dir + "\n")
+    f.write("mkdir " + tmp_name + "_analysis_result\n")
+    f.write("cd " + tmp_name + "_analysis_result\n")
+    f.write("ln -s " + input_file + " .\n")
+    f.write(
+        "java -Xmx6G -jar /home/galaxy/software/multigps/multigps_v0.5.jar"
+        " --geninfo /home/galaxy/software/multigps/" + genome + ".info"
+        " --threads 4 " + design_arg +
+        " --verbose --gaussmodelsmoothing --gausssmoothparam " + smooth +
+        " --out multigps_out --memepath ~/bin --mememinw 6 --mememaxw 16"
+        " --seq /home/galaxy/software/multigps/refs/" + genome +
+        " >multiGPS.out 2>&1\n")
+    f.write("rm -rf " + output_file + "\n")
+    f.write("mv multigps_out/multigps_out.all.events.table " + output_file + "\n")
+    f.write("tar cvzf multigps_out.tar.gz multigps_out\n")
+    f.write("mv multigps_out.tar.gz " + output_file3 + "\n")
+
+os.system("sh " + script_path + " 2>&1")
+
+# Rewrite each events row as a GFF-style row: the first column is split on
+# ':' into sequence name and position (used as both start and end), and the
+# second column is written to the sixth GFF field.
+with open(output_file, "r") as events, open(output_file2, "w") as gff_out:
+    for line in events:
+        line = line.strip()
+        # Skip blank lines and '#' comment lines; a blank line would otherwise
+        # raise IndexError when the position is looked up.
+        if not line or line.startswith("#"):
+            continue
+        data = line.split("\t")
+        chrom = data[0].split(":")
+        fields = [chrom[0], "multigps", ".", chrom[1], chrom[1], data[1], ".", ".", "."]
+        gff_out.write("\t".join(fields) + "\n")
+sys.exit(0)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/fasta_indexes.loc.sample	Wed Dec 23 10:34:14 2015 -0500
@@ -0,0 +1,29 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools-indexed sequence data files.  You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (white space characters are TAB characters):
+#
+# <unique_build_id> <dbkey> <display_name>  <file_base_path>
+#
+#So, for example, if you had hg19 Canonical indexed stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon  hg19    Human (Homo sapiens): hg19 Canonical    /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should include an entry per line for
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+#hg18canon  hg18    Human (Homo sapiens): hg18 Canonical    /depot/data2/galaxy/hg18/sam/hg18canon.fa
+#hg18full   hg18    Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa
+#hg19canon  hg19    Human (Homo sapiens): hg19 Canonical    /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#hg19full   hg19    Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Wed Dec 23 10:34:14 2015 -0500
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Location of SAMTools indexes for FASTA files -->
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/fasta_indexes.loc" />
+    </table>
+</tables>