annotate rgGSEA/makeGSEAloc.py @ 4:89e89b70a867 draft default tip

Uploaded
author fubar
date Sun, 09 Jun 2013 23:09:34 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
89e89b70a867 Uploaded
fubar
parents:
diff changeset
1 #!/bin/env python
89e89b70a867 Uploaded
fubar
parents:
diff changeset
2 # make the gsea loc files needed for the rgGSEA tool from a directory
89e89b70a867 Uploaded
fubar
parents:
diff changeset
3 # containing gmt and chip files downloaded from the gsea site
89e89b70a867 Uploaded
fubar
parents:
diff changeset
4 # Copyright ross lazarus feb 2012
89e89b70a867 Uploaded
fubar
parents:
diff changeset
5 # all rights reserved
89e89b70a867 Uploaded
fubar
parents:
diff changeset
6 # licensed under the LGPL
89e89b70a867 Uploaded
fubar
parents:
diff changeset
7
89e89b70a867 Uploaded
fubar
parents:
diff changeset
8 import os,sys,glob,time
89e89b70a867 Uploaded
fubar
parents:
diff changeset
9
89e89b70a867 Uploaded
fubar
parents:
diff changeset
# Template for the tool_data_tables.xml entries printed at the end of main().
# It takes two %s substitutions, both the GSEA version string.  The file names
# match what main() actually writes: gseaChip<vers>.loc and
# gseaGMT<kind>_<vers>.loc (only the "symbols" GMT table is shown here).
notes = """

<table name="gseaChip" comment_char="#">
<columns>value, name, path</columns>
<file path="tool-data/gseaChip%s.loc" />
</table>
<table name="gseaGMT" comment_char="#">
<columns>value, name, path</columns>
<file path="tool-data/gseaGMTsymbols_%s.loc" />
</table>

"""
89e89b70a867 Uploaded
fubar
parents:
diff changeset
22
89e89b70a867 Uploaded
fubar
parents:
diff changeset
def timenow():
    """Return the current local time as a 'dd/mm/YYYY HH:MM:SS' string."""
    now = time.localtime()
    return time.strftime('%d/%m/%Y %H:%M:%S', now)
89e89b70a867 Uploaded
fubar
parents:
diff changeset
27
89e89b70a867 Uploaded
fubar
parents:
diff changeset
def makeLoc(paths=None, head=None, outpath='gseaChip.loc'):
    """Write a 3 column, tab separated loc file from a list of file paths.

    Each path becomes one row: the file's base name without its final
    extension (written twice, as the first two columns) followed by the
    path itself.  Rows are sorted as whole lines before being written.

    paths   -- list of file paths (typically a glob result); not modified
    head    -- list of header lines written before the rows
    outpath -- path of the loc file to create or overwrite
    """
    # None defaults avoid sharing one mutable list between calls.
    paths = [] if paths is None else paths
    head = [] if head is None else head
    rows = []
    for p in paths:
        stem = os.path.splitext(os.path.basename(p))[0]
        rows.append('\t'.join((stem, stem, p)))
    # Sorting the finished rows fixes the output order; the caller's list is
    # deliberately left untouched.
    rows.sort()
    # The context manager closes the file even if a write fails.
    with open(outpath, 'w') as f:
        f.write('\n'.join(head))
        f.write('\n')
        f.write('\n'.join(rows))
        f.write('\n')
89e89b70a867 Uploaded
fubar
parents:
diff changeset
43
89e89b70a867 Uploaded
fubar
parents:
diff changeset
44 """
89e89b70a867 Uploaded
fubar
parents:
diff changeset
45 AgoodchoiceC2_c3_c5_entrez_all.gmt Clontech_BD_Atlas.chip MOE430B.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
46 AgoodchoiceC2_c3_c5_orig_all.gmt CNMCMuscleChip.chip MoEx_1_0_st.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
47 AgoodchoiceC2_c3_c5_symbols_all.gmt CodeLink_Human_Whole_Genome.chip MoGene_1_0_st.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
48 APPLERA_ABI1700.chip CodeLink_UniSet_Human_20K_I_Bioarray.chip MoGene_1_1_st.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
49 ATH1_121501.chip CodeLink_UniSet_Human_I_Bioarray.chip Mouse430_2.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
50 AtlasMouse1.2.chip CodeLink_UniSet_Human_II_Bioarray.chip Mouse430A_2.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
51 AtlasRat1.2.chip CodeLink_UniSet_Rat_I_Bioarray.chip msigdb.v3.1.entrez.gmt
89e89b70a867 Uploaded
fubar
parents:
diff changeset
52 BIAO.chip DrosGenome1.chip msigdb_v3.1_files_to_download_locally
89e89b70a867 Uploaded
fubar
parents:
diff changeset
53 Bovine.chip Drosophila_2.chip msigdb.v3.1.orig.gmt
89e89b70a867 Uploaded
fubar
parents:
diff changeset
54 c1.all.v3.1.entrez.gmt G4110A.chip msigdb.v3.1.symbols.gmt
89e89b70a867 Uploaded
fubar
parents:
diff changeset
55 c1.all.v3.1.orig.gmt G4110Av2.chip msigdb_v3.1.xml
89e89b70a867 Uploaded
fubar
parents:
diff changeset
56 c1.all.v3.1.symbols.gmt GENE_SYMBOL.chip Mu11KsubA.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
57 c2.all.v3.1.entrez.gmt GenosysCytokineV2.chip Mu11KsubB.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
58 c2.all.v3.1.orig.gmt gseaChip3.1.loc Mu19KsubA.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
59 c2.all.v3.1.symbols.gmt gseaGMTsymbols_3.1.loc Mu19KsubB.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
60 c2.cgp.v3.1.entrez.gmt HC_G110.chip Mu19KsubC.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
61 c2.cgp.v3.1.orig.gmt HG_Focus.chip MWG_Human_30K_A.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
62 c2.cgp.v3.1.symbols.gmt HG_U133A_2.chip MWG_Human_30K_B.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
63 c2.cp.biocarta.v3.1.entrez.gmt HG_U133AAOFAV2.chip Netherland_cancer_institute_operon_human_35k.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
64 c2.cp.biocarta.v3.1.orig.gmt HG_U133A.chip Netherland_cancer_institute_operon_mouse_FOOk.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
65 c2.cp.biocarta.v3.1.symbols.gmt HG_U133B.chip NIA15k.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
66 c2.cp.kegg.v3.1.entrez.gmt HG_U133_Plus_2.chip OPERON_HUMANv2.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
67 c2.cp.kegg.v3.1.orig.gmt HG_U95Av2.chip OPERON_HUMANv3.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
68 c2.cp.kegg.v3.1.symbols.gmt HG_U95B.chip Ortholog_SEQ_ACCESSION_MOUSE.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
69 c2.cp.reactome.v3.1.entrez.gmt HG_U95C.chip RAE230A.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
70 c2.cp.reactome.v3.1.orig.gmt HG_U95D.chip RAE230B.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
71 c2.cp.reactome.v3.1.symbols.gmt HG_U95E.chip Rat230_2.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
72 c2.cp.v3.1.entrez.gmt HPCGGCompugenAnnotations.chip RefSeq_human.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
73 c2.cp.v3.1.orig.gmt HT_HG_U133A.chip RefSeq_NP_Human.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
74 c2.cp.v3.1.symbols.gmt HT_HG_U133A_EA.chip RefSeq_NP_Mouse.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
75 c3.all.v3.1.entrez.gmt Hu35KsubA.chip RefSeq_NP_Rat.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
76 c3.all.v3.1.orig.gmt Hu35KsubB.chip Research_Genetics.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
77 c3.all.v3.1.symbols.gmt Hu35KsubC.chip RG_U34A.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
78 c3.mir.v3.1.entrez.gmt Hu35KsubD.chip RG_U34B.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
79 c3.mir.v3.1.orig.gmt HU6800.chip RG_U34C.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
80 c3.mir.v3.1.symbols.gmt HuEx_1_0_STv2.chip RN_U34.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
81 c3.tft.v3.1.entrez.gmt HuGene_1_0_st.chip Rosetta50K.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
82 c3.tft.v3.1.orig.gmt Illimina_Mus6_v1_1.chip Rosetta.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
83 c3.tft.v3.1.symbols.gmt Illimuna_Mus6_v1.chip RT_U34.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
84 c4.all.v3.1.entrez.gmt Illumina_Hum6_v1.chip RZPD_Human_Ensembl1.1.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
85 c4.all.v3.1.orig.gmt Illumina_Hum6_v2.chip RZPD_Human_ORF_Clones_Gateway.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
86 c4.all.v3.1.symbols.gmt Illumina_Human.chip RZPD_Human_Unigene3.1.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
87 c4.cgn.v3.1.entrez.gmt Illumina_HumRef8_v1.chip Seq_Accession.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
88 c4.cgn.v3.1.orig.gmt Illumina_HumRef8_v2.chip Stanford.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
89 c4.cgn.v3.1.symbols.gmt Illumina_Mus6_v2.chip Stanford_Source_Accessions.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
90 c4.cm.v3.1.entrez.gmt Illumina_MusRef8_v1_1.chip TIGR_31K_Human_Set.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
91 c4.cm.v3.1.orig.gmt Illumina_MusRef8_v1.chip TIGR_40K_Human_Set.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
92 c4.cm.v3.1.symbols.gmt Illumina_RatRef12_v1.chip TRC.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
93 c5.all.v3.1.entrez.gmt ilmn_HumanHT_12_V3_0_R3_11283641_A.chip TRC_DB.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
94 c5.all.v3.1.orig.gmt ilmn_HumanHT_12_V4_0_R1_15002873_B.chip TRC_DB_v1.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
95 c5.all.v3.1.symbols.gmt ilmn_HumanRef_8_V2_0_R4_11223162_A.chip U133_X3P.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
96 c5.bp.v3.1.entrez.gmt ilmn_HUMANREF_8_V3_0_R1_11282963_A_WGDASL.chip UCLA_NIH_33K.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
97 c5.bp.v3.1.orig.gmt ilmn_HumanRef_8_V3_0_R3_11282963_A.chip Zebrafish.chip
89e89b70a867 Uploaded
fubar
parents:
diff changeset
98 c5.bp.v3.1.symbols.gmt
89e89b70a867 Uploaded
fubar
parents:
diff changeset
99 """
89e89b70a867 Uploaded
fubar
parents:
diff changeset
100 import subprocess
89e89b70a867 Uploaded
fubar
parents:
diff changeset
101
89e89b70a867 Uploaded
fubar
parents:
diff changeset
def makeRoss():
    """Build the combined 'Abetterchoice' and 'Agoodchoice' gmt files.

    For each identifier kind (orig, symbols, entrez) the listed msigdb v3.1
    gmt files are concatenated with the external `cat` command into one
    output file.  All file names are relative, so inputs are read from and
    outputs written to the current working directory.

    Raises subprocess.CalledProcessError if `cat` exits with a non-zero
    status, for example because an input file is missing.
    """
    # (output name template, input name templates); each %s is the kind.
    recipes = [
        ('Abetterchoice_nocgp_c2_c3_c5_%s_all.gmt',
         ['c2.cp.biocarta.v3.1.%s.gmt',
          'c2.cp.kegg.v3.1.%s.gmt',
          'c2.cp.reactome.v3.1.%s.gmt',
          'c3.all.v3.1.%s.gmt',
          'c5.all.v3.1.%s.gmt']),
        ('Agoodchoice_c2_c3_c5_%s_all.gmt',
         ['c2.all.v3.1.%s.gmt',
          'c3.all.v3.1.%s.gmt',
          'c5.all.v3.1.%s.gmt']),
    ]
    for kind in ['orig', 'symbols', 'entrez']:
        for outTemplate, inTemplates in recipes:
            cl = ['cat'] + [t % kind for t in inTemplates]
            print('running %s' % (cl,))
            # The context manager closes the output even when cat fails.
            with open(outTemplate % kind, 'w') as o:
                subprocess.check_call(cl, shell=False, stdout=o)
89e89b70a867 Uploaded
fubar
parents:
diff changeset
120
89e89b70a867 Uploaded
fubar
parents:
diff changeset
121
89e89b70a867 Uploaded
fubar
parents:
diff changeset
def main():
    """Create the chip and gmt loc files for the rgGSEA tool.

    Usage: makeGSEAloc.py [gsea_directory [version]]

    With no arguments the directory defaults to /data/genomes/gsea and the
    version to 3.1; a missing or empty version argument also means 3.1.
    The combined gmt files are built first, then gseaChip<vers>.loc and one
    gseaGMT<kind>_<vers>.loc per identifier kind are written into the
    directory.  Exits with an error message if the directory does not exist.
    """
    args = sys.argv[1:]
    if args:
        usedir = os.path.abspath(args[0])
        vers = args[1] if len(args) > 1 and args[1] else '3.1'
        print('Using supplied path %s and vers %s' % (usedir, vers))
    else:  # default at BakerIDI
        usedir = '/data/genomes/gsea'
        vers = '3.1'
        print('Using default path %s and vers %s' % (usedir, vers))
    # Explicit check rather than assert, which is stripped under python -O.
    if not os.path.isdir(usedir):
        sys.exit('## unable to open %s - please pass the path to a directory containing all gsea chip/gmt downloaded from ftp://gseaftp.broadinstitute.org/pub/gsea/annotations on the command line' % usedir)
    progName = os.path.basename(sys.argv[0])
    h = ['# generated by %s at %s from path = %s' % (progName, timenow(), usedir),
         '# loc file for the rgGSEA tool pointing to files from ftp://gseaftp.broadinstitute.org/pub/gsea/annotations',
         '# name value path']
    # makeRoss uses relative file names, so run it inside usedir to make the
    # combined gmt files land next to the downloaded ones globbed below.
    startdir = os.getcwd()
    os.chdir(usedir)
    try:
        makeRoss()
    finally:
        os.chdir(startdir)
    chips = glob.glob(os.path.join(usedir, '*.chip'))
    makeLoc(paths=chips, head=h, outpath=os.path.join(usedir, 'gseaChip%s.loc' % vers))
    allgmt = glob.glob(os.path.join(usedir, '*.gmt'))
    for kind in ['orig', 'symbols', 'entrez']:
        # Match on the file name only, so a directory component that happens
        # to contain the kind does not select every file.
        gmt = [x for x in allgmt if kind in os.path.basename(x)]
        makeLoc(paths=gmt, head=h, outpath=os.path.join(usedir, 'gseaGMT%s_%s.loc' % (kind, vers)))
    print('## %s done. copy the new .loc files in %s to your galaxy tool-data directory, add them to your tool_data_tables.xml file and restart Galaxy' % (progName, usedir))
    print('## something like:')
    print(notes % (vers, vers))
    print('## should do for tool_data_tables.xml')


if __name__ == "__main__":
    main()