Mercurial > repos > fubar > rg_gsea
comparison rgGSEA/makeGSEAloc.py @ 4:89e89b70a867 draft default tip
Uploaded
| author | fubar |
|---|---|
| date | Sun, 09 Jun 2013 23:09:34 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 3:8e291f464aa0 | 4:89e89b70a867 |
|---|---|
| 1 #!/bin/env python | |
| 2 # make the gsea loc files needed for the rgGSEA tool from a directory | |
| 3 # containing gmt and chip files downloaded from the gsea site | |
| 4 # Copyright ross lazarus feb 2012 | |
| 5 # all rights reserved | |
| 6 # licensed under the LGPL | |
| 7 | |
| 8 import os,sys,glob,time | |
| 9 | |
# Template for the tool_data_tables.xml entries printed at the end of a run.
# It takes two %s substitutions, both the annotation version string.
# The file names must match what main() writes into the data directory:
# gseaChip<vers>.loc and gseaGMT<kind>_<vers>.loc (only the symbols table is
# suggested here).
notes = """

<table name="gseaChip" comment_char="#">
<columns>value, name, path</columns>
<file path="tool-data/gseaChip%s.loc" />
</table>
<table name="gseaGMT" comment_char="#">
<columns>value, name, path</columns>
<file path="tool-data/gseaGMTsymbols_%s.loc" />
</table>

"""
| 22 | |
def timenow():
    """Return the current local time formatted as 'DD/MM/YYYY HH:MM:SS'."""
    stamp_format = '%d/%m/%Y %H:%M:%S'
    # localtime() with no argument converts the current time
    right_now = time.localtime()
    return time.strftime(stamp_format, right_now)
| 27 | |
def makeLoc(paths=None, head=None, outpath='gseaChip.loc'):
    """Write a 3 column, tab delimited loc file from a list of file paths.

    Each path becomes one row: the file's base name without its final
    extension, the same string again, then the path exactly as supplied.
    Rows are written in sorted order after the header lines.

    paths   - list of file paths (for example a glob result); not modified
    head    - list of header lines written first, one per line
    outpath - path of the loc file to create or overwrite

    Raises IOError/OSError if outpath cannot be opened or written.
    """
    # None stands in for "empty list" so no mutable default is shared
    paths = [] if paths is None else paths
    head = [] if head is None else head
    rows = []
    for path in paths:
        stem = os.path.splitext(os.path.basename(path))[0]
        rows.append('\t'.join([stem, stem, path]))
    # sorting the finished rows fixes the output order, so the caller's
    # list does not need to be sorted (or touched) first
    rows.sort()
    # the with block closes the file even if a write fails
    with open(outpath, 'w') as f:
        f.write('\n'.join(head))
        f.write('\n')
        f.write('\n'.join(rows))
        f.write('\n')
| 43 | |
| 44 """ | |
| 45 AgoodchoiceC2_c3_c5_entrez_all.gmt Clontech_BD_Atlas.chip MOE430B.chip | |
| 46 AgoodchoiceC2_c3_c5_orig_all.gmt CNMCMuscleChip.chip MoEx_1_0_st.chip | |
| 47 AgoodchoiceC2_c3_c5_symbols_all.gmt CodeLink_Human_Whole_Genome.chip MoGene_1_0_st.chip | |
| 48 APPLERA_ABI1700.chip CodeLink_UniSet_Human_20K_I_Bioarray.chip MoGene_1_1_st.chip | |
| 49 ATH1_121501.chip CodeLink_UniSet_Human_I_Bioarray.chip Mouse430_2.chip | |
| 50 AtlasMouse1.2.chip CodeLink_UniSet_Human_II_Bioarray.chip Mouse430A_2.chip | |
| 51 AtlasRat1.2.chip CodeLink_UniSet_Rat_I_Bioarray.chip msigdb.v3.1.entrez.gmt | |
| 52 BIAO.chip DrosGenome1.chip msigdb_v3.1_files_to_download_locally | |
| 53 Bovine.chip Drosophila_2.chip msigdb.v3.1.orig.gmt | |
| 54 c1.all.v3.1.entrez.gmt G4110A.chip msigdb.v3.1.symbols.gmt | |
| 55 c1.all.v3.1.orig.gmt G4110Av2.chip msigdb_v3.1.xml | |
| 56 c1.all.v3.1.symbols.gmt GENE_SYMBOL.chip Mu11KsubA.chip | |
| 57 c2.all.v3.1.entrez.gmt GenosysCytokineV2.chip Mu11KsubB.chip | |
| 58 c2.all.v3.1.orig.gmt gseaChip3.1.loc Mu19KsubA.chip | |
| 59 c2.all.v3.1.symbols.gmt gseaGMTsymbols_3.1.loc Mu19KsubB.chip | |
| 60 c2.cgp.v3.1.entrez.gmt HC_G110.chip Mu19KsubC.chip | |
| 61 c2.cgp.v3.1.orig.gmt HG_Focus.chip MWG_Human_30K_A.chip | |
| 62 c2.cgp.v3.1.symbols.gmt HG_U133A_2.chip MWG_Human_30K_B.chip | |
| 63 c2.cp.biocarta.v3.1.entrez.gmt HG_U133AAOFAV2.chip Netherland_cancer_institute_operon_human_35k.chip | |
| 64 c2.cp.biocarta.v3.1.orig.gmt HG_U133A.chip Netherland_cancer_institute_operon_mouse_FOOk.chip | |
| 65 c2.cp.biocarta.v3.1.symbols.gmt HG_U133B.chip NIA15k.chip | |
| 66 c2.cp.kegg.v3.1.entrez.gmt HG_U133_Plus_2.chip OPERON_HUMANv2.chip | |
| 67 c2.cp.kegg.v3.1.orig.gmt HG_U95Av2.chip OPERON_HUMANv3.chip | |
| 68 c2.cp.kegg.v3.1.symbols.gmt HG_U95B.chip Ortholog_SEQ_ACCESSION_MOUSE.chip | |
| 69 c2.cp.reactome.v3.1.entrez.gmt HG_U95C.chip RAE230A.chip | |
| 70 c2.cp.reactome.v3.1.orig.gmt HG_U95D.chip RAE230B.chip | |
| 71 c2.cp.reactome.v3.1.symbols.gmt HG_U95E.chip Rat230_2.chip | |
| 72 c2.cp.v3.1.entrez.gmt HPCGGCompugenAnnotations.chip RefSeq_human.chip | |
| 73 c2.cp.v3.1.orig.gmt HT_HG_U133A.chip RefSeq_NP_Human.chip | |
| 74 c2.cp.v3.1.symbols.gmt HT_HG_U133A_EA.chip RefSeq_NP_Mouse.chip | |
| 75 c3.all.v3.1.entrez.gmt Hu35KsubA.chip RefSeq_NP_Rat.chip | |
| 76 c3.all.v3.1.orig.gmt Hu35KsubB.chip Research_Genetics.chip | |
| 77 c3.all.v3.1.symbols.gmt Hu35KsubC.chip RG_U34A.chip | |
| 78 c3.mir.v3.1.entrez.gmt Hu35KsubD.chip RG_U34B.chip | |
| 79 c3.mir.v3.1.orig.gmt HU6800.chip RG_U34C.chip | |
| 80 c3.mir.v3.1.symbols.gmt HuEx_1_0_STv2.chip RN_U34.chip | |
| 81 c3.tft.v3.1.entrez.gmt HuGene_1_0_st.chip Rosetta50K.chip | |
| 82 c3.tft.v3.1.orig.gmt Illimina_Mus6_v1_1.chip Rosetta.chip | |
| 83 c3.tft.v3.1.symbols.gmt Illimuna_Mus6_v1.chip RT_U34.chip | |
| 84 c4.all.v3.1.entrez.gmt Illumina_Hum6_v1.chip RZPD_Human_Ensembl1.1.chip | |
| 85 c4.all.v3.1.orig.gmt Illumina_Hum6_v2.chip RZPD_Human_ORF_Clones_Gateway.chip | |
| 86 c4.all.v3.1.symbols.gmt Illumina_Human.chip RZPD_Human_Unigene3.1.chip | |
| 87 c4.cgn.v3.1.entrez.gmt Illumina_HumRef8_v1.chip Seq_Accession.chip | |
| 88 c4.cgn.v3.1.orig.gmt Illumina_HumRef8_v2.chip Stanford.chip | |
| 89 c4.cgn.v3.1.symbols.gmt Illumina_Mus6_v2.chip Stanford_Source_Accessions.chip | |
| 90 c4.cm.v3.1.entrez.gmt Illumina_MusRef8_v1_1.chip TIGR_31K_Human_Set.chip | |
| 91 c4.cm.v3.1.orig.gmt Illumina_MusRef8_v1.chip TIGR_40K_Human_Set.chip | |
| 92 c4.cm.v3.1.symbols.gmt Illumina_RatRef12_v1.chip TRC.chip | |
| 93 c5.all.v3.1.entrez.gmt ilmn_HumanHT_12_V3_0_R3_11283641_A.chip TRC_DB.chip | |
| 94 c5.all.v3.1.orig.gmt ilmn_HumanHT_12_V4_0_R1_15002873_B.chip TRC_DB_v1.chip | |
| 95 c5.all.v3.1.symbols.gmt ilmn_HumanRef_8_V2_0_R4_11223162_A.chip U133_X3P.chip | |
| 96 c5.bp.v3.1.entrez.gmt ilmn_HUMANREF_8_V3_0_R1_11282963_A_WGDASL.chip UCLA_NIH_33K.chip | |
| 97 c5.bp.v3.1.orig.gmt ilmn_HumanRef_8_V3_0_R3_11282963_A.chip Zebrafish.chip | |
| 98 c5.bp.v3.1.symbols.gmt | |
| 99 """ | |
| 100 import subprocess | |
| 101 | |
def makeRoss(usedir=None):
    """Build the combined 'Abetterchoice' and 'Agoodchoice' gmt files.

    For each identifier kind ('orig', 'symbols', 'entrez') the v3.1
    collection files are concatenated with ``cat`` into:

    * Abetterchoice_nocgp_c2_c3_c5_<kind>_all.gmt - c2.cp.biocarta,
      c2.cp.kegg, c2.cp.reactome, c3.all and c5.all
    * Agoodchoice_c2_c3_c5_<kind>_all.gmt - c2.all, c3.all and c5.all

    usedir - directory holding the input gmt files and receiving the
             outputs; None (the default) means the current working
             directory.

    Raises subprocess.CalledProcessError if cat exits non-zero, for
    example because an input file is missing.
    """
    # (output name pattern, collections to concatenate in this order)
    combos = [
        ('Abetterchoice_nocgp_c2_c3_c5_%s_all.gmt',
         ['c2.cp.biocarta', 'c2.cp.kegg', 'c2.cp.reactome', 'c3.all', 'c5.all']),
        ('Agoodchoice_c2_c3_c5_%s_all.gmt',
         ['c2.all', 'c3.all', 'c5.all']),
    ]
    for kind in ['orig', 'symbols', 'entrez']:
        for outpattern, collections in combos:
            outf = outpattern % kind
            if usedir:
                outf = os.path.join(usedir, outf)
            cl = ['cat'] + ['%s.v3.1.%s.gmt' % (c, kind) for c in collections]
            print('running %s' % (cl,))
            # the with block closes the output even when cat fails
            with open(outf, 'w') as o:
                # cwd=None leaves the child in the current directory
                subprocess.check_call(cl, shell=False, stdout=o, cwd=usedir)
| 120 | |
| 121 | |
def main():
    """Write the rgGSEA loc files for a directory of GSEA annotation files.

    Command line: [directory [version]]

    directory - folder holding the downloaded .chip and .gmt files;
                defaults to /data/genomes/gsea when no argument is given
    version   - annotation version used in the loc file names; defaults
                to 3.1 when absent or empty

    Builds the combined gmt files with makeRoss(), then writes
    gseaChip<version>.loc and one gseaGMT<kind>_<version>.loc per
    identifier kind into the directory, and prints a suggested
    tool_data_tables.xml fragment.

    Raises AssertionError if the directory does not exist.
    """
    if len(sys.argv) > 1:
        usedir = os.path.abspath(sys.argv[1])
        # the version argument is optional
        vers = (sys.argv[2] if len(sys.argv) > 2 else '') or '3.1'
        print('Using supplied path %s and vers %s' % (usedir, vers))
    else:  # default at BakerIDI
        usedir = '/data/genomes/gsea'
        vers = '3.1'
        print('Using default path %s and vers %s' % (usedir, vers))
    assert os.path.isdir(usedir),'## unable to open %s - please pass the path to a directory containing all gsea chip/gmt downloaded from ftp://gseaftp.broadinstitute.org/pub/gsea/annotations on the command line' % usedir
    progName = os.path.basename(sys.argv[0])
    h = ['# generated by %s at %s from path = %s' % (progName,timenow(),usedir),'# loc file for the rgGSEA tool pointing to files from ftp://gseaftp.broadinstitute.org/pub/gsea/annotations','# name value path']
    # makeRoss() addresses the gmt files by bare name, so run it from
    # inside the data directory and restore the caller's cwd afterwards
    startdir = os.getcwd()
    os.chdir(usedir)
    try:
        makeRoss()
    finally:
        os.chdir(startdir)
    chips = glob.glob(os.path.join(usedir,'*.chip'))
    makeLoc(paths=chips,head=h,outpath=os.path.join(usedir,'gseaChip%s.loc' % vers))
    allgmt = glob.glob(os.path.join(usedir,'*.gmt'))
    for kind in ['orig','symbols','entrez']:
        # match on the file name only, so a directory called e.g. 'orig'
        # does not pull every gmt file into every loc file
        gmt = [x for x in allgmt if kind in os.path.basename(x)]
        makeLoc(paths=gmt,head=h,outpath=os.path.join(usedir,'gseaGMT%s_%s.loc' % (kind,vers)))
    print('## %s done. copy the new .loc files in %s to your galaxy tool-data directory, add them to your tool_data_tables.xml file and restart Galaxy' % (progName,usedir))
    print('## something like:')
    print(notes % (vers,vers))
    print('## should do for tool_data_tables.xml')


if __name__=="__main__":
    main()
