annotate RNAseqDataAnnotation/RNAseqDataAnnotation.R @ 26:f183f8648c5a draft default tip

Uploaded
author eganrol
date Wed, 10 Dec 2014 06:42:21 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
26
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
1 #Author : keime / lornage
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
2 #Date : 2014/11
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
3
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
4
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
5 ########################################################################################################
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
6 #This function concatenates htseq-count result files, normalizes data and annotates data using Ensembl annotations
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
7
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
8 #arguments
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
9 #Species : Name of the species
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
10 #ensversion : version of Ensembl to use
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
11 #fileout : path of the tab-delimited output file containing, for each gene, the gene id and, for each library, raw read counts, normalized data and (when a species is given) normalized data / gene length
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
12 #corresp : data.frame linking the file loaded into galaxy to the corresponding condition
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
13 #nfiles : number of files(conditions)
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
14
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
15 #output : a data.frame with the following columns :
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
16 #ensembl gene id
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
17 #raw read counts for each library (one column per library)
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
18 #normalized data for each library (one column per library)
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
19 #normalized data divided by gene length for each library (one column per library)
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
20 #Gene name
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
21 #Description
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
22
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
23 #require : biomaRt and DESeq2 Bioconductor packages / package plyr1.8.1
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
24
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
25 #Methods :
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
26 #Normalization is performed using the method described in Genome Biology 2010;11(10):R106
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
27 #and implemented in the DESeq2 Bioconductor package
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
28 #Gene length corresponds to the median of the size of all transcripts corresponding to this gene
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
29 #########################################################################################################
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
30
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
31
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
32
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
RNAseqDataAnnotation <- function(Species, ensversion, fileout, corresp, nfiles) {
  # Concatenate htseq-count result files, normalize the counts with DESeq2
  # size factors and, unless Species is "None", annotate the genes through
  # Ensembl BioMart (gene name, description, gene length).
  #
  # Arguments:
  #   Species    : BioMart dataset name passed to useMart(), or "None" to skip
  #                the annotation step.
  #   ensversion : Ensembl archive tag (e.g. "oct2014"); used as the prefix of
  #                the archive host name.
  #   fileout    : path of the tab-delimited file written by write.table().
  #   corresp    : data.frame with columns Files (paths of the count files) and
  #                Sample_name (label of each library).
  #   nfiles     : number of rows of corresp to process.
  #
  # Value: called for its side effect (writing fileout). When the row-count
  # check fails, the message is printed and nothing is written.

  if (nfiles < 1) {
    stop("RNAseqDataAnnotation needs at least one count file", call. = FALSE)
  }
  file_idx <- seq_len(nfiles)

  # Sample names, one per library. toString() also copes with factor columns.
  libnames <- vapply(
    file_idx,
    function(i) toString(corresp$Sample_name[i]),
    character(1)
  )

  # Read every count file listed in corresp.
  suppressPackageStartupMessages(
    library(plyr, warn.conflicts = FALSE, verbose = FALSE, quietly = TRUE)
  )
  datalist <- lapply(file_idx, function(i) {
    # NOTE(review): header = TRUE treats the first line of each file as column
    # names -- confirm that the count files really carry a header line.
    rawdata <- read.table(toString(corresp$Files[i]), header = TRUE, sep = "\t")
    # Skip the last 5 lines of HTSeq-count files. head(x, -5) returns zero
    # rows for very short files, whereas 1:(nrow - 5) would count backwards.
    counts <- head(rawdata, -5)
    colnames(counts) <- c("ID", libnames[i])
    counts
  })

  # Join all the files on the gene id; rows follow the first file.
  datafile <- join_all(datalist, by = "ID", type = "left", match = "all")

  # Sanity check on the number of gene ids per file. This only compares row
  # counts: it is a necessary, not a sufficient, condition for all files to
  # contain the same gene ids.
  totalnbID <- sum(vapply(datalist, nrow, integer(1)))
  if (nrow(datafile) * nfiles != totalnbID) {
    # print() returns its argument invisibly, which is what this branch has
    # always returned.
    return(invisible(print("The files are not the same length")))
  }

  # Remove the genes whose read counts sum to zero over all samples.
  # drop = FALSE keeps a data.frame even when there is a single library.
  count_cols <- file_idx + 1
  datafile <- datafile[rowSums(datafile[, count_cols, drop = FALSE]) != 0, ]
  row.names(datafile) <- datafile[, 1]
  data <- datafile[, -1, drop = FALSE]

  if (Species == "None") {
    # Unknown species: raw counts + normalized counts only.
    datanorm <- t(t(data) / .rnaseq_size_factors(data))

    dataall <- data.frame(row.names(datafile), data, datanorm)
    colnames(dataall) <- c(
      "Ensembl gene id",
      paste(libnames, "(raw read counts)"),
      paste(libnames, "(normalized)")
    )
    write.table(dataall, file = fileout, sep = "\t", quote = FALSE,
                row.names = FALSE)
  } else {
    # Known species: raw counts + normalized counts + annotations.
    annotation <- .rnaseq_fetch_annotation(Species, ensversion, rownames(data))

    # Exon length
    ensinfos.exlen <- data.frame(
      annotation$ensembl_gene_id,
      annotation$ensembl_transcript_id,
      abs(annotation$exon_chrom_start - annotation$exon_chrom_end) + 1
    )
    colnames(ensinfos.exlen) <- c("ensembl_gene_id", "ensembl_transcript_id",
                                  "exon_length")

    # Transcript length = sum of the lengths of its exons
    tlen <- tapply(ensinfos.exlen$exon_length,
                   ensinfos.exlen$ensembl_transcript_id, sum)
    tlen.gene <- merge(tlen, unique(ensinfos.exlen[, 1:2]),
                       by.x = "row.names", by.y = "ensembl_transcript_id")
    colnames(tlen.gene) <- c("ensembl_transcript_id", "transcript_length",
                             "ensembl_gene_id")

    # Gene length = median of the size of all transcripts of this gene
    glen <- tapply(tlen.gene$transcript_length, tlen.gene$ensembl_gene_id,
                   median)

    # Data with gene length
    datalen <- merge(data, glen, by = "row.names")
    colnames(datalen) <- c("Ensembl_gene_id", colnames(data), "Gene_length")

    # Data with annotations and gene length. Both merges keep only the genes
    # present on both sides, so genes without annotation are not written.
    annotationgene <- unique(annotation[, 1:3])
    dataannot <- merge(datalen, annotationgene,
                       by.x = "Ensembl_gene_id", by.y = "ensembl_gene_id")

    # Keep only the first part of the gene description (before "[").
    # vapply() always returns a character vector, so the column is kept even
    # when dataannot has no rows.
    tmpdesc <- strsplit(as.character(dataannot$description), "[", fixed = TRUE)
    dataannot$description <- vapply(
      tmpdesc,
      function(parts) if (length(parts) >= 1) parts[[1]] else "",
      character(1)
    )

    # Column layout of dataannot: gene id, one column per library, then
    # Gene_length, gene name, description.
    nbcol <- ncol(dataannot)
    trailing_cols <- c(nbcol - 2, nbcol - 1, nbcol)
    counts <- dataannot[, -c(1, trailing_cols), drop = FALSE]

    # Normalized data calculation
    datanorm <- t(t(counts) / .rnaseq_size_factors(counts))

    # Normalized data adjusted for gene length (normalized data / length in kb)
    rpkn <- datanorm / (as.vector(dataannot[, nbcol - 2] / 1000))

    # Data + annotations + rpkn
    dataall <- data.frame(
      dataannot[, -trailing_cols],
      datanorm,
      rpkn,
      dataannot[, c(nbcol - 1, nbcol)]
    )
    colnames(dataall) <- c(
      "Ensembl gene id",
      paste(libnames, "(raw read counts)"),
      paste(libnames, "(normalized)"),
      paste(libnames, "(normalized and divided by gene length in kb)"),
      "Gene name",
      "Description"
    )
    write.table(dataall, file = fileout, sep = "\t", quote = FALSE,
                row.names = FALSE)
  }
}

# Private helper: DESeq2 size factors for a table of raw counts (one column
# per library). Every library is given its own condition level, as before.
.rnaseq_size_factors <- function(counts) {
  suppressPackageStartupMessages(
    library(DESeq2, warn.conflicts = FALSE, verbose = FALSE, quietly = TRUE)
  )
  design <- data.frame(Condition = factor(seq_len(ncol(counts))))
  dds <- DESeqDataSetFromMatrix(countData = counts, colData = design,
                                design = ~Condition)
  dds <- estimateSizeFactors(dds)
  sizeFactors(dds)
}

# Private helper: query the Ensembl archive BioMart for the gene name,
# description, transcript ids and exon coordinates of gene_ids. Because all
# the annotations are not always found in a first step, the ids still missing
# are queried again, at most twice.
.rnaseq_fetch_annotation <- function(Species, ensversion, gene_ids) {
  suppressPackageStartupMessages(
    library(biomaRt, warn.conflicts = FALSE, verbose = FALSE, quietly = TRUE)
  )

  # Convert the Ensembl version to the archive host
  version_tag <- toString(ensversion)
  host <- paste0(version_tag, ".archive.ensembl.org/biomart/martservice/")
  ensembl <- useMart("ENSEMBL_MART_ENSEMBL", host = host,
                     dataset = toString(Species))

  # The gene-name attribute is requested as external_gene_name for the
  # aug2014 / oct2014 archives and as external_gene_id for any other version.
  name_attribute <- if (version_tag %in% c("oct2014", "aug2014")) {
    "external_gene_name"
  } else {
    "external_gene_id"
  }
  attributes <- c("ensembl_gene_id", name_attribute, "description",
                  "ensembl_transcript_id", "exon_chrom_start",
                  "exon_chrom_end")
  query <- function(ids) {
    getBM(attributes = attributes, filters = "ensembl_gene_id", values = ids,
          mart = ensembl)
  }

  annotation <- query(gene_ids)
  for (attempt in 1:2) {
    not_found <- gene_ids[!gene_ids %in% unique(annotation$ensembl_gene_id)]
    if (length(not_found) == 0) {
      break
    }
    annotation <- rbind(annotation, query(not_found))
  }
  annotation
}
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
201
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
# Command-line driver.
# Expected arguments: <species> <ensembl version> <output file>, followed by
# one or more (<count file> <sample name>) pairs.
args <- commandArgs(trailingOnly = TRUE)

# Everything after the first three arguments must come in (file, name) pairs;
# fail with a readable message instead of a cryptic seq() error or NA names.
nbcells <- length(args) - 3
if (nbcells < 2 || nbcells %% 2 != 0) {
  stop(
    "Expected: <species> <ensembl version> <output file> ",
    "followed by one or more <count file> <sample name> pairs",
    call. = FALSE
  )
}

# Build a data.frame linking each file loaded into galaxy to its sample name.
# Files sit at positions 4, 6, 8, ... and sample names right after each file.
file_positions <- seq(4, length(args), by = 2)
Files <- args[file_positions]
Sample_name <- args[file_positions + 1]
nfiles <- nbcells / 2
corresp <- data.frame(Files = Files, Sample_name = Sample_name)

# Take the information given by the galaxy user to run the script
RNAseqDataAnnotation(args[1], args[2], args[3], corresp, nfiles)
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
218
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
219
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
220
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
221
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
222
f183f8648c5a Uploaded
eganrol
parents:
diff changeset
223