Mercurial > repos > iuc > isoformswitchanalyzer
comparison IsoformSwitchAnalyzeR.R @ 0:bb611fa3bc3b draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/isoformswitchanalyzer commit 2c61e4c6151000201dd9a8323722a380bc235380
| author | iuc |
|---|---|
| date | Tue, 24 Jan 2023 18:36:55 +0000 |
| parents | |
| children | 5ae218cee629 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:bb611fa3bc3b |
|---|---|
| 1 # Load the IsoformSwitchAnalyzeR library | |
| 2 library(IsoformSwitchAnalyzeR, | |
| 3 quietly = TRUE, | |
| 4 warn.conflicts = FALSE) | |
| 5 library(argparse, quietly = TRUE, warn.conflicts = FALSE) | |
| 6 library(dplyr, quietly = TRUE, warn.conflicts = FALSE) | |
| 7 | |
| 8 # setup R error handling to go to stderr | |
| 9 options( | |
| 10 show.error.messages = FALSE, | |
| 11 error = function() { | |
| 12 cat(geterrmessage(), file = stderr()) | |
| 13 q("no", 1, FALSE) | |
| 14 } | |
| 15 ) | |
| 16 | |
| 17 # we need that to not crash galaxy with an UTF8 error on German LC settings. | |
| 18 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
| 19 | |
| 20 ################################################################################ | |
| 21 ### Input Processing | |
| 22 ################################################################################ | |
| 23 | |
| 24 | |
| 25 # Collect arguments from command line | |
| 26 parser <- ArgumentParser(description = "IsoformSwitcheR R script") | |
| 27 | |
| 28 parser$add_argument("--modeSelector") | |
| 29 parser$add_argument("--parentDir", required = FALSE, help = "Parent directory") | |
| 30 parser$add_argument("--readLength", | |
| 31 required = FALSE, | |
| 32 type = "integer", | |
| 33 help = "Read length (required for stringtie)") | |
| 34 parser$add_argument("--annotation", required = FALSE, help = "Annotation") | |
| 35 parser$add_argument("--transcriptome", required = FALSE, help = "Transcriptome") | |
| 36 parser$add_argument( | |
| 37 "--fixStringTieAnnotationProblem", | |
| 38 action = "store_true", | |
| 39 required = FALSE, | |
| 40 help = "Fix StringTie annotation problem" | |
| 41 ) | |
| 42 parser$add_argument("--countFiles", required = FALSE, help = "Count files") | |
| 43 parser$add_argument("--toolSource", required = FALSE, help = "Tool source") | |
| 44 parser$add_argument("--rObject", required = FALSE, help = "R object") | |
| 45 parser$add_argument("--IFcutoff", | |
| 46 required = FALSE, | |
| 47 type = "numeric", | |
| 48 help = "IFcutoff") | |
| 49 parser$add_argument( | |
| 50 "--geneExpressionCutoff", | |
| 51 required = FALSE, | |
| 52 type = "numeric", | |
| 53 help = "Gene expression cutoff" | |
| 54 ) | |
| 55 parser$add_argument( | |
| 56 "--isoformExpressionCutoff", | |
| 57 required = FALSE, | |
| 58 type = "numeric", | |
| 59 help = "Isoform expression cutoff" | |
| 60 ) | |
| 61 parser$add_argument("--alpha", | |
| 62 required = FALSE, | |
| 63 type = "numeric", | |
| 64 help = "") | |
| 65 parser$add_argument("--dIFcutoff", | |
| 66 required = FALSE, | |
| 67 type = "numeric", | |
| 68 help = "dIF cutoff") | |
| 69 parser$add_argument( | |
| 70 "--onlySigIsoforms", | |
| 71 required = FALSE, | |
| 72 action = "store_true", | |
| 73 help = "Only significative isoforms" | |
| 74 ) | |
| 75 parser$add_argument( | |
| 76 "--filterForConsequences", | |
| 77 required = FALSE, | |
| 78 action = "store_true", | |
| 79 help = "Filter for consequences" | |
| 80 ) | |
| 81 parser$add_argument( | |
| 82 "--removeSingleIsformGenes", | |
| 83 required = FALSE, | |
| 84 action = "store_true", | |
| 85 help = "Remove single isoform genes" | |
| 86 ) | |
| 87 parser$add_argument( | |
| 88 "--keepIsoformInAllConditions", | |
| 89 required = FALSE, | |
| 90 action = "store_true", | |
| 91 help = "Keep isoform in all conditions" | |
| 92 ) | |
| 93 parser$add_argument( | |
| 94 "--correctForConfoundingFactors", | |
| 95 required = FALSE, | |
| 96 action = "store_true", | |
| 97 help = "Correct for confunding factors" | |
| 98 ) | |
| 99 parser$add_argument( | |
| 100 "--overwriteIFvalues", | |
| 101 required = FALSE, | |
| 102 action = "store_true", | |
| 103 help = "Overwrite IF values" | |
| 104 ) | |
| 105 parser$add_argument( | |
| 106 "--reduceToSwitchingGenes", | |
| 107 required = FALSE, | |
| 108 action = "store_true", | |
| 109 help = "Reduce to switching genes" | |
| 110 ) | |
| 111 parser$add_argument( | |
| 112 "--reduceFurtherToGenesWithConsequencePotential", | |
| 113 required = FALSE, | |
| 114 action = "store_true", | |
| 115 help = "Reduce further to genes with consequence potential" | |
| 116 ) | |
| 117 parser$add_argument( | |
| 118 "--keepIsoformInAllConditions2", | |
| 119 required = FALSE, | |
| 120 action = "store_true", | |
| 121 help = "Keep isoform in ll conditions" | |
| 122 ) | |
| 123 parser$add_argument("--minORFlength", | |
| 124 required = FALSE, | |
| 125 type = "integer", | |
| 126 help = "") | |
| 127 parser$add_argument("--orfMethod", required = FALSE, help = "ORF methods") | |
| 128 parser$add_argument("--PTCDistance", | |
| 129 required = FALSE, | |
| 130 type = "integer", | |
| 131 help = "") | |
| 132 parser$add_argument( | |
| 133 "--removeShortAAseq", | |
| 134 required = FALSE, | |
| 135 action = "store_true", | |
| 136 help = "Remove short aminoacid sequences" | |
| 137 ) | |
| 138 parser$add_argument( | |
| 139 "--removeLongAAseq", | |
| 140 required = FALSE, | |
| 141 action = "store_true", | |
| 142 help = "Remove long aminoacid sequences" | |
| 143 ) | |
| 144 parser$add_argument( | |
| 145 "--removeORFwithStop", | |
| 146 required = FALSE, | |
| 147 action = "store_true", | |
| 148 help = "Remove ORF with stop codon" | |
| 149 ) | |
| 150 parser$add_argument( | |
| 151 "--onlySwitchingGenes", | |
| 152 required = FALSE, | |
| 153 action = "store_true", | |
| 154 help = "Only switching genes" | |
| 155 ) | |
| 156 parser$add_argument("--analysisMode", required = FALSE, help = "Analyze all isoforms with differential usage or single genes") | |
| 157 parser$add_argument( | |
| 158 "--genesToPlot", | |
| 159 type = "integer", | |
| 160 default = 10, | |
| 161 required = FALSE, | |
| 162 help = "Number of genes to plot" | |
| 163 ) | |
| 164 parser$add_argument("--gene", required = FALSE, help = "Gene ID to analyze") | |
| 165 parser$add_argument( | |
| 166 "--sortByQvals", | |
| 167 action = "store_true", | |
| 168 required = FALSE, | |
| 169 help = "Sort genes by Q-val values" | |
| 170 ) | |
| 171 parser$add_argument("--countGenes", | |
| 172 action = "store_true", | |
| 173 required = FALSE, | |
| 174 help = "Count genes") | |
| 175 parser$add_argument( | |
| 176 "--asFractionTotal", | |
| 177 action = "store_true", | |
| 178 required = FALSE, | |
| 179 help = "Plot gene expresson as fraction of total" | |
| 180 ) | |
| 181 parser$add_argument("--plotGenes", | |
| 182 action = "store_true", | |
| 183 required = FALSE, | |
| 184 help = "Plot genes instead of isoforms") | |
| 185 parser$add_argument( | |
| 186 "--simplifyLocation", | |
| 187 action = "store_true", | |
| 188 required = FALSE, | |
| 189 help = "Simplify localtion" | |
| 190 ) | |
| 191 parser$add_argument( | |
| 192 "--removeEmptyConsequences", | |
| 193 action = "store_true", | |
| 194 required = FALSE, | |
| 195 help = "Remove empty consequences" | |
| 196 ) | |
| 197 parser$add_argument( | |
| 198 "--analysisOppositeConsequence", | |
| 199 action = "store_true", | |
| 200 required = FALSE, | |
| 201 help = "Analysi opposite consequences" | |
| 202 ) | |
| 203 parser$add_argument("--pathToCPATresultFile", | |
| 204 required = FALSE, | |
| 205 help = "Path to CPAT result file") | |
| 206 parser$add_argument("--pathToCPC2resultFile", | |
| 207 required = FALSE, | |
| 208 help = "Path to CPC2 result file") | |
| 209 parser$add_argument("--pathToPFAMresultFile", | |
| 210 required = FALSE, | |
| 211 help = "Path to PFAM result file") | |
| 212 parser$add_argument("--pathToNetSurfP2resultFile", | |
| 213 required = FALSE, | |
| 214 help = "Path to NetSurfP2 result file") | |
| 215 parser$add_argument("--pathToSignalPresultFile", | |
| 216 required = FALSE, | |
| 217 help = "Path to signalP result file") | |
| 218 parser$add_argument("--pathToIUPred2AresultFile", | |
| 219 required = FALSE, | |
| 220 help = "Path to IUPred2A result file") | |
| 221 parser$add_argument("--codingCutoff", | |
| 222 required = FALSE, | |
| 223 type = "numeric", | |
| 224 help = "Codding cutoff") | |
| 225 parser$add_argument( | |
| 226 "--removeNoncodingORFs", | |
| 227 action = "store_true", | |
| 228 required = FALSE, | |
| 229 help = "Remove non-coding ORFs" | |
| 230 ) | |
| 231 parser$add_argument( | |
| 232 "--minSignalPeptideProbability", | |
| 233 required = FALSE, | |
| 234 type = "numeric", | |
| 235 help = "Minimul signal peptide probability" | |
| 236 ) | |
| 237 parser$add_argument( | |
| 238 "--smoothingWindowSize", | |
| 239 type = "integer", | |
| 240 required = FALSE, | |
| 241 help = "Smoothing windows size" | |
| 242 ) | |
| 243 parser$add_argument( | |
| 244 "--probabilityCutoff", | |
| 245 required = FALSE, | |
| 246 type = "double", | |
| 247 help = "Probability cutoff" | |
| 248 ) | |
| 249 parser$add_argument("--minIdrSize", | |
| 250 required = FALSE, | |
| 251 type = "integer", | |
| 252 help = "Min Idr size") | |
| 253 parser$add_argument( | |
| 254 "--annotateBindingSites", | |
| 255 action = "store_true", | |
| 256 required = FALSE, | |
| 257 help = "Annotate binding sites" | |
| 258 ) | |
| 259 parser$add_argument( | |
| 260 "--minIdrBindingSize", | |
| 261 required = FALSE, | |
| 262 type = "integer", | |
| 263 help = "Minimun Idr binding size" | |
| 264 ) | |
| 265 parser$add_argument( | |
| 266 "--minIdrBindingOverlapFrac", | |
| 267 required = FALSE, | |
| 268 type = "numeric", | |
| 269 help = "" | |
| 270 ) | |
| 271 parser$add_argument("--ntCutoff", | |
| 272 required = FALSE, | |
| 273 type = "integer", | |
| 274 help = "Nucleotide cutoff") | |
| 275 parser$add_argument("--ntFracCutoff", | |
| 276 required = FALSE, | |
| 277 type = "numeric", | |
| 278 help = "Nucleotide fraction cutoff") | |
| 279 parser$add_argument( | |
| 280 "--ntJCsimCutoff", | |
| 281 required = FALSE, | |
| 282 type = "numeric", | |
| 283 help = "Nucleotide Jaccard simmilarity cutoff" | |
| 284 ) | |
| 285 parser$add_argument("--AaCutoff", | |
| 286 required = FALSE, | |
| 287 type = "integer", | |
| 288 help = "Aminoacid cutoff") | |
| 289 parser$add_argument("--AaFracCutoff", | |
| 290 required = FALSE, | |
| 291 type = "numeric", | |
| 292 help = "Aminoacid fraction cutoff") | |
| 293 parser$add_argument( | |
| 294 "--AaJCsimCutoff", | |
| 295 required = FALSE, | |
| 296 type = "numeric", | |
| 297 help = "Aminoacid Jaccard similarity cutoff" | |
| 298 ) | |
| 299 parser$add_argument( | |
| 300 "--removeNonConseqSwitches", | |
| 301 action = "store_true", | |
| 302 required = FALSE, | |
| 303 help = "Remove switches without consequences" | |
| 304 ) | |
| 305 parser$add_argument( | |
| 306 "--rescaleTranscripts", | |
| 307 action = "store_true", | |
| 308 required = FALSE, | |
| 309 help = "Rescale transcripts" | |
| 310 ) | |
| 311 parser$add_argument( | |
| 312 "--reverseMinus", | |
| 313 action = "store_true", | |
| 314 required = FALSE, | |
| 315 help = "Reverse minus" | |
| 316 ) | |
| 317 parser$add_argument( | |
| 318 "--addErrorbars", | |
| 319 action = "store_true", | |
| 320 required = FALSE, | |
| 321 help = "Add error bars" | |
| 322 ) | |
| 323 | |
| 324 | |
| 325 args <- parser$parse_args() | |
| 326 | |
| 327 # Data import | |
| 328 ################### | |
| 329 | |
| 330 if (args$modeSelector == "data_import") { | |
| 331 | |
| 332 quantificationData <- importIsoformExpression( | |
| 333 parentDir = args$parentDir, | |
| 334 addIsofomIdAsColumn = TRUE, | |
| 335 readLength = args$readLength | |
| 336 ) | |
| 337 | |
| 338 ### Make design matrix | |
| 339 myDesign <- data.frame( | |
| 340 sampleID = colnames(quantificationData$abundance)[-1], | |
| 341 condition = gsub( | |
| 342 "[[:digit:]]+", | |
| 343 "", | |
| 344 colnames(quantificationData$abundance)[-1] | |
| 345 ) | |
| 346 ) | |
| 347 | |
| 348 if (args$toolSource == "stringtie") { | |
| 349 SwitchList <- importRdata( | |
| 350 isoformCountMatrix = quantificationData$counts, | |
| 351 isoformRepExpression = quantificationData$abundance, | |
| 352 designMatrix = myDesign, | |
| 353 isoformExonAnnoation = args$annotation, | |
| 354 isoformNtFasta = args$transcriptome, | |
| 355 showProgress = TRUE, | |
| 356 fixStringTieAnnotationProblem = args$fixStringTieAnnotationProblem | |
| 357 ) | |
| 358 } else { | |
| 359 SwitchList <- importRdata( | |
| 360 isoformCountMatrix = quantificationData$counts, | |
| 361 isoformRepExpression = quantificationData$abundance, | |
| 362 designMatrix = myDesign, | |
| 363 isoformExonAnnoation = args$annotation, | |
| 364 isoformNtFasta = args$transcriptome, | |
| 365 showProgress = TRUE | |
| 366 ) | |
| 367 } | |
| 368 | |
| 369 | |
| 370 geneCountMatrix <- extractGeneExpression( | |
| 371 SwitchList, | |
| 372 extractCounts = TRUE, | |
| 373 addGeneNames = FALSE, | |
| 374 addIdsAsColumns = FALSE | |
| 375 ) | |
| 376 | |
| 377 if (args$countFiles == "collection") { | |
| 378 | |
| 379 expressionDF <- data.frame(geneCountMatrix) | |
| 380 | |
| 381 myDesign$condition[length(myDesign$condition)] | |
| 382 | |
| 383 dataframe_factor1 <- expressionDF %>% select(matches(myDesign$condition[1])) | |
| 384 dataframe_factor2 <- expressionDF %>% select(matches(myDesign$condition[length(myDesign$condition)])) | |
| 385 | |
| 386 | |
| 387 lf1 <- as.list(as.data.frame(dataframe_factor1)) | |
| 388 sampleNames1 <- colnames(as.data.frame(dataframe_factor1)) | |
| 389 | |
| 390 lf2 <- as.list(as.data.frame(dataframe_factor2)) | |
| 391 sampleNames2 <- colnames(as.data.frame(dataframe_factor2)) | |
| 392 | |
| 393 geneNames <- row.names(as.data.frame(expressionDF)) | |
| 394 | |
| 395 | |
| 396 for (index in seq_along(lf1)) { | |
| 397 tabular_expression <- data.frame(geneNames, lf1[index]) | |
| 398 colnames(tabular_expression) <- | |
| 399 c("Geneid", sampleNames1[index]) | |
| 400 filename <- | |
| 401 paste(sampleNames1[index], "dataset.tabular", sep = "_") | |
| 402 output_path <- paste("./count_files/factor1/", filename, sep = "") | |
| 403 write.table( | |
| 404 tabular_expression, | |
| 405 output_path, | |
| 406 sep = "\t", | |
| 407 row.names = FALSE, | |
| 408 quote = FALSE | |
| 409 ) | |
| 410 } | |
| 411 for (index in seq_along(lf2)) { | |
| 412 tabular_expression <- data.frame(geneNames, lf2[index]) | |
| 413 colnames(tabular_expression) <- | |
| 414 c("Geneid", sampleNames2[index]) | |
| 415 filename <- | |
| 416 paste(sampleNames2[index], "dataset.tabular", sep = "_") | |
| 417 output_path <- paste("./count_files/factor2/", filename, sep = "") | |
| 418 write.table( | |
| 419 tabular_expression, | |
| 420 output_path, | |
| 421 sep = "\t", | |
| 422 row.names = FALSE, | |
| 423 quote = FALSE | |
| 424 ) | |
| 425 } | |
| 426 } else if (args$countFiles == "matrix") { | |
| 427 expressionDF <- data.frame(geneCountMatrix) | |
| 428 geneNames <- row.names(expressionDF) | |
| 429 | |
| 430 expressionDF <- cbind(geneNames, expressionDF) | |
| 431 write.table( | |
| 432 as.data.frame(expressionDF), | |
| 433 "./count_files/matrix.tabular", | |
| 434 sep = "\t", | |
| 435 row.names = FALSE, | |
| 436 quote = FALSE | |
| 437 ) | |
| 438 write.table( | |
| 439 as.data.frame(myDesign), | |
| 440 "./count_files/samples.tabular", | |
| 441 sep = "\t", | |
| 442 row.names = FALSE, | |
| 443 quote = FALSE | |
| 444 ) | |
| 445 } | |
| 446 | |
| 447 save(SwitchList, file = "SwitchList.Rda") | |
| 448 | |
| 449 } | |
| 450 | |
| 451 if (args$modeSelector == "first_step") { | |
| 452 | |
| 453 # First part of the analysis | |
| 454 ############################# | |
| 455 | |
| 456 load(file = args$rObject) | |
| 457 | |
| 458 ### Filter | |
| 459 SwitchList <- preFilter( | |
| 460 SwitchList, | |
| 461 IFcutoff = args$IFcutoff, | |
| 462 geneExpressionCutoff = args$geneExpressionCutoff, | |
| 463 isoformExpressionCutoff = args$isoformExpressionCutoff, | |
| 464 removeSingleIsoformGenes = args$removeSingleIsformGenes, | |
| 465 onlySigIsoforms = args$onlySigIsoforms, | |
| 466 keepIsoformInAllConditions = args$keepIsoformInAllConditions, | |
| 467 alpha = args$alpha, | |
| 468 dIFcutoff = args$dIFcutoff, | |
| 469 ) | |
| 470 | |
| 471 ### Test for isoform switches | |
| 472 SwitchList <- isoformSwitchTestDEXSeq( | |
| 473 SwitchList, | |
| 474 alpha = args$alpha, | |
| 475 dIFcutoff = args$dIFcutoff, | |
| 476 correctForConfoundingFactors = args$correctForConfoundingFactors, | |
| 477 overwriteIFvalues = args$overwriteIFvalues, | |
| 478 reduceToSwitchingGenes = args$reduceToSwitchingGenes, | |
| 479 reduceFurtherToGenesWithConsequencePotential = args$reduceFurtherToGenesWithConsequencePotential, | |
| 480 onlySigIsoforms = args$onlySigIsoforms, | |
| 481 keepIsoformInAllConditions = args$keepIsoformInAllConditions2, | |
| 482 showProgress = TRUE, | |
| 483 ) | |
| 484 | |
| 485 SwitchList <- analyzeNovelIsoformORF( | |
| 486 SwitchList, | |
| 487 analysisAllIsoformsWithoutORF = TRUE, | |
| 488 minORFlength = args$minORFlength, | |
| 489 orfMethod = args$orfMethod, | |
| 490 PTCDistance = args$PTCDistance, | |
| 491 startCodons = "ATG", | |
| 492 stopCodons = c("TAA", "TAG", "TGA"), | |
| 493 showProgress = TRUE, | |
| 494 ) | |
| 495 | |
| 496 ### Extract Sequences | |
| 497 SwitchList <- extractSequence( | |
| 498 SwitchList, | |
| 499 onlySwitchingGenes = args$onlySwitchingGenes, | |
| 500 alpha = args$alpha, | |
| 501 dIFcutoff = args$dIFcutoff, | |
| 502 extractNTseq = TRUE, | |
| 503 extractAAseq = TRUE, | |
| 504 removeShortAAseq = args$removeShortAAseq, | |
| 505 removeLongAAseq = args$removeLongAAseq, | |
| 506 removeORFwithStop = args$removeORFwithStop, | |
| 507 addToSwitchAnalyzeRlist = TRUE, | |
| 508 writeToFile = TRUE, | |
| 509 pathToOutput = getwd(), | |
| 510 outputPrefix = "isoformSwitchAnalyzeR_isoform", | |
| 511 forceReExtraction = FALSE, | |
| 512 quiet = FALSE | |
| 513 ) | |
| 514 | |
| 515 ### Summary | |
| 516 switchSummary <- extractSwitchSummary( | |
| 517 SwitchList, | |
| 518 filterForConsequences = args$filterForConsequences, | |
| 519 alpha = args$alpha, | |
| 520 dIFcutoff = args$dIFcutoff, | |
| 521 onlySigIsoforms = args$onlySigIsoforms, | |
| 522 ) | |
| 523 | |
| 524 save(SwitchList, file = "SwitchList.Rda") | |
| 525 write.table( | |
| 526 switchSummary, | |
| 527 file = "switchSummary.tsv", | |
| 528 quote = FALSE, | |
| 529 sep = "\t", | |
| 530 col.names = TRUE, | |
| 531 row.names = FALSE | |
| 532 ) | |
| 533 | |
| 534 } | |
| 535 | |
| 536 if (args$modeSelector == "second_step") { | |
| 537 | |
| 538 # Second part of the analysis | |
| 539 ############################# | |
| 540 | |
| 541 load(file = args$rObject) | |
| 542 | |
| 543 ### Add annotation | |
| 544 if (!is.null(args$pathToCPATresultFile)) { | |
| 545 SwitchList <- analyzeCPAT( | |
| 546 SwitchList, | |
| 547 pathToCPATresultFile = args$pathToCPATresultFile, | |
| 548 codingCutoff = args$codingCutoff, | |
| 549 removeNoncodinORFs = args$removeNoncodingORFs | |
| 550 ) | |
| 551 } | |
| 552 | |
| 553 if (!is.null(args$pathToCPC2resultFile)) { | |
| 554 SwitchList <- analyzeCPC2( | |
| 555 SwitchList, | |
| 556 pathToCPC2resultFile = args$pathToCPC2resultFile, | |
| 557 removeNoncodinORFs = args$removeNoncodingORFs | |
| 558 ) | |
| 559 } | |
| 560 | |
| 561 if (!is.null(args$pathToPFAMresultFile)) { | |
| 562 pfamFiles <- list.files(path = args$pathToPFAMresultFile, | |
| 563 full.names = TRUE) | |
| 564 | |
| 565 SwitchList <- analyzePFAM(SwitchList, | |
| 566 pathToPFAMresultFile = pfamFiles, | |
| 567 showProgress = FALSE) | |
| 568 } | |
| 569 | |
| 570 if (!is.null(args$pathToNetSurfP2resultFile)) { | |
| 571 netsurfFiles <- list.files(path = args$pathToNetSurfP2resultFile, | |
| 572 full.names = TRUE) | |
| 573 | |
| 574 SwitchList <- analyzeNetSurfP2( | |
| 575 SwitchList, | |
| 576 pathToNetSurfP2resultFile = netsurfFiles, | |
| 577 smoothingWindowSize = args$smoothingWindowSize, | |
| 578 probabilityCutoff = args$probabilityCutoff, | |
| 579 minIdrSize = args$minIdrSize, | |
| 580 showProgress = TRUE | |
| 581 ) | |
| 582 } | |
| 583 | |
| 584 if (!is.null(args$pathToIUPred2AresultFile)) { | |
| 585 SwitchList <- analyzeIUPred2A( | |
| 586 SwitchList, | |
| 587 pathToIUPred2AresultFile = args$pathToIUPred2AresultFile, | |
| 588 smoothingWindowSize = args$smoothingWindowSize, | |
| 589 probabilityCutoff = args$probabilityCutoff, | |
| 590 minIdrSize = args$minIdrSize, | |
| 591 annotateBindingSites = args$annotateBindingSites, | |
| 592 minIdrBindingSize = args$minIdrBindingSize, | |
| 593 minIdrBindingOverlapFrac = args$minIdrBindingOverlapFrac, | |
| 594 showProgress = TRUE, | |
| 595 quiet = FALSE | |
| 596 ) | |
| 597 } | |
| 598 | |
| 599 if (!is.null(args$pathToSignalPresultFile)) { | |
| 600 signalpFiles <- list.files(path = args$pathToSignalPresultFile, | |
| 601 full.names = TRUE) | |
| 602 | |
| 603 SwitchList <- analyzeSignalP( | |
| 604 SwitchList, | |
| 605 pathToSignalPresultFile = signalpFiles, | |
| 606 minSignalPeptideProbability = args$minSignalPeptideProbability | |
| 607 ) | |
| 608 } | |
| 609 | |
| 610 SwitchList <- analyzeAlternativeSplicing( | |
| 611 SwitchList, | |
| 612 onlySwitchingGenes = args$onlySwitchingGenes, | |
| 613 alpha = args$alpha, | |
| 614 dIFcutoff = args$dIFcutoff, | |
| 615 showProgress = TRUE | |
| 616 ) | |
| 617 | |
| 618 SwitchList <- analyzeIntronRetention( | |
| 619 SwitchList, | |
| 620 onlySwitchingGenes = args$onlySwitchingGenes, | |
| 621 alpha = args$alpha, | |
| 622 dIFcutoff = args$dIFcutoff, | |
| 623 showProgress = TRUE | |
| 624 ) | |
| 625 | |
| 626 consequences <- c( | |
| 627 "intron_retention", | |
| 628 "NMD_status", | |
| 629 "isoform_seq_similarity", | |
| 630 "ORF_genomic", | |
| 631 "tss", | |
| 632 "tts" | |
| 633 ) | |
| 634 | |
| 635 if (!is.null(args$pathToCPATresultFile) || | |
| 636 !is.null(args$pathToCPC2resultFile)) { | |
| 637 updatedConsequences <- c(consequences, "coding_potential") | |
| 638 consequences <- updatedConsequences | |
| 639 } | |
| 640 | |
| 641 if (!is.null(args$pathToPFAMresultFile)) { | |
| 642 updatedConsequences <- c(consequences, "domains_identified") | |
| 643 consequences <- updatedConsequences | |
| 644 } | |
| 645 | |
| 646 if (!is.null(args$pathToSignalPresultFile)) { | |
| 647 updatedConsequences <- c(consequences, "signal_peptide_identified") | |
| 648 consequences <- updatedConsequences | |
| 649 } | |
| 650 | |
| 651 if (!is.null(args$pathToNetSurfP2resultFile) || | |
| 652 !is.null(args$pathToIUPred2AresultFile)) { | |
| 653 updatedConsequences <- c(consequences, "IDR_identified", "IDR_type") | |
| 654 consequences <- updatedConsequences | |
| 655 } | |
| 656 | |
| 657 SwitchList <- analyzeSwitchConsequences( | |
| 658 SwitchList, | |
| 659 consequencesToAnalyze = consequences, | |
| 660 alpha = args$alpha, | |
| 661 dIFcutoff = args$dIFcutoff, | |
| 662 onlySigIsoforms = args$onlySigIsoforms, | |
| 663 ntCutoff = args$ntCutoff, | |
| 664 ntJCsimCutoff = args$ntJCsimCutoff, | |
| 665 AaCutoff = args$AaCutoff, | |
| 666 AaFracCutoff = args$AaFracCutoff, | |
| 667 AaJCsimCutoff = args$AaJCsimCutoff, | |
| 668 removeNonConseqSwitches = args$removeNonConseqSwitches, | |
| 669 showProgress = TRUE | |
| 670 ) | |
| 671 | |
| 672 | |
| 673 ### Visual analysis | |
| 674 # Top genes | |
| 675 | |
| 676 if (args$analysisMode == "single") { | |
| 677 | |
| 678 outputFile <- file.path(getwd(), "single_gene.pdf") | |
| 679 | |
| 680 pdf( | |
| 681 file = outputFile, | |
| 682 onefile = FALSE, | |
| 683 height = 6, | |
| 684 width = 9 | |
| 685 ) | |
| 686 | |
| 687 switchPlot( | |
| 688 SwitchList, | |
| 689 gene = args$gene, | |
| 690 condition1 = myDesign$condition[1], | |
| 691 condition2 = myDesign$condition[length(myDesign$condition)], | |
| 692 IFcutoff = args$IFcutoff, | |
| 693 dIFcutoff = args$dIFcutoff, | |
| 694 rescaleTranscripts = args$rescaleTranscripts, | |
| 695 reverseMinus = args$reverseMinus, | |
| 696 addErrorbars = args$addErrorbars, | |
| 697 logYaxis = FALSE, | |
| 698 localTheme = theme_bw(base_size = 8) | |
| 699 ) | |
| 700 dev.off() | |
| 701 | |
| 702 } else { | |
| 703 mostSwitchingGene <- | |
| 704 extractTopSwitches( | |
| 705 SwitchList, | |
| 706 n = Inf, | |
| 707 filterForConsequences = args$filterForConsequences, | |
| 708 extractGenes = TRUE, | |
| 709 alpha = args$alpha, | |
| 710 dIFcutoff = args$dIFcutoff, | |
| 711 inEachComparison = FALSE, | |
| 712 sortByQvals = args$sortByQvals | |
| 713 ) | |
| 714 | |
| 715 write.table( | |
| 716 mostSwitchingGene, | |
| 717 file = "mostSwitchingGene.tsv", | |
| 718 quote = FALSE, | |
| 719 sep = "\t", | |
| 720 col.names = TRUE, | |
| 721 row.names = FALSE | |
| 722 ) | |
| 723 | |
| 724 | |
| 725 switchPlotTopSwitches( | |
| 726 SwitchList, | |
| 727 alpha = args$alpha, | |
| 728 dIFcutoff = args$dIFcutoff, | |
| 729 onlySigIsoforms = args$onlySigIsoforms, | |
| 730 n = args$genesToPlot, | |
| 731 sortByQvals = args$sortByQvals, | |
| 732 pathToOutput = getwd(), | |
| 733 fileType = "pdf" | |
| 734 ) | |
| 735 | |
| 736 outputFile <- | |
| 737 file.path(getwd(), "extractConsequencesSummary.pdf") | |
| 738 | |
| 739 pdf( | |
| 740 file = outputFile, | |
| 741 onefile = FALSE, | |
| 742 height = 6, | |
| 743 width = 9 | |
| 744 ) | |
| 745 | |
| 746 consequenceSummary <- extractConsequenceSummary( | |
| 747 SwitchList, | |
| 748 consequencesToAnalyze = "all", | |
| 749 includeCombined = FALSE, | |
| 750 asFractionTotal = args$asFractionTotal, | |
| 751 alpha = args$alpha, | |
| 752 dIFcutoff = args$dIFcutoff, | |
| 753 plot = TRUE, | |
| 754 plotGenes = args$plotGenes, | |
| 755 simplifyLocation = args$simplifyLocation, | |
| 756 removeEmptyConsequences = args$removeEmptyConsequences, | |
| 757 returnResult = TRUE, | |
| 758 localTheme = theme_bw() | |
| 759 ) | |
| 760 dev.off() | |
| 761 | |
| 762 write.table( | |
| 763 consequenceSummary, | |
| 764 file = "consequencesSummary.tsv", | |
| 765 quote = FALSE, | |
| 766 sep = "\t", | |
| 767 col.names = TRUE, | |
| 768 row.names = FALSE | |
| 769 ) | |
| 770 | |
| 771 | |
| 772 outputFile <- file.path(getwd(), "consequencesEnrichment.pdf") | |
| 773 pdf( | |
| 774 file = outputFile, | |
| 775 onefile = FALSE, | |
| 776 height = 6, | |
| 777 width = 9 | |
| 778 ) | |
| 779 consequenceEnrichment <- extractConsequenceEnrichment( | |
| 780 SwitchList, | |
| 781 consequencesToAnalyze = "all", | |
| 782 alpha = args$alpha, | |
| 783 dIFcutoff = args$dIFcutoff, | |
| 784 countGenes = args$countGenes, | |
| 785 analysisOppositeConsequence = args$analysisOppositeConsequence, | |
| 786 plot = TRUE, | |
| 787 localTheme = theme_bw(base_size = 12), | |
| 788 minEventsForPlotting = 10, | |
| 789 returnResult = TRUE | |
| 790 ) | |
| 791 dev.off() | |
| 792 | |
| 793 write.table( | |
| 794 consequenceEnrichment, | |
| 795 file = "consequencesEnrichment.tsv", | |
| 796 quote = FALSE, | |
| 797 sep = "\t", | |
| 798 col.names = TRUE, | |
| 799 row.names = FALSE | |
| 800 ) | |
| 801 | |
| 802 | |
| 803 outputFile <- file.path(getwd(), "splicingEnrichment.pdf") | |
| 804 pdf( | |
| 805 file = outputFile, | |
| 806 onefile = FALSE, | |
| 807 height = 6, | |
| 808 width = 9 | |
| 809 ) | |
| 810 splicingEnrichment <- extractSplicingEnrichment( | |
| 811 SwitchList, | |
| 812 splicingToAnalyze = "all", | |
| 813 alpha = args$alpha, | |
| 814 dIFcutoff = args$dIFcutoff, | |
| 815 onlySigIsoforms = args$onlySigIsoforms, | |
| 816 countGenes = args$countGenes, | |
| 817 plot = TRUE, | |
| 818 minEventsForPlotting = 10, | |
| 819 returnResult = TRUE | |
| 820 ) | |
| 821 dev.off() | |
| 822 | |
| 823 write.table( | |
| 824 splicingEnrichment, | |
| 825 file = "splicingEnrichment.tsv", | |
| 826 quote = FALSE, | |
| 827 sep = "\t", | |
| 828 col.names = TRUE, | |
| 829 row.names = FALSE | |
| 830 ) | |
| 831 | |
| 832 | |
| 833 outputFile <- file.path(getwd(), "splicingSummary.pdf") | |
| 834 pdf( | |
| 835 file = outputFile, | |
| 836 onefile = FALSE, | |
| 837 height = 6, | |
| 838 width = 9 | |
| 839 ) | |
| 840 splicingSummary <- extractSplicingSummary( | |
| 841 SwitchList, | |
| 842 splicingToAnalyze = "all", | |
| 843 asFractionTotal = args$asFractionTotal, | |
| 844 alpha = args$alpha, | |
| 845 dIFcutoff = args$dIFcutoff, | |
| 846 onlySigIsoforms = args$onlySigIsoforms, | |
| 847 plot = TRUE, | |
| 848 plotGenes = args$plotGenes, | |
| 849 localTheme = theme_bw(), | |
| 850 returnResult = TRUE | |
| 851 ) | |
| 852 dev.off() | |
| 853 | |
| 854 write.table( | |
| 855 splicingSummary, | |
| 856 file = "splicingSummary.tsv", | |
| 857 quote = FALSE, | |
| 858 sep = "\t", | |
| 859 col.names = TRUE, | |
| 860 row.names = FALSE | |
| 861 ) | |
| 862 | |
| 863 | |
| 864 ### Volcano like plot: | |
| 865 outputFile <- file.path(getwd(), "volcanoPlot.pdf") | |
| 866 pdf( | |
| 867 file = outputFile, | |
| 868 onefile = FALSE, | |
| 869 height = 6, | |
| 870 width = 9 | |
| 871 ) | |
| 872 ggplot(data = SwitchList$isoformFeatures, aes(x = dIF, y = -log10(isoform_switch_q_value))) + | |
| 873 geom_point(aes(color = abs(dIF) > 0.1 & | |
| 874 isoform_switch_q_value < 0.05), # default cutoff | |
| 875 size = 1) + | |
| 876 geom_hline(yintercept = -log10(0.05), linetype = "dashed") + # default cutoff | |
| 877 geom_vline(xintercept = c(-0.1, 0.1), linetype = "dashed") + # default cutoff | |
| 878 facet_wrap(~ condition_2) + | |
| 879 scale_color_manual("Signficant\nIsoform Switch", values = c("black", "red")) + | |
| 880 labs(x = "dIF", y = "-Log10 ( Isoform Switch Q Value )") + | |
| 881 theme_bw() | |
| 882 dev.off() | |
| 883 | |
| 884 | |
| 885 ### Switch vs Gene changes: | |
| 886 outputFile <- file.path(getwd(), "switchGene.pdf") | |
| 887 pdf( | |
| 888 file = outputFile, | |
| 889 onefile = FALSE, | |
| 890 height = 6, | |
| 891 width = 9 | |
| 892 ) | |
| 893 ggplot(data = SwitchList$isoformFeatures, | |
| 894 aes(x = gene_log2_fold_change, y = dIF)) + | |
| 895 geom_point(aes(color = abs(dIF) > 0.1 & | |
| 896 isoform_switch_q_value < 0.05), | |
| 897 size = 1) + | |
| 898 facet_wrap(~ condition_2) + | |
| 899 geom_hline(yintercept = 0, linetype = "dashed") + | |
| 900 geom_vline(xintercept = 0, linetype = "dashed") + | |
| 901 scale_color_manual("Signficant\nIsoform Switch", values = c("black", "red")) + | |
| 902 labs(x = "Gene log2 fold change", y = "dIF") + | |
| 903 theme_bw() | |
| 904 dev.off() | |
| 905 | |
| 906 outputFile <- file.path(getwd(), "splicingGenomewide.pdf") | |
| 907 pdf( | |
| 908 file = outputFile, | |
| 909 onefile = FALSE, | |
| 910 height = 6, | |
| 911 width = 9 | |
| 912 ) | |
| 913 splicingGenomeWide <- extractSplicingGenomeWide( | |
| 914 SwitchList, | |
| 915 featureToExtract = "all", | |
| 916 splicingToAnalyze = c("A3", "MES", "ATSS"), | |
| 917 plot = TRUE, | |
| 918 returnResult = TRUE | |
| 919 ) | |
| 920 dev.off() | |
| 921 } | |
| 922 save(SwitchList, file = "SwitchList.Rda") | |
| 923 | |
| 924 } |
