Mercurial > repos > iuc > scpipe
comparison scpipe.R @ 3:25e52048c7cc draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scpipe commit 8007f71281553ddfa45e6f8e1172952d956bb000"
| author | iuc |
|---|---|
| date | Thu, 11 Jun 2020 11:17:54 +0000 |
| parents | 9f8ce2980849 |
| children |
comparison
equal
deleted
inserted
replaced
| 2:9f8ce2980849 | 3:25e52048c7cc |
|---|---|
| 1 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | 1 err_foo <- function() { |
| 2 cat(geterrmessage(), file = stderr()); | |
| 3 q("no", 1, F) | |
| 4 } | |
| 5 options(show.error.messages = F, error = err_foo) | |
| 2 | 6 |
| 3 # we need that to not crash galaxy with an UTF8 error on German LC settings. | 7 # we need that to not crash galaxy with an UTF8 error on German LC settings. |
| 4 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 8 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
| 5 | 9 |
| 6 suppressPackageStartupMessages({ | 10 suppressPackageStartupMessages({ |
| 16 library(scales) | 20 library(scales) |
| 17 library(Rtsne) | 21 library(Rtsne) |
| 18 }) | 22 }) |
| 19 | 23 |
| 20 option_list <- list( | 24 option_list <- list( |
| 21 make_option(c("-bam","--bam"), type="character", help="BAM file"), | 25 make_option(c("-bam", "--bam"), type = "character", help = "BAM file"), |
| 22 make_option(c("-fasta","--fasta"), type="character", help="Genome fasta file"), | 26 make_option(c("-fasta", "--fasta"), type = "character", help = "Genome fasta file"), |
| 23 make_option(c("-exons","--exons"), type="character", help="Exon annotation gff3 file"), | 27 make_option(c("-exons", "--exons"), type = "character", help = "Exon annotation gff3 file"), |
| 24 make_option(c("-organism","--organism"), type="character", help="Organism e.g. hsapiens_gene_ensembl"), | 28 make_option(c("-organism", "--organism"), type = "character", help = "Organism e.g. hsapiens_gene_ensembl"), |
| 25 make_option(c("-barcodes","--barcodes"), type="character", help="Cell barcodes csv file"), | 29 make_option(c("-barcodes", "--barcodes"), type = "character", help = "Cell barcodes csv file"), |
| 26 make_option(c("-read1","--read1"), type="character", help="Read 1 fastq.gz"), | 30 make_option(c("-read1", "--read1"), type = "character", help = "Read 1 fastq.gz"), |
| 27 make_option(c("-read2","--read2"), type="character", help="Read 2 fastq.gz"), | 31 make_option(c("-read2", "--read2"), type = "character", help = "Read 2 fastq.gz"), |
| 28 make_option(c("-samplename","--samplename"), type="character", help="Name to use for sample"), | 32 make_option(c("-samplename", "--samplename"), type = "character", help = "Name to use for sample"), |
| 29 make_option(c("-bs1","--bs1"), type="integer", help="Barcode start in Read 1"), | 33 make_option(c("-bs1", "--bs1"), type = "integer", help = "Barcode start in Read 1"), |
| 30 make_option(c("-bl1","--bl1"), type="integer", help="Barcode length in Read 1"), | 34 make_option(c("-bl1", "--bl1"), type = "integer", help = "Barcode length in Read 1"), |
| 31 make_option(c("-bs2","--bs2"), type="integer", help="Barcode start in Read 2"), | 35 make_option(c("-bs2", "--bs2"), type = "integer", help = "Barcode start in Read 2"), |
| 32 make_option(c("-bl2","--bl2"), type="integer", help="Barcode length in Read 2"), | 36 make_option(c("-bl2", "--bl2"), type = "integer", help = "Barcode length in Read 2"), |
| 33 make_option(c("-us","--us"), type="integer", help="UMI start in Read 2"), | 37 make_option(c("-us", "--us"), type = "integer", help = "UMI start in Read 2"), |
| 34 make_option(c("-ul","--ul"), type="integer", help="UMI length in Read 2"), | 38 make_option(c("-ul", "--ul"), type = "integer", help = "UMI length in Read 2"), |
| 35 make_option(c("-rmlow","--rmlow"), type="logical", help="Remove reads with N in barcode or UMI"), | 39 make_option(c("-rmlow", "--rmlow"), type = "logical", help = "Remove reads with N in barcode or UMI"), |
| 36 make_option(c("-rmN","--rmN"), type="logical", help="Remove reads with low quality"), | 40 make_option(c("-rmN", "--rmN"), type = "logical", help = "Remove reads with low quality"), |
| 37 make_option(c("-minq","--minq"), type="integer", help="Minimum read quality"), | 41 make_option(c("-minq", "--minq"), type = "integer", help = "Minimum read quality"), |
| 38 make_option(c("-numbq","--numbq"), type="integer", help="Maximum number of bases below minq"), | 42 make_option(c("-numbq", "--numbq"), type = "integer", help = "Maximum number of bases below minq"), |
| 39 make_option(c("-stnd","--stnd"), type="logical", help="Perform strand-specific mapping"), | 43 make_option(c("-stnd", "--stnd"), type = "logical", help = "Perform strand-specific mapping"), |
| 40 make_option(c("-max_mis","--max_mis"), type="integer", help="Maximum mismatch allowed in barcode"), | 44 make_option(c("-max_mis", "--max_mis"), type = "integer", help = "Maximum mismatch allowed in barcode"), |
| 41 make_option(c("-UMI_cor","--UMI_cor"), type="integer", help="Correct UMI sequence error"), | 45 make_option(c("-UMI_cor", "--UMI_cor"), type = "integer", help = "Correct UMI sequence error"), |
| 42 make_option(c("-gene_fl","--gene_fl"), type="logical", help="Remove low abundant genes"), | 46 make_option(c("-gene_fl", "--gene_fl"), type = "logical", help = "Remove low abundant genes"), |
| 43 make_option(c("-max_reads","--max_reads"), type="integer", help="Maximum reads processed"), | 47 make_option(c("-max_reads", "--max_reads"), type = "integer", help = "Maximum reads processed"), |
| 44 make_option(c("-min_count","--min_count"), type="integer", help="Minimum count to keep"), | 48 make_option(c("-min_count", "--min_count"), type = "integer", help = "Minimum count to keep"), |
| 45 make_option(c("-metrics_matrix","--metrics_matrix"), type="logical", help="QC metrics matrix"), | 49 make_option(c("-metrics_matrix", "--metrics_matrix"), type = "logical", help = "QC metrics matrix"), |
| 46 make_option(c("-keep_outliers","--keep_outliers"), type="logical", help="Keep outlier cells"), | 50 make_option(c("-keep_outliers", "--keep_outliers"), type = "logical", help = "Keep outlier cells"), |
| 47 make_option(c("-report","--report"), type="logical", help="HTML report of plots"), | 51 make_option(c("-report", "--report"), type = "logical", help = "HTML report of plots"), |
| 48 make_option(c("-rdata","--rdata"), type="logical", help="Output RData file"), | 52 make_option(c("-rdata", "--rdata"), type = "logical", help = "Output RData file"), |
| 49 make_option(c("-nthreads","--nthreads"), type="integer", help="Number of threads") | 53 make_option(c("-nthreads", "--nthreads"), type = "integer", help = "Number of threads") |
| 50 ) | |
| 51 | |
| 52 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) | |
| 53 args = parse_args(parser) | |
| 54 | |
| 55 bam = args$bam | |
| 56 fa_fn = args$fasta | |
| 57 anno_fn = args$exons | |
| 58 fq_R1 = args$read1 | |
| 59 fq_R2 = args$read2 | |
| 60 read_structure = list( | |
| 61 bs1 = args$bs1, # barcode start position in fq_R1, -1 indicates no barcode | |
| 62 bl1 = args$bl1, # barcode length in fq_R1, 0 since no barcode present | |
| 63 bs2 = args$bs2, # barcode start position in fq_R2 | |
| 64 bl2 = args$bl2, # barcode length in fq_R2 | |
| 65 us = args$us, # UMI start position in fq_R2 | |
| 66 ul = args$ul # UMI length | |
| 67 ) | 54 ) |
| 68 | 55 |
| 56 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) | |
| 57 args <- parse_args(parser) | |
| 58 | |
| 59 bam <- args$bam | |
| 60 fa_fn <- args$fasta | |
| 61 anno_fn <- args$exons | |
| 62 fq_r1 <- args$read1 | |
| 63 fq_r2 <- args$read2 | |
| 64 read_structure <- list(bs1 = args$bs1, # barcode start position in fq_r1, -1 indicates no barcode | |
| 65 bl1 = args$bl1, # barcode length in fq_r1, 0 since no barcode present | |
| 66 bs2 = args$bs2, # barcode start position in fq_r2 | |
| 67 bl2 = args$bl2, # barcode length in fq_r2 | |
| 68 us = args$us, # UMI start position in fq_r2 | |
| 69 ul = args$ul # UMI length | |
| 70 ) | |
| 71 | |
| 69 if (args$us == -1) { | 72 if (args$us == -1) { |
| 70 has_umi = FALSE | 73 has_umi <- FALSE |
| 71 } else { | 74 } else { |
| 72 has_umi = TRUE | 75 has_umi <- TRUE |
| 73 } | 76 } |
| 74 | 77 |
| 75 filter_settings=list(rmlow=args$rmlow, rmN=args$rmN, minq=args$minq, numbq=args$numbq) | 78 filter_settings <- list(rmlow = args$rmlow, |
| 79 rmN = args$rmN, | |
| 80 minq = args$minq, | |
| 81 numbq = args$numbq) | |
| 76 | 82 |
| 77 # Outputs | 83 # Outputs |
| 78 out_dir = "." | 84 out_dir <- "." |
| 79 mapped_bam = file.path(out_dir, "aligned.mapped.bam") | 85 mapped_bam <- file.path(out_dir, "aligned.mapped.bam") |
| 80 | 86 |
| 81 # if input is fastqs | 87 # if input is fastqs |
| 82 if (!is.null(fa_fn)) { | 88 if (!is.null(fa_fn)) { |
| 83 fasta_index = file.path(out_dir, paste0(fa_fn, ".fasta_index")) | 89 fasta_index <- file.path(out_dir, paste0(fa_fn, ".fasta_index")) |
| 84 combined_fastq = file.path(out_dir, "combined.fastq") | 90 combined_fastq <- file.path(out_dir, "combined.fastq") |
| 85 aligned_bam = file.path(out_dir, "aligned.bam") | 91 aligned_bam <- file.path(out_dir, "aligned.bam") |
| 86 | 92 |
| 87 print("Trimming barcodes") | 93 print("Trimming barcodes") |
| 88 sc_trim_barcode(combined_fastq, | 94 sc_trim_barcode(combined_fastq, |
| 89 fq_R1, | 95 fq_r1, |
| 90 fq_R2, | 96 fq_r2, |
| 91 read_structure=read_structure, | 97 read_structure = read_structure, |
| 92 filter_settings=filter_settings) | 98 filter_settings = filter_settings) |
| 93 | 99 |
| 94 print("Building genome index") | 100 print("Building genome index") |
| 95 Rsubread::buildindex(basename=fasta_index, reference=fa_fn) | 101 Rsubread::buildindex(basename = fasta_index, reference = fa_fn) |
| 96 | 102 |
| 97 print("Aligning reads to genome") | 103 print("Aligning reads to genome") |
| 98 Rsubread::align(index=fasta_index, | 104 Rsubread::align(index = fasta_index, |
| 99 readfile1=combined_fastq, | 105 readfile1 = combined_fastq, |
| 100 output_file=aligned_bam, | 106 output_file = aligned_bam, |
| 101 nthreads=args$nthreads) | 107 nthreads = args$nthreads) |
| 102 | 108 |
| 103 if (!is.null(args$barcodes)) { | 109 if (!is.null(args$barcodes)) { |
| 104 barcode_anno=args$barcodes | 110 barcode_anno <- args$barcodes |
| 105 } else { | 111 } else { |
| 106 print("Detecting barcodes") | 112 print("Detecting barcodes") |
| 107 # detect 10X barcodes and generate sample_index.csv file | 113 # detect 10X barcodes and generate sample_index.csv file |
| 108 barcode_anno = "sample_index.csv" | 114 barcode_anno <- "sample_index.csv" |
| 109 sc_detect_bc( | 115 sc_detect_bc(infq = combined_fastq, |
| 110 infq=combined_fastq, | 116 outcsv = barcode_anno, |
| 111 outcsv=barcode_anno, | 117 bc_len = read_structure$bl2, |
| 112 bc_len=read_structure$bl2, | 118 max_reads = args$max_reads, |
| 113 max_reads=args$max_reads, | 119 min_count = args$min_count, |
| 114 min_count=args$min_count, | 120 max_mismatch = args$max_mis |
| 115 max_mismatch=args$max_mis | |
| 116 ) | 121 ) |
| 117 } | 122 } |
| 118 } else { | 123 } else { |
| 119 aligned_bam = file.path(out_dir, bam) | 124 aligned_bam <- file.path(out_dir, bam) |
| 120 barcode_anno=args$barcodes | 125 barcode_anno <- args$barcodes |
| 121 } | 126 } |
| 122 | 127 |
| 123 print("Assigning reads to exons") | 128 print("Assigning reads to exons") |
| 124 sc_exon_mapping(aligned_bam, mapped_bam, anno_fn, bc_len=read_structure$bl2, UMI_len=read_structure$ul, stnd=args$stnd) | 129 sc_exon_mapping(aligned_bam, mapped_bam, anno_fn, bc_len = read_structure$bl2, UMI_len = read_structure$ul, stnd = args$stnd) |
| 125 | 130 |
| 126 print("De-multiplexing data") | 131 print("De-multiplexing data") |
| 127 sc_demultiplex(mapped_bam, out_dir, barcode_anno, has_UMI=has_umi) | 132 sc_demultiplex(mapped_bam, out_dir, barcode_anno, has_UMI = has_umi) |
| 128 | 133 |
| 129 print("Counting genes") | 134 print("Counting genes") |
| 130 sc_gene_counting(out_dir, barcode_anno, UMI_cor=args$UMI_cor, gene_fl=args$gene_fl) | 135 sc_gene_counting(out_dir, barcode_anno, UMI_cor = args$UMI_cor, gene_fl = args$gene_fl) |
| 131 | 136 |
| 132 print("Performing QC") | 137 print("Performing QC") |
| 133 sce <- create_sce_by_dir(out_dir) | 138 sce <- create_sce_by_dir(out_dir) |
| 134 pdf("plots.pdf") | 139 pdf("plots.pdf") |
| 135 plot_demultiplex(sce) | 140 plot_demultiplex(sce) |
| 136 if (has_umi) { | 141 if (has_umi) { |
| 137 p = plot_UMI_dup(sce) | 142 p <- plot_UMI_dup(sce) |
| 138 print(p) | 143 print(p) |
| 139 } | 144 } |
| 140 sce = calculate_QC_metrics(sce) | 145 sce <- calculate_QC_metrics(sce) |
| 141 sce = detect_outlier(sce) | 146 sce <- detect_outlier(sce) |
| 142 p = plot_mapping(sce, percentage=TRUE, dataname=args$samplename) | 147 p <- plot_mapping(sce, percentage = TRUE, dataname = args$samplename) |
| 143 print(p) | 148 print(p) |
| 144 p = plot_QC_pairs(sce) | 149 p <- plot_QC_pairs(sce) |
| 145 print(p) | 150 print(p) |
| 146 dev.off() | 151 dev.off() |
| 147 | 152 |
| 148 print("Removing outliers") | 153 print("Removing outliers") |
| 149 if (is.null(args$keep_outliers)) { | 154 if (is.null(args$keep_outliers)) { |
| 150 sce = remove_outliers(sce) | 155 sce <- remove_outliers(sce) |
| 151 gene_counts <- counts(sce) | 156 gene_counts <- counts(sce) |
| 152 write.table(data.frame("gene_id"=rownames(gene_counts), gene_counts), file="gene_count.tsv", sep="\t", quote=FALSE, row.names=FALSE) | 157 write.table(data.frame("gene_id" = rownames(gene_counts), gene_counts), file = "gene_count.tsv", sep = "\t", quote = FALSE, row.names = FALSE) |
| 153 } | 158 } |
| 154 | 159 |
| 155 if (!is.null(args$metrics_matrix)) { | 160 if (!is.null(args$metrics_matrix)) { |
| 156 metrics <- colData(sce, internal=TRUE) | 161 metrics <- colData(sce, internal = TRUE) |
| 157 write.table(data.frame("cell_id"=rownames(metrics), metrics), file="metrics_matrix.tsv", sep="\t", quote=FALSE, row.names=FALSE) | 162 write.table(data.frame("cell_id" = rownames(metrics), metrics), file = "metrics_matrix.tsv", sep = "\t", quote = FALSE, row.names = FALSE) |
| 158 } | 163 } |
| 159 | 164 |
| 160 if (!is.null(args$report) & (!is.null(fa_fn))) { | 165 if (!is.null(args$report) & (!is.null(fa_fn))) { |
| 161 print("Creating report") | 166 print("Creating report") |
| 162 create_report(sample_name=args$samplename, | 167 create_report(sample_name = args$samplename, |
| 163 outdir=out_dir, | 168 outdir = out_dir, |
| 164 r1=fq_R1, | 169 r1 = fq_r1, |
| 165 r2=fq_R2, | 170 r2 = fq_r2, |
| 166 outfq=combined_fastq, | 171 outfq = combined_fastq, |
| 167 read_structure=read_structure, | 172 read_structure = read_structure, |
| 168 filter_settings=filter_settings, | 173 filter_settings = filter_settings, |
| 169 align_bam=aligned_bam, | 174 align_bam = aligned_bam, |
| 170 genome_index=fasta_index, | 175 genome_index = fasta_index, |
| 171 map_bam=mapped_bam, | 176 map_bam = mapped_bam, |
| 172 exon_anno=anno_fn, | 177 exon_anno = anno_fn, |
| 173 stnd=args$stnd, | 178 stnd = args$stnd, |
| 174 fix_chr=FALSE, | 179 fix_chr = FALSE, |
| 175 barcode_anno=barcode_anno, | 180 barcode_anno = barcode_anno, |
| 176 max_mis=args$max_mis, | 181 max_mis = args$max_mis, |
| 177 UMI_cor=args$UMI_cor, | 182 UMI_cor = args$UMI_cor, |
| 178 gene_fl=args$gene_fl, | 183 gene_fl = args$gene_fl, |
| 179 organism=args$organism, | 184 organism = args$organism, |
| 180 gene_id_type="ensembl_gene_id") | 185 gene_id_type = "ensembl_gene_id") |
| 181 } | 186 } |
| 182 | 187 |
| 183 if (!is.null(args$rdata) ) { | 188 if (!is.null(args$rdata)) { |
| 184 save(sce, file = file.path(out_dir,"scPipe_analysis.RData")) | 189 save(sce, file = file.path(out_dir, "scPipe_analysis.RData")) |
| 185 } | 190 } |
| 186 | 191 |
| 187 sessionInfo() | 192 sessionInfo() |
