Mercurial > repos > iuc > scpipe
comparison scpipe.R @ 2:9f8ce2980849 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scpipe commit 60e2a9e9129a22924c55b11b218b39d913c7e686
| author | iuc |
|---|---|
| date | Mon, 14 Jan 2019 08:06:19 -0500 |
| parents | 57baf87c7fcd |
| children | 25e52048c7cc |
comparison
equal
deleted
inserted
replaced
| 1:9b55acf357ec | 2:9f8ce2980849 |
|---|---|
| 16 library(scales) | 16 library(scales) |
| 17 library(Rtsne) | 17 library(Rtsne) |
| 18 }) | 18 }) |
| 19 | 19 |
| 20 option_list <- list( | 20 option_list <- list( |
| 21 make_option(c("-bam","--bam"), type="character", help="BAM file"), | |
| 21 make_option(c("-fasta","--fasta"), type="character", help="Genome fasta file"), | 22 make_option(c("-fasta","--fasta"), type="character", help="Genome fasta file"), |
| 22 make_option(c("-exons","--exons"), type="character", help="Exon annotation gff3 file"), | 23 make_option(c("-exons","--exons"), type="character", help="Exon annotation gff3 file"), |
| 24 make_option(c("-organism","--organism"), type="character", help="Organism e.g. hsapiens_gene_ensembl"), | |
| 23 make_option(c("-barcodes","--barcodes"), type="character", help="Cell barcodes csv file"), | 25 make_option(c("-barcodes","--barcodes"), type="character", help="Cell barcodes csv file"), |
| 24 make_option(c("-read1","--read1"), type="character", help="Read 1 fastq.gz"), | 26 make_option(c("-read1","--read1"), type="character", help="Read 1 fastq.gz"), |
| 25 make_option(c("-read2","--read2"), type="character", help="Read 2 fastq.gz"), | 27 make_option(c("-read2","--read2"), type="character", help="Read 2 fastq.gz"), |
| 26 make_option(c("-samplename","--samplename"), type="character", help="Name to use for sample"), | 28 make_option(c("-samplename","--samplename"), type="character", help="Name to use for sample"), |
| 27 make_option(c("-bs1","--bs1"), type="integer", help="Barcode start in Read 1"), | 29 make_option(c("-bs1","--bs1"), type="integer", help="Barcode start in Read 1"), |
| 38 make_option(c("-max_mis","--max_mis"), type="integer", help="Maximum mismatch allowed in barcode"), | 40 make_option(c("-max_mis","--max_mis"), type="integer", help="Maximum mismatch allowed in barcode"), |
| 39 make_option(c("-UMI_cor","--UMI_cor"), type="integer", help="Correct UMI sequence error"), | 41 make_option(c("-UMI_cor","--UMI_cor"), type="integer", help="Correct UMI sequence error"), |
| 40 make_option(c("-gene_fl","--gene_fl"), type="logical", help="Remove low abundant genes"), | 42 make_option(c("-gene_fl","--gene_fl"), type="logical", help="Remove low abundant genes"), |
| 41 make_option(c("-max_reads","--max_reads"), type="integer", help="Maximum reads processed"), | 43 make_option(c("-max_reads","--max_reads"), type="integer", help="Maximum reads processed"), |
| 42 make_option(c("-min_count","--min_count"), type="integer", help="Minimum count to keep"), | 44 make_option(c("-min_count","--min_count"), type="integer", help="Minimum count to keep"), |
| 45 make_option(c("-metrics_matrix","--metrics_matrix"), type="logical", help="QC metrics matrix"), | |
| 46 make_option(c("-keep_outliers","--keep_outliers"), type="logical", help="Keep outlier cells"), | |
| 43 make_option(c("-report","--report"), type="logical", help="HTML report of plots"), | 47 make_option(c("-report","--report"), type="logical", help="HTML report of plots"), |
| 44 make_option(c("-rdata","--rdata"), type="logical", help="Output RData file"), | 48 make_option(c("-rdata","--rdata"), type="logical", help="Output RData file"), |
| 45 make_option(c("-nthreads","--nthreads"), type="integer", help="Number of threads") | 49 make_option(c("-nthreads","--nthreads"), type="integer", help="Number of threads") |
| 46 ) | 50 ) |
| 47 | 51 |
| 48 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) | 52 parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) |
| 49 args = parse_args(parser) | 53 args = parse_args(parser) |
| 50 | 54 |
| 55 bam = args$bam | |
| 51 fa_fn = args$fasta | 56 fa_fn = args$fasta |
| 52 anno_fn = args$exons | 57 anno_fn = args$exons |
| 53 fq_R1 = args$read1 | 58 fq_R1 = args$read1 |
| 54 fq_R2 = args$read2 | 59 fq_R2 = args$read2 |
| 55 read_structure = list( | 60 read_structure = list( |
| 69 | 74 |
| 70 filter_settings=list(rmlow=args$rmlow, rmN=args$rmN, minq=args$minq, numbq=args$numbq) | 75 filter_settings=list(rmlow=args$rmlow, rmN=args$rmN, minq=args$minq, numbq=args$numbq) |
| 71 | 76 |
| 72 # Outputs | 77 # Outputs |
| 73 out_dir = "." | 78 out_dir = "." |
| 74 fasta_index = file.path(out_dir, paste0(fa_fn, ".fasta_index")) | |
| 75 combined_fastq = file.path(out_dir, "combined.fastq") | |
| 76 aligned_bam = file.path(out_dir, "aligned.bam") | |
| 77 mapped_bam = file.path(out_dir, "aligned.mapped.bam") | 79 mapped_bam = file.path(out_dir, "aligned.mapped.bam") |
| 78 | 80 |
| 81 # if input is fastqs | |
| 82 if (!is.null(fa_fn)) { | |
| 83 fasta_index = file.path(out_dir, paste0(fa_fn, ".fasta_index")) | |
| 84 combined_fastq = file.path(out_dir, "combined.fastq") | |
| 85 aligned_bam = file.path(out_dir, "aligned.bam") | |
| 79 | 86 |
| 80 print("Trimming barcodes") | 87 print("Trimming barcodes") |
| 81 sc_trim_barcode(combined_fastq, | 88 sc_trim_barcode(combined_fastq, |
| 82 fq_R1, | 89 fq_R1, |
| 83 fq_R2, | 90 fq_R2, |
| 84 read_structure=read_structure, | 91 read_structure=read_structure, |
| 85 filter_settings=filter_settings) | 92 filter_settings=filter_settings) |
| 86 | 93 |
| 87 print("Building genome index") | 94 print("Building genome index") |
| 88 Rsubread::buildindex(basename=fasta_index, reference=fa_fn) | 95 Rsubread::buildindex(basename=fasta_index, reference=fa_fn) |
| 89 | 96 |
| 90 print("Aligning reads to genome") | 97 print("Aligning reads to genome") |
| 91 Rsubread::align(index=fasta_index, | 98 Rsubread::align(index=fasta_index, |
| 92 readfile1=combined_fastq, | 99 readfile1=combined_fastq, |
| 93 output_file=aligned_bam, | 100 output_file=aligned_bam, |
| 94 nthreads=args$nthreads) | 101 nthreads=args$nthreads) |
| 95 | 102 |
| 96 if (!is.null(args$barcodes)) { | 103 if (!is.null(args$barcodes)) { |
| 97 barcode_anno=args$barcodes | 104 barcode_anno=args$barcodes |
| 105 } else { | |
| 106 print("Detecting barcodes") | |
| 107 # detect 10X barcodes and generate sample_index.csv file | |
| 108 barcode_anno = "sample_index.csv" | |
| 109 sc_detect_bc( | |
| 110 infq=combined_fastq, | |
| 111 outcsv=barcode_anno, | |
| 112 bc_len=read_structure$bl2, | |
| 113 max_reads=args$max_reads, | |
| 114 min_count=args$min_count, | |
| 115 max_mismatch=args$max_mis | |
| 116 ) | |
| 117 } | |
| 98 } else { | 118 } else { |
| 99 print("Detecting barcodes") | 119 aligned_bam = file.path(out_dir, bam) |
| 100 # detect 10X barcodes and generate sample_index.csv file | 120 barcode_anno=args$barcodes |
| 101 barcode_anno = "sample_index.csv" | |
| 102 sc_detect_bc( | |
| 103 infq=combined_fastq, | |
| 104 outcsv=barcode_anno, | |
| 105 bc_len=read_structure$bl2, | |
| 106 max_reads=args$max_reads, | |
| 107 min_count=args$min_count, | |
| 108 max_mismatch=args$max_mis | |
| 109 ) | |
| 110 } | 121 } |
| 111 | 122 |
| 112 print("Assigning reads to exons") | 123 print("Assigning reads to exons") |
| 113 sc_exon_mapping(aligned_bam, mapped_bam, anno_fn, bc_len=read_structure$bl2, UMI_len=read_structure$ul, stnd=args$stnd) | 124 sc_exon_mapping(aligned_bam, mapped_bam, anno_fn, bc_len=read_structure$bl2, UMI_len=read_structure$ul, stnd=args$stnd) |
| 114 | 125 |
| 116 sc_demultiplex(mapped_bam, out_dir, barcode_anno, has_UMI=has_umi) | 127 sc_demultiplex(mapped_bam, out_dir, barcode_anno, has_UMI=has_umi) |
| 117 | 128 |
| 118 print("Counting genes") | 129 print("Counting genes") |
| 119 sc_gene_counting(out_dir, barcode_anno, UMI_cor=args$UMI_cor, gene_fl=args$gene_fl) | 130 sc_gene_counting(out_dir, barcode_anno, UMI_cor=args$UMI_cor, gene_fl=args$gene_fl) |
| 120 | 131 |
| 121 print("Creating SingleCellExperiment object") | 132 print("Performing QC") |
| 122 sce <- create_sce_by_dir(out_dir) | 133 sce <- create_sce_by_dir(out_dir) |
| 134 pdf("plots.pdf") | |
| 135 plot_demultiplex(sce) | |
| 136 if (has_umi) { | |
| 137 p = plot_UMI_dup(sce) | |
| 138 print(p) | |
| 139 } | |
| 140 sce = calculate_QC_metrics(sce) | |
| 141 sce = detect_outlier(sce) | |
| 142 p = plot_mapping(sce, percentage=TRUE, dataname=args$samplename) | |
| 143 print(p) | |
| 144 p = plot_QC_pairs(sce) | |
| 145 print(p) | |
| 146 dev.off() | |
| 123 | 147 |
| 124 if (!is.null(args$report)) { | 148 print("Removing outliers") |
| 125 print("Creating report") | 149 if (is.null(args$keep_outliers)) { |
| 126 create_report(sample_name=args$samplename, | 150 sce = remove_outliers(sce) |
| 127 outdir=out_dir, | 151 gene_counts <- counts(sce) |
| 128 r1=fq_R1, | 152 write.table(data.frame("gene_id"=rownames(gene_counts), gene_counts), file="gene_count.tsv", sep="\t", quote=FALSE, row.names=FALSE) |
| 129 r2=fq_R2, | 153 } |
| 130 outfq=combined_fastq, | 154 |
| 131 read_structure=read_structure, | 155 if (!is.null(args$metrics_matrix)) { |
| 132 filter_settings=filter_settings, | 156 metrics <- colData(sce, internal=TRUE) |
| 133 align_bam=aligned_bam, | 157 write.table(data.frame("cell_id"=rownames(metrics), metrics), file="metrics_matrix.tsv", sep="\t", quote=FALSE, row.names=FALSE) |
| 134 genome_index=fasta_index, | 158 } |
| 135 map_bam=mapped_bam, | 159 |
| 136 exon_anno=anno_fn, | 160 if (!is.null(args$report) & (!is.null(fa_fn))) { |
| 137 stnd=args$stnd, | 161 print("Creating report") |
| 138 fix_chr=FALSE, | 162 create_report(sample_name=args$samplename, |
| 139 barcode_anno=barcode_anno, | 163 outdir=out_dir, |
| 140 max_mis=args$max_mis, | 164 r1=fq_R1, |
| 141 UMI_cor=args$UMI_cor, | 165 r2=fq_R2, |
| 142 gene_fl=args$gene_fl) | 166 outfq=combined_fastq, |
| 167 read_structure=read_structure, | |
| 168 filter_settings=filter_settings, | |
| 169 align_bam=aligned_bam, | |
| 170 genome_index=fasta_index, | |
| 171 map_bam=mapped_bam, | |
| 172 exon_anno=anno_fn, | |
| 173 stnd=args$stnd, | |
| 174 fix_chr=FALSE, | |
| 175 barcode_anno=barcode_anno, | |
| 176 max_mis=args$max_mis, | |
| 177 UMI_cor=args$UMI_cor, | |
| 178 gene_fl=args$gene_fl, | |
| 179 organism=args$organism, | |
| 180 gene_id_type="ensembl_gene_id") | |
| 143 } | 181 } |
| 144 | 182 |
| 145 if (!is.null(args$rdata) ) { | 183 if (!is.null(args$rdata) ) { |
| 146 save(sce, file = file.path(out_dir,"scPipe_analysis.RData")) | 184 save(sce, file = file.path(out_dir,"scPipe_analysis.RData")) |
| 147 } | 185 } |
| 148 | 186 |
| 149 sessionInfo() | 187 sessionInfo() |
