Mercurial > repos > greg > multilocus_genotype
view multilocus_genotype.R @ 4:a7cce4091e80 draft
Uploaded
author | greg |
---|---|
date | Wed, 21 Nov 2018 09:14:09 -0500 |
parents | 1bc815d9c8c5 |
children | 18001e7cb199 |
line wrap: on
line source
#!/usr/bin/env Rscript suppressPackageStartupMessages(library("adegenet")) suppressPackageStartupMessages(library("ape")) suppressPackageStartupMessages(library("ggplot2")) suppressPackageStartupMessages(library("knitr")) suppressPackageStartupMessages(library("optparse")) suppressPackageStartupMessages(library("poppr")) suppressPackageStartupMessages(library("RColorBrewer")) suppressPackageStartupMessages(library("vcfR")) suppressPackageStartupMessages(library("vegan")) option_list <- list( make_option(c("--input_vcf"), action="store", dest="input_vcf", help="VCF input file"), make_option(c("--input_pop_info"), action="store", dest="input_pop_info", help="Population information input file"), make_option(c("--output_missing_data"), action="store", dest="output_missing_data", help="Missing data outputfile"), make_option(c("--output_mlg_id"), action="store", dest="output_mlg_id", help="Mlg Id data outputfile") ) parser <- OptionParser(usage="%prog [options] file", option_list=option_list); args <- parse_args(parser, positional_arguments=TRUE); opt <- args$options; get_file_path = function(file_name) { file_path = paste("output_plots_dir", file_name, sep="/"); return(file_path); } # Extract Provesti's distance from the distance matrix. provesti_distance <- function(distance, selection) { eval(parse(text=paste("as.matrix(distance)[", selection, "]"))); } # Read in VCF input file. vcf <- read.vcfR(opt$input_vcf); #Missing GT in samples submitted gt <- extract.gt(vcf, element="GT", as.numeric=FALSE); myMiss <- apply(gt, MARGIN=2, function(x){ sum(is.na(x))}); myMiss <- (myMiss / nrow(vcf)) * 100; miss <- data.frame(myMiss); write.table(miss, file=opt$output_missing_data, quote=FALSE); # Convert VCF file into formats compatiable with the Poppr package. genind <- vcfR2genind(vcf); # Add population information to the genind object. poptab <- read.table(opt$input_pop_info, check.names=FALSE, header=T, na.strings = c("", "NA")); genind@pop <- as.factor(poptab$region); # Convert genind to genclone object gclo <- as.genclone(genind); # Calculate the bitwise distance between individuals, # the following is similar to Provesti's distance. xdis <- bitwise.dist(gclo, missing_match=FALSE); # Multilocus genotypes (threshold of 1.6%). mlg.filter(gclo, distance=xdis) <- 0.016; # Start PDF device driver. dev.new(width=10, height=7); file_path = get_file_path("mlg_table.pdf"); pdf(file=file_path, width=10, height=7); m <- mlg.table(gclo, background=TRUE, color=TRUE); dev.off(); # Create table of MLGs. id <- mlg.id(gclo); df <- data.frame(matrix((id), byrow=T)); write.table(df, file=opt$output_mlg_id); # Rarifaction curve. H.obj <- mlg.table(gclo, plot=TRUE); # Start PDF device driver. dev.new(width=10, height=7); file_path = get_file_path("geno_rarifaction_curve.pdf"); pdf(file=file_path, width=10, height=7); rarecurve(H.obj, ylab="Number of expected MLGs", sample=min(rowSums(H.obj)), border=NA, fill=NA, font=2, cex=1, col="blue"); dev.off() # Create a phylogeny of samples based on distance matrices. cols <- palette(brewer.pal(n=12, name='Set3')); set.seed(999); # Start PDF device driver. dev.new(width=10, height=7); file_path = get_file_path("nj_phylogeny.pdf"); pdf(file=file_path, width=10, height=7); # Organize branches by clade. tree <- gclo %>% aboot(dist=provesti.dist, sample=10, tree="nj", cutoff=50, quiet=TRUE) %>% ladderize(); plot.phylo(tree, tip.color=cols[obj2$pop],label.offset=0.0125, cex=0.7, font=2, lwd=4); # Add a scale bar showing 5% difference.. add.scale.bar(length=0.05, cex=0.65); nodelabels(tree$node.label, cex=.5, adj=c(1.5, -0.1), frame="n", font=3, xpd=TRUE); dev.off()