0
|
1 #!/usr/bin/env Rscript
|
|
2
|
|
3 suppressPackageStartupMessages(library("adegenet"))
|
|
4 suppressPackageStartupMessages(library("ape"))
|
|
5 suppressPackageStartupMessages(library("ggplot2"))
|
|
6 suppressPackageStartupMessages(library("knitr"))
|
4
|
7 suppressPackageStartupMessages(library("optparse"))
|
|
8 suppressPackageStartupMessages(library("poppr"))
|
|
9 suppressPackageStartupMessages(library("RColorBrewer"))
|
|
10 suppressPackageStartupMessages(library("vcfR"))
|
|
11 suppressPackageStartupMessages(library("vegan"))
|
0
|
12
|
|
13 option_list <- list(
|
2
|
14 make_option(c("--input_vcf"), action="store", dest="input_vcf", help="VCF input file"),
|
4
|
15 make_option(c("--input_pop_info"), action="store", dest="input_pop_info", help="Population information input file"),
|
|
16 make_option(c("--output_missing_data"), action="store", dest="output_missing_data", help="Missing data outputfile"),
|
|
17 make_option(c("--output_mlg_id"), action="store", dest="output_mlg_id", help="Mlg Id data outputfile")
|
0
|
18 )
|
|
19
|
|
20 parser <- OptionParser(usage="%prog [options] file", option_list=option_list);
|
|
21 args <- parse_args(parser, positional_arguments=TRUE);
|
|
22 opt <- args$options;
|
|
23
|
|
24 get_file_path = function(file_name) {
|
|
25 file_path = paste("output_plots_dir", file_name, sep="/");
|
|
26 return(file_path);
|
|
27 }
|
|
28
|
3
|
29 # Extract Provesti's distance from the distance matrix.
|
0
|
30 provesti_distance <- function(distance, selection) {
|
|
31 eval(parse(text=paste("as.matrix(distance)[", selection, "]")));
|
|
32 }
|
|
33
|
3
|
34 # Read in VCF input file.
|
2
|
35 vcf <- read.vcfR(opt$input_vcf);
|
0
|
36
|
4
|
37 #Missing GT in samples submitted
|
|
38 gt <- extract.gt(vcf, element="GT", as.numeric=FALSE);
|
|
39 myMiss <- apply(gt, MARGIN=2, function(x){ sum(is.na(x))});
|
|
40 myMiss <- (myMiss / nrow(vcf)) * 100;
|
|
41 miss <- data.frame(myMiss);
|
|
42 write.table(miss, file=opt$output_missing_data, quote=FALSE);
|
|
43
|
0
|
44 # Convert VCF file into formats compatiable with the Poppr package.
|
3
|
45 genind <- vcfR2genind(vcf);
|
0
|
46 # Add population information to the genind object.
|
|
47 poptab <- read.table(opt$input_pop_info, check.names=FALSE, header=T, na.strings = c("", "NA"));
|
3
|
48 genind@pop <- as.factor(poptab$region);
|
0
|
49 # Convert genind to genclone object
|
3
|
50 gclo <- as.genclone(genind);
|
0
|
51 # Calculate the bitwise distance between individuals,
|
|
52 # the following is similar to Provesti's distance.
|
4
|
53 xdis <- bitwise.dist(gclo, missing_match=FALSE);
|
0
|
54
|
4
|
55 # Multilocus genotypes (threshold of 1.6%).
|
|
56 mlg.filter(gclo, distance=xdis) <- 0.016;
|
0
|
57 # Start PDF device driver.
|
4
|
58 dev.new(width=10, height=7);
|
|
59 file_path = get_file_path("mlg_table.pdf");
|
|
60 pdf(file=file_path, width=10, height=7);
|
0
|
61 m <- mlg.table(gclo, background=TRUE, color=TRUE);
|
|
62 dev.off();
|
|
63
|
|
64 # Create table of MLGs.
|
|
65 id <- mlg.id(gclo);
|
|
66 df <- data.frame(matrix((id), byrow=T));
|
4
|
67 write.table(df, file=opt$output_mlg_id);
|
0
|
68
|
4
|
69 # Rarifaction curve.
|
|
70 H.obj <- mlg.table(gclo, plot=TRUE);
|
0
|
71 # Start PDF device driver.
|
4
|
72 dev.new(width=10, height=7);
|
|
73 file_path = get_file_path("geno_rarifaction_curve.pdf");
|
|
74 pdf(file=file_path, width=10, height=7);
|
|
75 rarecurve(H.obj, ylab="Number of expected MLGs", sample=min(rowSums(H.obj)), border=NA, fill=NA, font=2, cex=1, col="blue");
|
|
76 dev.off()
|
0
|
77
|
4
|
78 # Create a phylogeny of samples based on distance matrices.
|
|
79 cols <- palette(brewer.pal(n=12, name='Set3'));
|
|
80 set.seed(999);
|
|
81 # Start PDF device driver.
|
|
82 dev.new(width=10, height=7);
|
|
83 file_path = get_file_path("nj_phylogeny.pdf");
|
|
84 pdf(file=file_path, width=10, height=7);
|
|
85 # Organize branches by clade.
|
|
86 tree <- gclo %>% aboot(dist=provesti.dist, sample=10, tree="nj", cutoff=50, quiet=TRUE) %>% ladderize();
|
|
87 plot.phylo(tree, tip.color=cols[obj2$pop],label.offset=0.0125, cex=0.7, font=2, lwd=4);
|
|
88 # Add a scale bar showing 5% difference..
|
|
89 add.scale.bar(length=0.05, cex=0.65);
|
|
90 nodelabels(tree$node.label, cex=.5, adj=c(1.5, -0.1), frame="n", font=3, xpd=TRUE);
|
|
91 dev.off()
|
0
|
92
|