annotate multilocus_genotype.R @ 4:a7cce4091e80 draft

Uploaded
author greg
date Wed, 21 Nov 2018 09:14:09 -0500
parents 1bc815d9c8c5
children 18001e7cb199
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
1 #!/usr/bin/env Rscript
725b160c91f0 Uploaded
greg
parents:
diff changeset
2
725b160c91f0 Uploaded
greg
parents:
diff changeset
3 suppressPackageStartupMessages(library("adegenet"))
725b160c91f0 Uploaded
greg
parents:
diff changeset
4 suppressPackageStartupMessages(library("ape"))
725b160c91f0 Uploaded
greg
parents:
diff changeset
5 suppressPackageStartupMessages(library("ggplot2"))
725b160c91f0 Uploaded
greg
parents:
diff changeset
6 suppressPackageStartupMessages(library("knitr"))
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
7 suppressPackageStartupMessages(library("optparse"))
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
8 suppressPackageStartupMessages(library("poppr"))
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
9 suppressPackageStartupMessages(library("RColorBrewer"))
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
10 suppressPackageStartupMessages(library("vcfR"))
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
11 suppressPackageStartupMessages(library("vegan"))
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
12
725b160c91f0 Uploaded
greg
parents:
diff changeset
13 option_list <- list(
2
86aaadf36a4f Uploaded
greg
parents: 0
diff changeset
14 make_option(c("--input_vcf"), action="store", dest="input_vcf", help="VCF input file"),
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
15 make_option(c("--input_pop_info"), action="store", dest="input_pop_info", help="Population information input file"),
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
16 make_option(c("--output_missing_data"), action="store", dest="output_missing_data", help="Missing data outputfile"),
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
17 make_option(c("--output_mlg_id"), action="store", dest="output_mlg_id", help="Mlg Id data outputfile")
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
18 )
725b160c91f0 Uploaded
greg
parents:
diff changeset
19
725b160c91f0 Uploaded
greg
parents:
diff changeset
20 parser <- OptionParser(usage="%prog [options] file", option_list=option_list);
725b160c91f0 Uploaded
greg
parents:
diff changeset
21 args <- parse_args(parser, positional_arguments=TRUE);
725b160c91f0 Uploaded
greg
parents:
diff changeset
22 opt <- args$options;
725b160c91f0 Uploaded
greg
parents:
diff changeset
23
725b160c91f0 Uploaded
greg
parents:
diff changeset
24 get_file_path = function(file_name) {
725b160c91f0 Uploaded
greg
parents:
diff changeset
25 file_path = paste("output_plots_dir", file_name, sep="/");
725b160c91f0 Uploaded
greg
parents:
diff changeset
26 return(file_path);
725b160c91f0 Uploaded
greg
parents:
diff changeset
27 }
725b160c91f0 Uploaded
greg
parents:
diff changeset
28
3
1bc815d9c8c5 Uploaded
greg
parents: 2
diff changeset
29 # Extract Provesti's distance from the distance matrix.
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
30 provesti_distance <- function(distance, selection) {
725b160c91f0 Uploaded
greg
parents:
diff changeset
31 eval(parse(text=paste("as.matrix(distance)[", selection, "]")));
725b160c91f0 Uploaded
greg
parents:
diff changeset
32 }
725b160c91f0 Uploaded
greg
parents:
diff changeset
33
3
1bc815d9c8c5 Uploaded
greg
parents: 2
diff changeset
34 # Read in VCF input file.
2
86aaadf36a4f Uploaded
greg
parents: 0
diff changeset
35 vcf <- read.vcfR(opt$input_vcf);
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
36
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
37 #Missing GT in samples submitted
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
38 gt <- extract.gt(vcf, element="GT", as.numeric=FALSE);
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
39 myMiss <- apply(gt, MARGIN=2, function(x){ sum(is.na(x))});
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
40 myMiss <- (myMiss / nrow(vcf)) * 100;
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
41 miss <- data.frame(myMiss);
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
42 write.table(miss, file=opt$output_missing_data, quote=FALSE);
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
43
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
44 # Convert VCF file into formats compatiable with the Poppr package.
3
1bc815d9c8c5 Uploaded
greg
parents: 2
diff changeset
45 genind <- vcfR2genind(vcf);
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
46 # Add population information to the genind object.
725b160c91f0 Uploaded
greg
parents:
diff changeset
47 poptab <- read.table(opt$input_pop_info, check.names=FALSE, header=T, na.strings = c("", "NA"));
3
1bc815d9c8c5 Uploaded
greg
parents: 2
diff changeset
48 genind@pop <- as.factor(poptab$region);
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
49 # Convert genind to genclone object
3
1bc815d9c8c5 Uploaded
greg
parents: 2
diff changeset
50 gclo <- as.genclone(genind);
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
51 # Calculate the bitwise distance between individuals,
725b160c91f0 Uploaded
greg
parents:
diff changeset
52 # the following is similar to Provesti's distance.
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
53 xdis <- bitwise.dist(gclo, missing_match=FALSE);
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
54
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
55 # Multilocus genotypes (threshold of 1.6%).
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
56 mlg.filter(gclo, distance=xdis) <- 0.016;
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
57 # Start PDF device driver.
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
58 dev.new(width=10, height=7);
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
59 file_path = get_file_path("mlg_table.pdf");
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
60 pdf(file=file_path, width=10, height=7);
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
61 m <- mlg.table(gclo, background=TRUE, color=TRUE);
725b160c91f0 Uploaded
greg
parents:
diff changeset
62 dev.off();
725b160c91f0 Uploaded
greg
parents:
diff changeset
63
725b160c91f0 Uploaded
greg
parents:
diff changeset
64 # Create table of MLGs.
725b160c91f0 Uploaded
greg
parents:
diff changeset
65 id <- mlg.id(gclo);
725b160c91f0 Uploaded
greg
parents:
diff changeset
66 df <- data.frame(matrix((id), byrow=T));
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
67 write.table(df, file=opt$output_mlg_id);
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
68
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
69 # Rarifaction curve.
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
70 H.obj <- mlg.table(gclo, plot=TRUE);
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
71 # Start PDF device driver.
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
72 dev.new(width=10, height=7);
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
73 file_path = get_file_path("geno_rarifaction_curve.pdf");
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
74 pdf(file=file_path, width=10, height=7);
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
75 rarecurve(H.obj, ylab="Number of expected MLGs", sample=min(rowSums(H.obj)), border=NA, fill=NA, font=2, cex=1, col="blue");
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
76 dev.off()
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
77
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
78 # Create a phylogeny of samples based on distance matrices.
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
79 cols <- palette(brewer.pal(n=12, name='Set3'));
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
80 set.seed(999);
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
81 # Start PDF device driver.
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
82 dev.new(width=10, height=7);
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
83 file_path = get_file_path("nj_phylogeny.pdf");
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
84 pdf(file=file_path, width=10, height=7);
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
85 # Organize branches by clade.
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
86 tree <- gclo %>% aboot(dist=provesti.dist, sample=10, tree="nj", cutoff=50, quiet=TRUE) %>% ladderize();
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
87 plot.phylo(tree, tip.color=cols[obj2$pop],label.offset=0.0125, cex=0.7, font=2, lwd=4);
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
88 # Add a scale bar showing 5% difference..
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
89 add.scale.bar(length=0.05, cex=0.65);
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
90 nodelabels(tree$node.label, cex=.5, adj=c(1.5, -0.1), frame="n", font=3, xpd=TRUE);
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
91 dev.off()
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
92