# HG changeset patch # User greg # Date 1540488827 14400 # Node ID 1bc815d9c8c5b9d7a80cb64b99d48920b5af2ece # Parent 86aaadf36a4f17ccec2ed43fb09d46a6873dc4d4 Uploaded diff -r 86aaadf36a4f -r 1bc815d9c8c5 multilocus_genotype.R --- a/multilocus_genotype.R Thu Oct 25 11:10:17 2018 -0400 +++ b/multilocus_genotype.R Thu Oct 25 13:33:47 2018 -0400 @@ -22,21 +22,21 @@ return(file_path); } -#extract Provesti's distance from the distance matrix +# Extract Provesti's distance from the distance matrix. provesti_distance <- function(distance, selection) { eval(parse(text=paste("as.matrix(distance)[", selection, "]"))); } -#Read in VCF input file. +# Read in VCF input file. vcf <- read.vcfR(opt$input_vcf); # Convert VCF file into formats compatiable with the Poppr package. -gind <- vcfR2genind(vcf); +genind <- vcfR2genind(vcf); # Add population information to the genind object. poptab <- read.table(opt$input_pop_info, check.names=FALSE, header=T, na.strings = c("", "NA")); -gind@pop <- as.factor(poptab$region); +genind@pop <- as.factor(poptab$region); # Convert genind to genclone object -gclo <- as.genclone(gind); +gclo <- as.genclone(genind); # Calculate the bitwise distance between individuals, # the following is similar to Provesti's distance. xdis <- bitwise.dist(gclo); @@ -52,7 +52,7 @@ # Start PDF device driver. dev.new(width=20, height=30); -file_path = get_file_path("phylogeny_tree.pdf") +file_path = get_file_path("phylogeny_tree.pdf"); pdf(file=file_path, width=20, height=30, bg="white"); # Create a phylogeny of samples based on distance matrices # colors. @@ -72,7 +72,7 @@ # Start PDF device driver. dev.new(width=20, height=30); -file_path = get_file_path("dissimiliarity_distance_matrix.pdf") +file_path = get_file_path("dissimiliarity_distance_matrix.pdf"); pdf(file=file_path, width=20, height=30, bg="white"); # Use of mlg.filter() will create a dissimiliarity distance # matrix from the data and then filter based off of that @@ -87,7 +87,7 @@ # Start PDF device driver. dev.new(width=20, height=30); -file_path = get_file_path("filter_stats.pdf") +file_path = get_file_path("filter_stats.pdf"); pdf(file=file_path, width=20, height=30, bg="white"); # Different clustering methods for tie breakers used by # mlg.filter, default is farthest neighbor. @@ -101,18 +101,18 @@ # Start PDF device driver. dev.new(width=20, height=30); -file_path = get_file_path("genotype_accumulation_curve.pdf") +file_path = get_file_path("genotype_accumulation_curve.pdf"); pdf(file=file_path, width=20, height=30, bg="white"); # We can use the genotype_curve() function to create a # genotype accumulation curve to determine the minimum # number of loci to identify unique MLGs. -gac <- genotype_curve(gind, sample=5, quiet=TRUE); +gac <- genotype_curve(genind, sample=5, quiet=TRUE); # Turn off device driver to flush output. dev.off(); # Start PDF device driver. dev.new(width=20, height=30); -file_path = get_file_path("genotype_accumulation_curve_for_gind.pdf") +file_path = get_file_path("genotype_accumulation_curve_for_genind.pdf"); pdf(file=file_path, width=20, height=30, bg="white"); p <- last_plot(); p + geom_smooth() + xlim(0, 100) + theme_bw();