Mercurial > repos > greg > multilocus_genotype
comparison multilocus_genotype.R @ 3:1bc815d9c8c5 draft
Uploaded
author | greg |
---|---|
date | Thu, 25 Oct 2018 13:33:47 -0400 |
parents | 86aaadf36a4f |
children | a7cce4091e80 |
comparison
equal
deleted
inserted
replaced
2:86aaadf36a4f | 3:1bc815d9c8c5 |
---|---|
20 get_file_path = function(file_name) { | 20 get_file_path = function(file_name) { |
21 file_path = paste("output_plots_dir", file_name, sep="/"); | 21 file_path = paste("output_plots_dir", file_name, sep="/"); |
22 return(file_path); | 22 return(file_path); |
23 } | 23 } |
24 | 24 |
25 #extract Provesti's distance from the distance matrix | 25 # Extract Provesti's distance from the distance matrix. |
26 provesti_distance <- function(distance, selection) { | 26 provesti_distance <- function(distance, selection) { |
27 eval(parse(text=paste("as.matrix(distance)[", selection, "]"))); | 27 eval(parse(text=paste("as.matrix(distance)[", selection, "]"))); |
28 } | 28 } |
29 | 29 |
30 #Read in VCF input file. | 30 # Read in VCF input file. |
31 vcf <- read.vcfR(opt$input_vcf); | 31 vcf <- read.vcfR(opt$input_vcf); |
32 | 32 |
33 # Convert VCF file into formats compatiable with the Poppr package. | 33 # Convert VCF file into formats compatiable with the Poppr package. |
34 gind <- vcfR2genind(vcf); | 34 genind <- vcfR2genind(vcf); |
35 # Add population information to the genind object. | 35 # Add population information to the genind object. |
36 poptab <- read.table(opt$input_pop_info, check.names=FALSE, header=T, na.strings = c("", "NA")); | 36 poptab <- read.table(opt$input_pop_info, check.names=FALSE, header=T, na.strings = c("", "NA")); |
37 gind@pop <- as.factor(poptab$region); | 37 genind@pop <- as.factor(poptab$region); |
38 # Convert genind to genclone object | 38 # Convert genind to genclone object |
39 gclo <- as.genclone(gind); | 39 gclo <- as.genclone(genind); |
40 # Calculate the bitwise distance between individuals, | 40 # Calculate the bitwise distance between individuals, |
41 # the following is similar to Provesti's distance. | 41 # the following is similar to Provesti's distance. |
42 xdis <- bitwise.dist(gclo); | 42 xdis <- bitwise.dist(gclo); |
43 # All alleles must match to make a unique multilocus | 43 # All alleles must match to make a unique multilocus |
44 # genotype (“original” naive approach). This is the | 44 # genotype (“original” naive approach). This is the |
50 # To determine the distance threshold, we will generate | 50 # To determine the distance threshold, we will generate |
51 # a neighbor-joining tree for all samples. | 51 # a neighbor-joining tree for all samples. |
52 | 52 |
53 # Start PDF device driver. | 53 # Start PDF device driver. |
54 dev.new(width=20, height=30); | 54 dev.new(width=20, height=30); |
55 file_path = get_file_path("phylogeny_tree.pdf") | 55 file_path = get_file_path("phylogeny_tree.pdf"); |
56 pdf(file=file_path, width=20, height=30, bg="white"); | 56 pdf(file=file_path, width=20, height=30, bg="white"); |
57 # Create a phylogeny of samples based on distance matrices | 57 # Create a phylogeny of samples based on distance matrices |
58 # colors. | 58 # colors. |
59 cols <- c("skyblue2","#C38D9E", '#E8A87C',"darkcyan","#e27d60"); | 59 cols <- c("skyblue2","#C38D9E", '#E8A87C',"darkcyan","#e27d60"); |
60 set.seed(999); | 60 set.seed(999); |
70 # Turn off device driver to flush output. | 70 # Turn off device driver to flush output. |
71 dev.off(); | 71 dev.off(); |
72 | 72 |
73 # Start PDF device driver. | 73 # Start PDF device driver. |
74 dev.new(width=20, height=30); | 74 dev.new(width=20, height=30); |
75 file_path = get_file_path("dissimiliarity_distance_matrix.pdf") | 75 file_path = get_file_path("dissimiliarity_distance_matrix.pdf"); |
76 pdf(file=file_path, width=20, height=30, bg="white"); | 76 pdf(file=file_path, width=20, height=30, bg="white"); |
77 # Use of mlg.filter() will create a dissimiliarity distance | 77 # Use of mlg.filter() will create a dissimiliarity distance |
78 # matrix from the data and then filter based off of that | 78 # matrix from the data and then filter based off of that |
79 # matrix. Here we will use the bitwise distance matrix | 79 # matrix. Here we will use the bitwise distance matrix |
80 # calculated above. | 80 # calculated above. |
85 # Turn off device driver to flush output. | 85 # Turn off device driver to flush output. |
86 dev.off(); | 86 dev.off(); |
87 | 87 |
88 # Start PDF device driver. | 88 # Start PDF device driver. |
89 dev.new(width=20, height=30); | 89 dev.new(width=20, height=30); |
90 file_path = get_file_path("filter_stats.pdf") | 90 file_path = get_file_path("filter_stats.pdf"); |
91 pdf(file=file_path, width=20, height=30, bg="white"); | 91 pdf(file=file_path, width=20, height=30, bg="white"); |
92 # Different clustering methods for tie breakers used by | 92 # Different clustering methods for tie breakers used by |
93 # mlg.filter, default is farthest neighbor. | 93 # mlg.filter, default is farthest neighbor. |
94 gclo_filtered <- filter_stats(gclo, distance=xdis, plot=TRUE); | 94 gclo_filtered <- filter_stats(gclo, distance=xdis, plot=TRUE); |
95 # Turn off device driver to flush output. | 95 # Turn off device driver to flush output. |
99 id <- mlg.id(gclo); | 99 id <- mlg.id(gclo); |
100 df <- data.frame(matrix((id), byrow=T)); | 100 df <- data.frame(matrix((id), byrow=T)); |
101 | 101 |
102 # Start PDF device driver. | 102 # Start PDF device driver. |
103 dev.new(width=20, height=30); | 103 dev.new(width=20, height=30); |
104 file_path = get_file_path("genotype_accumulation_curve.pdf") | 104 file_path = get_file_path("genotype_accumulation_curve.pdf"); |
105 pdf(file=file_path, width=20, height=30, bg="white"); | 105 pdf(file=file_path, width=20, height=30, bg="white"); |
106 # We can use the genotype_curve() function to create a | 106 # We can use the genotype_curve() function to create a |
107 # genotype accumulation curve to determine the minimum | 107 # genotype accumulation curve to determine the minimum |
108 # number of loci to identify unique MLGs. | 108 # number of loci to identify unique MLGs. |
109 gac <- genotype_curve(gind, sample=5, quiet=TRUE); | 109 gac <- genotype_curve(genind, sample=5, quiet=TRUE); |
110 # Turn off device driver to flush output. | 110 # Turn off device driver to flush output. |
111 dev.off(); | 111 dev.off(); |
112 | 112 |
113 # Start PDF device driver. | 113 # Start PDF device driver. |
114 dev.new(width=20, height=30); | 114 dev.new(width=20, height=30); |
115 file_path = get_file_path("genotype_accumulation_curve_for_gind.pdf") | 115 file_path = get_file_path("genotype_accumulation_curve_for_genind.pdf"); |
116 pdf(file=file_path, width=20, height=30, bg="white"); | 116 pdf(file=file_path, width=20, height=30, bg="white"); |
117 p <- last_plot(); | 117 p <- last_plot(); |
118 p + geom_smooth() + xlim(0, 100) + theme_bw(); | 118 p + geom_smooth() + xlim(0, 100) + theme_bw(); |
119 | 119 |
120 # From the collapsed MLGs, we can calculate genotypic | 120 # From the collapsed MLGs, we can calculate genotypic |