annotate multilocus_genotype.R @ 18:1190ee1456f6 draft default tip

Uploaded
author greg
date Mon, 13 May 2019 08:37:37 -0400
parents 85f8fc57eee4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
1 #!/usr/bin/env Rscript
725b160c91f0 Uploaded
greg
parents:
diff changeset
2
725b160c91f0 Uploaded
greg
parents:
diff changeset
3 suppressPackageStartupMessages(library("adegenet"))
725b160c91f0 Uploaded
greg
parents:
diff changeset
4 suppressPackageStartupMessages(library("ape"))
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
5 suppressPackageStartupMessages(library("data.table"))
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
6 suppressPackageStartupMessages(library("dbplyr"))
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
7 suppressPackageStartupMessages(library("dplyr"))
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
8 suppressPackageStartupMessages(library("ggplot2"))
725b160c91f0 Uploaded
greg
parents:
diff changeset
9 suppressPackageStartupMessages(library("knitr"))
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
10 suppressPackageStartupMessages(library("maps"))
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
11 suppressPackageStartupMessages(library("mapproj"))
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
12 suppressPackageStartupMessages(library("optparse"))
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
13 suppressPackageStartupMessages(library("poppr"))
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
14 suppressPackageStartupMessages(library("RColorBrewer"))
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
15 suppressPackageStartupMessages(library("RPostgres"))
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
16 suppressPackageStartupMessages(library("SNPRelate"))
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
17 suppressPackageStartupMessages(library("tidyr"))
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
18 suppressPackageStartupMessages(library("vcfR"))
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
19 suppressPackageStartupMessages(library("vegan"))
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
20 suppressPackageStartupMessages(library("yarrr"))
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
21 theme_set(theme_bw())
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
22
725b160c91f0 Uploaded
greg
parents:
diff changeset
23 option_list <- list(
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
24 make_option(c("--database_connection_string"), action="store", dest="database_connection_string", help="Corals (stag) database connection string"),
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
25 make_option(c("--input_affy_metadata"), action="store", dest="input_affy_metadata", help="Affymetrix 96 well plate input file"),
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
26 make_option(c("--input_pop_info"), action="store", dest="input_pop_info", help="Population information input file"),
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
27 make_option(c("--input_vcf"), action="store", dest="input_vcf", help="VCF input file"),
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
28 make_option(c("--output_nj_phylogeny_tree"), action="store", dest="output_nj_phylogeny_tree", default=NULL, help="Flag to plot neighbor-joining phylogeny tree"),
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
29 make_option(c("--output_stag_db_report"), action="store", dest="output_stag_db_report", help="Flag to output stag db report file")
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
30 )
725b160c91f0 Uploaded
greg
parents:
diff changeset
31
725b160c91f0 Uploaded
greg
parents:
diff changeset
32 parser <- OptionParser(usage="%prog [options] file", option_list=option_list);
725b160c91f0 Uploaded
greg
parents:
diff changeset
33 args <- parse_args(parser, positional_arguments=TRUE);
725b160c91f0 Uploaded
greg
parents:
diff changeset
34 opt <- args$options;
725b160c91f0 Uploaded
greg
parents:
diff changeset
35
725b160c91f0 Uploaded
greg
parents:
diff changeset
36 get_file_path = function(file_name) {
725b160c91f0 Uploaded
greg
parents:
diff changeset
37 file_path = paste("output_plots_dir", file_name, sep="/");
725b160c91f0 Uploaded
greg
parents:
diff changeset
38 return(file_path);
725b160c91f0 Uploaded
greg
parents:
diff changeset
39 }
725b160c91f0 Uploaded
greg
parents:
diff changeset
40
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
41 get_database_connection <- function(db_conn_string) {
d2057e183772 Uploaded
greg
parents: 7
diff changeset
42 # Instantiate database connection.
d2057e183772 Uploaded
greg
parents: 7
diff changeset
43 # The connection string has this format:
d2057e183772 Uploaded
greg
parents: 7
diff changeset
44 # postgresql://user:password@host/dbname
d2057e183772 Uploaded
greg
parents: 7
diff changeset
45 conn_items <- strsplit(db_conn_string, "://")[[1]];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
46 string_needed <- conn_items[2];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
47 items_needed <- strsplit(string_needed, "@")[[1]];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
48 user_pass_string <- items_needed[1];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
49 host_dbname_string <- items_needed[2];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
50 user_pass_items <- strsplit(user_pass_string, ":")[[1]];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
51 host_dbname_items <- strsplit(host_dbname_string, "/")[[1]];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
52 user <- user_pass_items[1];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
53 pass <- user_pass_items[2];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
54 host <- host_dbname_items[1];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
55 dbname <- host_dbname_items[2];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
56 # FIXME: is there a way to not hard-code the port?
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
57 conn <- DBI::dbConnect(RPostgres::Postgres(), host=host, port="5432", dbname=dbname, user=user, password=pass);
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
58 return (conn);
d2057e183772 Uploaded
greg
parents: 7
diff changeset
59 }
d2057e183772 Uploaded
greg
parents: 7
diff changeset
60
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
61 time_elapsed <- function(start_time) {
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
62 cat("Elapsed time: ", proc.time() - start_time, "\n\n");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
63 }
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
64
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
65 time_start <- function(msg) {
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
66 start_time <- proc.time();
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
67 cat(msg, "...\n");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
68 return(start_time);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
69 }
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
70
3
1bc815d9c8c5 Uploaded
greg
parents: 2
diff changeset
71 # Read in VCF input file.
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
72 start_time <- time_start("Reading VCF input");
2
86aaadf36a4f Uploaded
greg
parents: 0
diff changeset
73 vcf <- read.vcfR(opt$input_vcf);
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
74 time_elapsed(start_time);
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
75
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
76 # Convert VCF file into a genind for the Poppr package.
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
77 start_time <- time_start("Converting VCF data to a genind object");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
78 genind_obj <- vcfR2genind(vcf);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
79 time_elapsed(start_time);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
80
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
81 # Add population information to the genind object.
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
82 population_info_data_table <- read.table(opt$input_pop_info, check.names=FALSE, header=F, na.strings=c("", "NA"), stringsAsFactors=FALSE, sep="\t");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
83 colnames(population_info_data_table) <- c("row_id", "affy_id", "user_specimen_id", "region");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
84 genind_obj@pop <- as.factor(population_info_data_table$region);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
85 strata(genind_obj) <- data.frame(pop(genind_obj));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
86
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
87 # Convert genind object to a genclone object.
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
88 start_time <- time_start("Converting the genind object to a genclone object");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
89 genind_clone <- as.genclone(genind_obj);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
90 time_elapsed(start_time);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
91
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
92 # Calculate the bitwise distance between individuals.
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
93 start_time <- time_start("Calculating the bitwise distance between individuals");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
94 bitwise_distance <- bitwise.dist(genind_clone);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
95 time_elapsed(start_time);
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
96
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
97 # Multilocus genotypes (threshold of 3.2%).
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
98 mlg.filter(genind_clone, distance=bitwise_distance) <- 0.032;
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
99 m <- mlg.table(genind_clone, background=TRUE, color=TRUE);
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
100
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
101 # Create list of MLGs.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
102 mlg_ids <- mlg.id(genind_clone);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
103
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
104 # Read user's Affymetrix 96 well plate tabular file.
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
105 affy_metadata_data_frame <- read.table(opt$input_affy_metadata, header=FALSE, stringsAsFactors=FALSE, sep="\t", na.strings = c("", "NA"));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
106 colnames(affy_metadata_data_frame) <- c("user_specimen_id", "field_call", "bcoral_genet_id", "bsym_genet_id", "reef",
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
107 "region", "latitude", "longitude", "geographic_origin", "sample_location",
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
108 "latitude_outplant", "longitude_outplant", "depth", "disease_resist",
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
109 "bleach_resist", "mortality","tle", "spawning", "collector_last_name",
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
110 "collector_first_name", "organization", "collection_date", "email", "seq_facility",
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
111 "array_version", "public", "public_after_date", "sperm_motility", "healing_time",
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
112 "dna_extraction_method", "dna_concentration", "registry_id");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
113 affy_metadata_data_frame$user_specimen_id <- as.character(affy_metadata_data_frame$user_specimen_id);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
114 user_specimen_ids <- as.character(affy_metadata_data_frame$user_specimen_id);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
115 # The specimen_id_field_call_data_table looks like this:
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
116 # user_specimen_ids V2
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
117 # test_002 prolifera
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
118 # test_003 prolifera
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
119 specimen_id_field_call_data_table <- data.table(user_specimen_ids, affy_metadata_data_frame$field_call);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
120 # Rename the user_specimen_ids column.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
121 setnames(specimen_id_field_call_data_table, c("user_specimen_ids"), c("user_specimen_id"));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
122 # Rename the V2 column.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
123 setnames(specimen_id_field_call_data_table, c("V2"), c("field_call"));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
124
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
125 # Connect to database.
d2057e183772 Uploaded
greg
parents: 7
diff changeset
126 conn <- get_database_connection(opt$database_connection_string);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
127 # Import the sample table.
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
128 sample_table <- tbl(conn, "sample");
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
129 # Import the genotype table.
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
130 genotype_table <- tbl(conn, "genotype");
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
131 # Select columns from the sample table and the
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
132 # genotype table joined by genotype_id.
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
133 sample_table_columns <- sample_table %>% select(user_specimen_id, affy_id, genotype_id);
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
134 smlg <- sample_table_columns %>%
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
135 left_join(genotype_table %>%
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
136 select("id", "coral_mlg_clonal_id", "symbio_mlg_clonal_id"),
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
137 by=c("genotype_id"="id"));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
138 # Convert to dataframe.
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
139 smlg_data_frame <- data.frame(smlg);
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
140 # Name the columns.
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
141 colnames(smlg_data_frame) <- c("user_specimen_id", "affy_id", "genotype_id", "coral_mlg_clonal_id", "symbio_mlg_clonal_id");
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
142
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
143 # Missing GT in samples submitted.
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
144 start_time <- time_start("Discovering missing GT in samples");
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
145 gt <- extract.gt(vcf, element="GT", as.numeric=FALSE);
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
146 missing_gt <- apply(gt, MARGIN=2, function(x){ sum(is.na(x))});
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
147 missing_gt <- (missing_gt / nrow(vcf)) * 100;
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
148 missing_gt_data_frame <- data.frame(missing_gt);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
149 # The specimen_id_field_call_data_table looks like this:
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
150 # rn missing_gt
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
151 # a100000-4368120-060520-256_I07.CEL 0.06092608
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
152 # a100000-4368120-060520-256_K07.CEL 0.05077173
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
153 missing_gt_data_table <-setDT(missing_gt_data_frame, keep.rownames=TRUE)[];
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
154 # Rename the rn column.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
155 setnames(missing_gt_data_table, c("rn"), c("affy_id"));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
156 # Rename the missing_gt column.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
157 setnames(missing_gt_data_table, c("missing_gt"), c("percent_missing_data_coral"));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
158 # Round data to two digits.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
159 missing_gt_data_table$percent_missing_data_coral <- round(missing_gt_data_table$percent_missing_data_coral, digits=2);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
160 time_elapsed(start_time);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
161
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
162 # Heterozygous alleles.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
163 start_time <- time_start("Discovering heterozygous alleles");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
164 heterozygous_alleles <- apply(gt, MARGIN=2, function(x) {sum(lengths(regmatches(x, gregexpr("0/1", x))))});
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
165 heterozygous_alleles <- (heterozygous_alleles / nrow(vcf)) * 100;
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
166 heterozygous_alleles_data_frame <- data.frame(heterozygous_alleles);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
167 # The heterozygous_alleles_data_table looks like this:
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
168 # rn heterozygous_alleles
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
169 # a100000-4368120-060520-256_I07.CEL 73.94903
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
170 # a100000-4368120-060520-256_K07.CEL 74.40089
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
171 heterozygous_alleles_data_table <- setDT(heterozygous_alleles_data_frame, keep.rownames=TRUE)[];
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
172 # Rename the rn column.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
173 setnames(heterozygous_alleles_data_table, c("rn"), c("affy_id"));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
174 # Rename the heterozygous_alleles column.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
175 setnames(heterozygous_alleles_data_table, c("heterozygous_alleles"), c("percent_heterozygous_coral"));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
176 # Round data to two digits.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
177 heterozygous_alleles_data_table$percent_heterozygous_coral <- round(heterozygous_alleles_data_table$percent_heterozygous_coral, digits=2);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
178 time_elapsed(start_time);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
179
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
180 # Reference alleles.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
181 start_time <- time_start("Discovering reference alleles");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
182 reference_alleles <- apply(gt, MARGIN=2, function(x) {sum(lengths(regmatches(x, gregexpr("0/0", x))))});
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
183 reference_alleles <- (reference_alleles / nrow(vcf)) * 100;
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
184 reference_alleles_data_frame <- data.frame(reference_alleles);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
185 # The reference_alleles_data_table looks like this:
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
186 # rn reference_alleles
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
187 # a100000-4368120-060520-256_I07.CEL 11.60642
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
188 # a100000-4368120-060520-256_K07.CEL 11.45918
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
189 reference_alleles_data_table <- setDT(reference_alleles_data_frame, keep.rownames=TRUE)[];
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
190 # Rename the rn column.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
191 setnames(reference_alleles_data_table, c("rn"), c("affy_id"));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
192 # Rename the reference_alleles column.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
193 setnames(reference_alleles_data_table, c("reference_alleles"), c("percent_reference_coral"));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
194 # Round data to two digits.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
195 reference_alleles_data_table$percent_reference <- round(reference_alleles_data_table$percent_reference, digits=2);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
196 time_elapsed(start_time);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
197
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
198 # Alternative alleles
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
199 start_time <- time_start("Discovering alternative alleles");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
200 alternative_alleles <- apply(gt, MARGIN=2, function(x) {sum(lengths(regmatches(x, gregexpr("1/1", x))))});
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
201 alternative_alleles <- (alternative_alleles / nrow(vcf)) * 100;
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
202 alternative_alleles_data_frame <- data.frame(alternative_alleles);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
203 # The alternative_alleles_data_table looks like this:
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
204 # rn alternative_alleles
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
205 # a100000-4368120-060520-256_I07.CEL 14.38363
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
206 # a100000-4368120-060520-256_K07.CEL 14.08916
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
207 alternative_alleles_data_table <- setDT(alternative_alleles_data_frame, keep.rownames=TRUE)[];
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
208 # Rename the rn column.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
209 setnames(alternative_alleles_data_table, c("rn"), c("affy_id"));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
210 # Rename the alternative_alleles column.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
211 setnames(alternative_alleles_data_table, c("alternative_alleles"), c("percent_alternative_coral"));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
212 # Round data to two digits.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
213 alternative_alleles_data_table$percent_alternative <- round(alternative_alleles_data_table$percent_alternative, digits=2);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
214 time_elapsed(start_time);
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
215
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
216 # The mlg_ids_data_table looks like this:
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
217 # mlg_ids
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
218 # a550962-4368120-060520-500_M23.CEL
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
219 # a550962-4368120-060520-256_A19.CEL
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
220 mlg_ids_data_table <- data.table(mlg_ids, keep.rownames=TRUE);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
221 # Rename the mlg_ids column.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
222 setnames(mlg_ids_data_table, c("mlg_ids"), c("affy_id"));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
223
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
224 # sample_mlg_tibble looks like this:
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
225 # A tibble: 262 x 3
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
226 # Groups: group [?]
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
227 # group affy_id coral_mlg_clonal_id
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
228 # <int> <chr> <chr>
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
229 # 1 a550962-4368120-060520-500_M23.CEL NA
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
230 # 2 a550962-4368120-060520-256_A19.CEL HG0006
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
231 sample_mlg_tibble <- mlg_ids_data_table %>%
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
232 group_by(row_number()) %>%
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
233 dplyr::rename(group="row_number()") %>%
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
234 unnest (affy_id) %>%
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
235 # Join with mlg table.
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
236 left_join(smlg_data_frame %>%
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
237 select("affy_id","coral_mlg_clonal_id"),
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
238 by="affy_id");
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
239
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
240 # If found in database, group members on previous mlg id.
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
241 uniques <- unique(sample_mlg_tibble[c("group", "coral_mlg_clonal_id")]);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
242 uniques <- uniques[!is.na(uniques$coral_mlg_clonal_id),];
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
243 na.mlg <- which(is.na(sample_mlg_tibble$coral_mlg_clonal_id));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
244 na.group <- sample_mlg_tibble$group[na.mlg];
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
245 sample_mlg_tibble$coral_mlg_clonal_id[na.mlg] <- uniques$coral_mlg_clonal_id[match(na.group, uniques$group)];
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
246
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
247 # Find out if the sample mlg matched a previous genotyped sample.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
248 # sample_mlg_match_tibble looks like this:
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
249 # A tibble: 262 x 4
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
250 # Groups: group [230]
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
251 # group affy_id coral_mlg_clonal_id DB_match
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
252 # <int> <chr> <chr> <chr>
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
253 # 1 a550962-4368120-060520-500_M23.CEL NA no_match
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
254 # 2 a550962-4368120-060520-256_A19.CEL HG0006 match
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
255 sample_mlg_match_tibble <- sample_mlg_tibble %>%
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
256 group_by(group) %>%
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
257 mutate(DB_match = ifelse(is.na(coral_mlg_clonal_id),"no_match", "match"));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
258
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
259 # Create new mlg id for samples with no matches in the database.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
260 none <- unique(sample_mlg_match_tibble[c("group", "coral_mlg_clonal_id")]);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
261 none <- none[is.na(none$coral_mlg_clonal_id),];
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
262 na.mlg2 <- which(is.na(sample_mlg_match_tibble$coral_mlg_clonal_id));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
263 n.g <- sample_mlg_match_tibble$group[na.mlg2];
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
264 ct <- length(unique(n.g));
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
265
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
266 # List of new group ids, the sequence starts at the number of
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
267 # ids present in sample_mlg_match_tibble$coral_mlg_clonal_ids
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
268 # plus 1.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
269 # FIXME: Not sure if # the sample_mlg_match_tibble file
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
270 # contains all ids. If it doesn't then look below to change
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
271 # the seq() function.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
272 n.g_ids <- sprintf("HG%04d", seq((sum(!is.na(unique(sample_mlg_match_tibble["coral_mlg_clonal_id"]))) + 1), by=1, length=ct));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
273
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
274 # Assign the new id iteratively for all that have NA.
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
275 for (i in 1:length(na.mlg2)) {
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
276 sample_mlg_match_tibble$coral_mlg_clonal_id[na.mlg2[i]] <- n.g_ids[match(sample_mlg_match_tibble$group[na.mlg2[i]], unique(n.g))];
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
277 }
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
278
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
279 # Subset population_info_data_table for all samples.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
280 # affy_id_user_specimen_id_vector looks like this:
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
281 # affy_id user_specimen_id
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
282 # a100000-4368120-060520-256_I07.CEL 13704
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
283 # a100000-4368120-060520-256_K07.CEL 13706
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
284 affy_id_user_specimen_id_vector <- population_info_data_table[c(2, 3)];
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
285
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
286 # Merge data frames for final table.
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
287 start_time <- time_start("Merging data frames");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
288 stag_db_report <- specimen_id_field_call_data_table %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
289 left_join(affy_id_user_specimen_id_vector %>%
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
290 select("affy_id", "user_specimen_id"),
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
291 by="user_specimen_id") %>%
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
292 left_join(sample_mlg_match_tibble %>%
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
293 select("affy_id", "coral_mlg_clonal_id", "DB_match"),
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
294 by="affy_id") %>%
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
295 left_join(missing_gt_data_table %>%
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
296 select("affy_id", "percent_missing_data_coral"),
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
297 by="affy_id") %>%
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
298 left_join(heterozygous_alleles_data_table %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
299 select("affy_id", "percent_heterozygous_coral"),
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
300 by="affy_id") %>%
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
301 left_join(reference_alleles_data_table %>%
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
302 select("affy_id", "percent_reference_coral"),
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
303 by="affy_id") %>%
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
304 left_join(alternative_alleles_data_table %>%
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
305 select("affy_id", "percent_alternative_coral"),
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
306 by="affy_id") %>%
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
307 mutate(DB_match = ifelse(is.na(DB_match), "failed", DB_match))%>%
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
308 mutate(coral_mlg_clonal_id = ifelse(is.na(coral_mlg_clonal_id), "failed", coral_mlg_clonal_id)) %>%
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
309 mutate(genetic_coral_species_call = ifelse(percent_alternative_coral >= 40 & percent_alternative_coral <= 44.5, "A.palmata","other")) %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
310 mutate(genetic_coral_species_call = ifelse(percent_alternative_coral >= 45.5 & percent_alternative_coral <= 50, "A.cervicornis", genetic_coral_species_call)) %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
311 mutate(genetic_coral_species_call = ifelse(percent_heterozygous_coral > 40, "A.prolifera", genetic_coral_species_call)) %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
312 ungroup() %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
313 select(-group);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
314 time_elapsed(start_time);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
315
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
316 start_time <- time_start("Writing csv output");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
317 write.csv(stag_db_report, file=opt$output_stag_db_report, quote=FALSE);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
318 time_elapsed(start_time);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
319
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
320 # Database sample table.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
321 sample_db <- affy_metadata_data_frame %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
322 left_join(stag_db_report %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
323 select("user_specimen_id","affy_id", "percent_missing_data_coral", "percent_heterozygous_coral", "percent_reference_coral", "percent_alternative_coral"),
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
324 by='user_specimen_id');
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
325
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
326 # Representative clone for genotype table.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
327 start_time <- time_start("Creating representative clone for genotype table");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
328 no_dup_genotypes_genind <- clonecorrect(genind_clone, strata = ~pop.genind_obj.);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
329 id_rep <- mlg.id(no_dup_genotypes_genind);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
330 id_data_table <- data.table(id_rep, keep.rownames=TRUE);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
331 # Rename the id_rep column.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
332 setnames(id_data_table, c("id_rep"), c("affy_id"));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
333 time_elapsed(start_time);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
334
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
335 # # Combine with previously genotyped samples in the database.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
336 start_time <- time_start("Selecting from various database tables");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
337 representative_mlg_tibble <- id_data_table %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
338 group_by(row_number()) %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
339 rename(group='row_number()') %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
340 unnest(affy_id) %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
341 left_join(stag_db_report %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
342 select("coral_mlg_clonal_id", "user_specimen_id", "affy_id"),
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
343 by='affy_id') %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
344 mutate(coral_mlg_rep_sample_id=ifelse(is.na(coral_mlg_clonal_id), "", affy_id)) %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
345 ungroup() %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
346 select(-group);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
347
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
348 # Database genotype table.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
349 genotype_table_join <- sample_mlg_match_tibble %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
350 left_join(representative_mlg_tibble %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
351 select("affy_id", "coral_mlg_rep_sample_id", "user_specimen_id"),
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
352 by='affy_id') %>%
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
353 ungroup() %>%
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
354 select(-group);
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
355
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
356 # Database taxonomy table.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
357 taxonomy_table_join <- stag_db_report %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
358 select(genetic_coral_species_call, affy_id) %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
359 mutate(genus_name = ifelse(genetic_coral_species_call == genetic_coral_species_call[grep("^A.*", genetic_coral_species_call)], "Acropora", "other")) %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
360 mutate(species_name = ifelse(genetic_coral_species_call == "A.palmata", "palmata", "other")) %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
361 mutate(species_name = ifelse(genetic_coral_species_call == "A.cervicornis", "cervicornis", species_name)) %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
362 mutate(species_name = ifelse(genetic_coral_species_call == "A.prolifera", "prolifera", species_name));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
363 time_elapsed(start_time);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
364 # Table of alleles for the new samples subset to new plate data.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
365 # Create vector indicating number of individuals desired from
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
366 # affy_id column of stag_db_report data table.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
367 i <- ifelse(is.na(stag_db_report[1]), "", stag_db_report[[1]]);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
368 i <- i[!apply(i== "", 1, all),];
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
369 sample_alleles_vector <- genind_clone[i, mlg.reset=FALSE, drop=FALSE];
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
370
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
371 # cols looks like this:
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
372 # blue1 red green pink orange blue2
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
373 # "#0C5BB0FF" "#EE0011FF" "#15983DFF" "#EC579AFF" "#FA6B09FF" "#149BEDFF"
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
374 # green2 yellow turquoise poop
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
375 # "#A1C720FF" "#FEC10BFF" "#16A08CFF" "#9A703EFF"
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
376 cols <- piratepal("basel");
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
377 set.seed(999);
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
378
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
379 if (!is.null(opt$output_nj_phylogeny_tree)) {
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
380 # Create a phylogeny tree of samples based on distance matrices.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
381 # Start PDF device driver.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
382 start_time <- time_start("Creating nj_phylogeny_tree.pdf");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
383 dev.new(width=10, height=7);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
384 file_path = get_file_path("nj_phylogeny_tree.pdf");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
385 pdf(file=file_path, width=10, height=7);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
386 # Organize branches by clade.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
387 nj_phylogeny_tree <- sample_alleles_vector %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
388 aboot(dist=provesti.dist, sample=100, tree="nj", cutoff=50, quiet=TRUE) %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
389 ladderize();
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
390 nj_phylogeny_tree$tip.label <- stag_db_report$user_specimen_id[match(nj_phylogeny_tree$tip.label, stag_db_report$affy_id)];
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
391 plot.phylo(nj_phylogeny_tree, tip.color=cols[sample_alleles_vector$pop], label.offset=0.0125, cex=0.3, font=2, lwd=4, align.tip.label=F, no.margin=T);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
392 # Add a scale bar showing 5% difference.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
393 add.scale.bar(0, 0.95, length=0.05, cex=0.65, lwd=3);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
394 nodelabels(nj_phylogeny_tree$node.label, cex=.5, adj=c(1.5, -0.1), frame="n", font=3, xpd=TRUE);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
395 legend("topright", legend=c(levels(sample_alleles_vector$pop)), text.col=cols, xpd=T, cex=0.8);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
396 dev.off()
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
397 time_elapsed(start_time);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
398 }
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
399
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
400 # Subset VCF to the user samples.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
401 start_time <- time_start("Subsetting vcf to the user samples");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
402 l <- length(i);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
403 n <- ncol(vcf@gt);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
404 s <- n - l;
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
405 svcf <- vcf[, s:n];
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
406 write.vcf(svcf, "subset.vcf.gz");
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
407 vcf.fn <- "subset.vcf.gz";
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
408 snpgdsVCF2GDS(vcf.fn, "test3.gds", method="biallelic.only");
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
409 genofile <- snpgdsOpen(filename="test3.gds", readonly=FALSE);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
410 gds_array <- read.gdsn(index.gdsn(genofile, "sample.id"));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
411 # gds_array looks like this:
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
412 # [1] "a550962-4368120-060520-500_A03.CEL" "a550962-4368120-060520-500_A05.CEL"
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
413 # [3] "a550962-4368120-060520-500_A09.CEL" "a550962-4368120-060520-500_A11.CEL"
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
414 gds_data_frame <- data.frame(gds_array);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
415 # gds_data_frame looks like this:
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
416 # gds_array
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
417 # a550962-4368120-060520-500_A03.CEL
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
418 # a550962-4368120-060520-500_A05.CEL
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
419 gds_data_table <- setDT(gds_data_frame, keep.rownames=FALSE)[];
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
420 # Rename the gds_array column.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
421 setnames(gds_data_table, c("gds_array"), c("affy_id"));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
422 # affy_id_region_list looks like this:
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
423 # affy_id region
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
424 # a100000-4368120-060520-256_I07.CEL USVI
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
425 # a100000-4368120-060520-256_K07.CEL USVI
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
426 affy_id_region_list <- population_info_data_table[c(2,4)];
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
427 gds_data_table_join <- gds_data_table %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
428 left_join(affy_id_region_list %>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
429 select("affy_id", "region"),
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
430 by='affy_id')%>%
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
431 drop_na();
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
432 samp.annot <- data.frame(pop.group=c(gds_data_table_join$region));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
433 add.gdsn(genofile, "sample.annot", samp.annot);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
434 # population_code looks like this:
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
435 # [1] 18.361733 18.361733 18.361733 18.361733 18.361733 18.361733
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
436 # [7] 25.11844009 25.11844009 25.11844009 25.11844009 25.11844009 25.11844009
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
437 population_code <- read.gdsn(index.gdsn(genofile, path="sample.annot/pop.group"));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
438 pop.group <- as.factor(read.gdsn(index.gdsn(genofile, "sample.annot/pop.group")));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
439 # pop.group looks like this:
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
440 # [1] 18.361733 18.361733 18.361733 18.361733 18.361733 18.361733
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
441 # [7] 25.11844009 25.11844009 25.11844009 25.11844009 25.11844009 25.11844009
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
442 time_elapsed(start_time);
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
443
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
444 # Distance matrix calculation.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
445 start_time <- time_start("Calculating distance matrix");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
446 ibs <- snpgdsIBS(genofile, num.thread=2, autosome.only=FALSE);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
447 time_elapsed(start_time);
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
448
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
449 # Cluster analysis on the genome-wide IBS pairwise distance matrix.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
450 start_time <- time_start("Clustering the genome-wide IBS pairwise distance matrix");
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
451 set.seed(100);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
452 par(cex=0.6, cex.lab=1, cex.axis=1.5,cex.main=2);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
453 ibs.hc <- snpgdsHCluster(snpgdsIBS(genofile, autosome.only=FALSE));
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
454 time_elapsed(start_time);
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
455
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
456 # Default clustering.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
457 start_time <- time_start("Creating ibs_default.pdf");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
458 # Start PDF device driver.
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
459 dev.new(width=10, height=7);
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
460 file_path = get_file_path("ibs_default.pdf");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
461 pdf(file=file_path, width=10, height=7);
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
462 rv <- snpgdsCutTree(ibs.hc, col.list=cols, pch.list=15);
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
463 snpgdsDrawTree(rv, main="Color by Cluster", leaflab="perpendicular", y.label=0.2);
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
464 legend("topleft", legend=levels(rv$samp.group), xpd=T, col=cols[1:nlevels(rv$samp.group)], pch=15, ncol=4, cex=1.2);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
465 dev.off()
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
466 time_elapsed(start_time);
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
467
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
468 # Color cluster by region.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
469 start_time <- time_start("Creating ibs_region.pdf");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
470 # Start PDF device driver.
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
471 dev.new(width=10, height=7);
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
472 file_path = get_file_path("ibs_region.pdf");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
473 pdf(file=file_path, width=10, height=7);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
474 race <- as.factor(population_code);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
475 rv2 <- snpgdsCutTree(ibs.hc, samp.group=race,col.list=cols, pch.list=15);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
476 snpgdsDrawTree(rv2, main="Color by Region", leaflab="perpendicular", y.label=0.2);
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
477 legend("topleft", legend=levels(race), xpd=T, col=cols[1:nlevels(race)], pch=15, ncol=4, cex=1.2);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
478 dev.off()
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
479 time_elapsed(start_time);
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
480
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
481 # close GDS file.
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
482 snpgdsClose(genofile);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
483
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
484 # Sample MLG on a map.
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
485 start_time <- time_start("Creating mlg_map.pdf");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
486 # Get the lattitude and longitude boundaries for rendering
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
487 # the map. Tese boundaries will restrict the map to focus
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
488 # (i.e., zoom) on the region of the world map from which
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
489 # the samples were taken.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
490 max_latitude <- max(affy_metadata_data_frame$latitude, na.rm=TRUE);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
491 min_latitude <- min(affy_metadata_data_frame$latitude, na.rm=TRUE);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
492 latitude_range_vector <- c(min_latitude-3, max_latitude+3);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
493 max_longitude <- max(affy_metadata_data_frame$longitude, na.rm=TRUE);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
494 min_longitude <- min(affy_metadata_data_frame$longitude, na.rm=TRUE);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
495 longitude_range_vector <- c(min_longitude-3, max_longitude+3);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
496 # Get the palette colors for rendering plots.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
497 colors <- length(unique(stag_db_report$coral_mlg_clonal_id));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
498 # Get a color palette.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
499 palette <- colorRampPalette(piratepal("basel"));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
500 # Start PDF device driver.
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
501 dev.new(width=10, height=7);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
502 file_path = get_file_path("mlg_map.pdf");
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
503 pdf(file=file_path, width=10, height=7);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
504 world_data = map_data("world");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
505 # Add the coral_mlg_clonal_id column from the stag_db_report
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
506 # data fram to the affy_metadata_data_frame.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
507 affy_metadata_data_frame$mlg <- stag_db_report$coral_mlg_clonal_id;
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
508 # Get the number of colors needed from the palette for plotting
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
509 # the sample locations on the world map.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
510 num_colors = length(unique(affy_metadata_data_frame$mlg));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
511 # Get a color palette.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
512 palette = colorRampPalette(piratepal("basel"));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
513 ggplot() +
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
514 geom_map(data=world_data, map=world_data, aes(x=long, y=lat, group=group, map_id=region), fill="white", colour="#7f7f7f") +
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
515 coord_map(xlim=longitude_range_vector, ylim=latitude_range_vector) +
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
516 geom_point(data=affy_metadata_data_frame, aes(x=longitude, y=latitude, group=mlg, color=mlg), alpha=.7, size=3) +
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
517 scale_color_manual(values=palette(num_colors)) +
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
518 theme(legend.position="bottom") +
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
519 guides(color=guide_legend(nrow=8, byrow=F));
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
520 dev.off()
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
521 time_elapsed(start_time);
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
522
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
523 # Missing data barplot.
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
524 start_time <- time_start("Creating missing_data.pdf");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
525 population_info_data_table$miss <- stag_db_report$percent_missing_data_coral[match(missing_gt_data_frame$affy_id, stag_db_report$affy_id)];
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
526 test2 <- which(!is.na(population_info_data_table$miss));
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
527 miss96 <- population_info_data_table$miss[test2];
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
528 name96 <- population_info_data_table$user_specimen_id[test2];
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
529 # Start PDF device driver.
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
530 dev.new(width=10, height=7);
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
531 file_path = get_file_path("missing_data.pdf");
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
532 pdf(file=file_path, width=10, height=7);
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
533 par(mar = c(8, 4, 4, 2));
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
534 x <- barplot(miss96, las=2, col=cols, ylim=c(0, 3), cex.axis=0.8, space=0.8, ylab="Missingness (%)", xaxt="n");
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
535 text(cex=0.6, x=x-0.25, y=-.05, name96, xpd=TRUE, srt=60, adj=1);
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
536 dev.off()
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
537 time_elapsed(start_time);
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
538
15
62ad61eac1ff Uploaded
greg
parents: 14
diff changeset
539 # Generate a pie chart for each sample with a genotype.
62ad61eac1ff Uploaded
greg
parents: 14
diff changeset
540 # Store the numerical and user_specimen_id values from
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
541 # stag_db_report for the charts (user_specimen_id names
15
62ad61eac1ff Uploaded
greg
parents: 14
diff changeset
542 # will be used to label each chart).
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
543 start_time <- time_start("Creating percent_breakdown.pdf");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
544 stag_db_report_data_table <- stag_db_report[c(-2, -3, -4)];
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
545 # Remove NA and NaN values.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
546 stag_db_report_data_table <- na.omit(stag_db_report_data_table);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
547 # Translate to N (i.e., number of samples with a genotype)
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
548 # columns and 5 rows.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
549 translated_stag_db_report_data_table <- t(stag_db_report_data_table);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
550 translated_stag_db_report_matrix <- as.matrix(translated_stag_db_report_data_table[-1,]);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
551 # Set the storage mode of the matrix to numeric. In some
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
552 # cases this could result in the following:
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
553 # Warning message:
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
554 # In mde(x) : NAs introduced by coercion
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
555 mode(translated_stag_db_report_matrix) <- "numeric";
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
556 # Remove NA and NaN values that may have been introduced
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
557 # by coercion.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
558 translated_stag_db_report_matrix <- na.omit(translated_stag_db_report_matrix);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
559 tsdbrm_row_means <- rowMeans(translated_stag_db_report_matrix, na.rm=TRUE);
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
560 dev.new(width=10, height=7);
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
561 file_path = get_file_path("percent_breakdown.pdf");
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
562 pdf(file=file_path, width=10, height=7);
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
563 # Average pie of all samples.
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
564 labels <- paste(c("missing data", "mixed", "reference", "alternative"), " (", round(tsdbrm_row_means, 1), "%)", sep="");
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
565 col <- c("GREY", "#006DDB", "#24FF24", "#920000");
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
566 main <- "Average breakdown of SNP assignments across all samples";
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
567 pie(tsdbrm_row_means, labels=labels, radius=0.60, col=col, main=main, cex.main=.75);
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
568 par(mfrow=c(3, 2));
14
96ee9122823e Uploaded
greg
parents: 12
diff changeset
569 col <- c("GREY", "#006DDB", "#24FF24", "#920000");
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
570 # Generate a pie chart for each sample with genotypes.
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
571 for (i in 1:ncol(translated_stag_db_report_matrix)) {
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
572 tmp_labels <- paste(labels, " (", round(translated_stag_db_report_matrix[,i], 1), "%)", sep="");
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
573 main <- paste("Breakdown of SNP assignments for", translated_stag_db_report_data_table[1, i]);
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
574 pie(translated_stag_db_report_matrix[,i], labels=tmp_labels, radius=0.90, col=col, main=main, cex.main=.85, cex=0.75);
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
575 }
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
576 dev.off()
18
1190ee1456f6 Uploaded
greg
parents: 17
diff changeset
577 time_elapsed(start_time);