annotate multilocus_genotype.R @ 9:8f2f346a5e1c draft

Uploaded
author greg
date Tue, 04 Dec 2018 13:44:12 -0500
parents d2057e183772
children 6c93244a36e2
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
1 #!/usr/bin/env Rscript
725b160c91f0 Uploaded
greg
parents:
diff changeset
2
725b160c91f0 Uploaded
greg
parents:
diff changeset
3 suppressPackageStartupMessages(library("adegenet"))
725b160c91f0 Uploaded
greg
parents:
diff changeset
4 suppressPackageStartupMessages(library("ape"))
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
5 suppressPackageStartupMessages(library("data.table"))
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
6 suppressPackageStartupMessages(library("dplyr"))
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
7 suppressPackageStartupMessages(library("ggplot2"))
725b160c91f0 Uploaded
greg
parents:
diff changeset
8 suppressPackageStartupMessages(library("knitr"))
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
9 suppressPackageStartupMessages(library("optparse"))
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
10 suppressPackageStartupMessages(library("poppr"))
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
11 suppressPackageStartupMessages(library("RColorBrewer"))
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
12 suppressPackageStartupMessages(library("RPostgres"))
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
13 suppressPackageStartupMessages(library("vcfR"))
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
14 suppressPackageStartupMessages(library("vegan"))
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
15
725b160c91f0 Uploaded
greg
parents:
diff changeset
16 option_list <- list(
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
17 make_option(c("--database_connection_string"), action="store", dest="database_connection_string", help="Corals (stag) database connection string"),
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
18 make_option(c("--input_affy_metadata"), action="store", dest="input_affy_metadata", help="Affymetrix 96 well plate input file"),
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
19 make_option(c("--input_pop_info"), action="store", dest="input_pop_info", help="Population information input file"),
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
20 make_option(c("--input_vcf"), action="store", dest="input_vcf", help="VCF input file"),
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
21 make_option(c("--output_stag_db_report"), action="store", dest="output_stag_db_report", help="stag db report output file")
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
22 )
725b160c91f0 Uploaded
greg
parents:
diff changeset
23
725b160c91f0 Uploaded
greg
parents:
diff changeset
24 parser <- OptionParser(usage="%prog [options] file", option_list=option_list);
725b160c91f0 Uploaded
greg
parents:
diff changeset
25 args <- parse_args(parser, positional_arguments=TRUE);
725b160c91f0 Uploaded
greg
parents:
diff changeset
26 opt <- args$options;
725b160c91f0 Uploaded
greg
parents:
diff changeset
27
725b160c91f0 Uploaded
greg
parents:
diff changeset
28 get_file_path = function(file_name) {
725b160c91f0 Uploaded
greg
parents:
diff changeset
29 file_path = paste("output_plots_dir", file_name, sep="/");
725b160c91f0 Uploaded
greg
parents:
diff changeset
30 return(file_path);
725b160c91f0 Uploaded
greg
parents:
diff changeset
31 }
725b160c91f0 Uploaded
greg
parents:
diff changeset
32
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
33 get_database_connection <- function(db_conn_string) {
d2057e183772 Uploaded
greg
parents: 7
diff changeset
34 # Instantiate database connection.
d2057e183772 Uploaded
greg
parents: 7
diff changeset
35 # The connection string has this format:
d2057e183772 Uploaded
greg
parents: 7
diff changeset
36 # postgresql://user:password@host/dbname
d2057e183772 Uploaded
greg
parents: 7
diff changeset
37 conn_items <- strsplit(db_conn_string, "://")[[1]];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
38 string_needed <- conn_items[2];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
39 items_needed <- strsplit(string_needed, "@")[[1]];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
40 user_pass_string <- items_needed[1];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
41 host_dbname_string <- items_needed[2];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
42 user_pass_items <- strsplit(user_pass_string, ":")[[1]];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
43 host_dbname_items <- strsplit(host_dbname_string, "/")[[1]];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
44 user <- user_pass_items[1];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
45 pass <- user_pass_items[2];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
46 host <- host_dbname_items[1];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
47 dbname <- host_dbname_items[2];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
48 # FIXME: is there a way to not hard-code the port?
d2057e183772 Uploaded
greg
parents: 7
diff changeset
49 conn <- DBI::dbConnect(RPostgres::Postgres(), host=host, port='5432', dbname=dbname, user=user, password=pass);
d2057e183772 Uploaded
greg
parents: 7
diff changeset
50 return (conn);
d2057e183772 Uploaded
greg
parents: 7
diff changeset
51 }
d2057e183772 Uploaded
greg
parents: 7
diff changeset
52
3
1bc815d9c8c5 Uploaded
greg
parents: 2
diff changeset
53 # Read in VCF input file.
2
86aaadf36a4f Uploaded
greg
parents: 0
diff changeset
54 vcf <- read.vcfR(opt$input_vcf);
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
55
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
56 #Missing GT in samples submitted
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
57 gt <- extract.gt(vcf, element="GT", as.numeric=FALSE);
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
58 myMiss <- apply(gt, MARGIN=2, function(x){ sum(is.na(x))});
d2057e183772 Uploaded
greg
parents: 7
diff changeset
59 myMiss <- (myMiss / nrow(vcf)) * 100;
d2057e183772 Uploaded
greg
parents: 7
diff changeset
60 miss <- data.frame(myMiss);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
61
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
62 hets <- apply(gt, MARGIN=2, function(x) {sum(lengths(regmatches(x, gregexpr("0/1", x))))} );
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
63 hets <- (hets / nrow(vcf)) * 100;
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
64 ht <- data.frame(hets);
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
65
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
66 refA <- apply(gt, MARGIN=2, function(x) {sum(lengths(regmatches(x, gregexpr("0/0", x))))} );
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
67 refA <- (refA / nrow(vcf)) * 100;
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
68 rA <- data.frame(refA);
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
69
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
70 altB <- apply(gt, MARGIN=2, function(x) {sum(lengths(regmatches(x, gregexpr("1/1", x))))} );
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
71 altB <- (altB / nrow(vcf)) * 100;
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
72 aB <- data.frame(altB);
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
73
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
74 # Convert VCF file into a genind for the Poppr package.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
75 # TODO: probably should not hard-code 2 cores.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
76 gl <- vcfR2genlight(vcf, n.cores=2);
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
77 gind <- new("genind", (as.matrix(gl)));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
78
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
79 # Add population information to the genind object.
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
80 poptab <- read.table(opt$input_pop_info, check.names=FALSE, header=F, na.strings=c("", "NA"), stringsAsFactors = FALSE);
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
81 colnames(poptab) <- c("row_id", "affy_id", "user_specimen_id", "region");
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
82 gind@pop <- as.factor(poptab$region);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
83
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
84 # Convert genind object to a genclone object.
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
85 obj2 <- as.genclone(gind);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
86
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
87 # Calculate the bitwise distance between individuals.
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
88 xdis <- bitwise.dist(obj2);
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
89
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
90 # Multilocus genotypes (threshold of 16%).
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
91 mlg.filter(obj2, distance=xdis) <- 0.016;
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
92 m <- mlg.table(obj2, background=TRUE, color=TRUE);
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
93
725b160c91f0 Uploaded
greg
parents:
diff changeset
94 # Create table of MLGs.
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
95 id <- mlg.id(obj2);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
96 dt <- data.table(id, keep.rownames=TRUE);
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
97 setnames(dt, c("id"), c("affy_id"));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
98
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
99 # Read user's Affymetrix 96 well plate csv file.
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
100 pinfo <- read.table(opt$input_affy_metadata, header=TRUE, stringsAsFactors=FALSE, sep="\t");
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
101 pinfo$user_specimen_id <- as.character(pinfo$user_specimen_id);
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
102 pinfo2 <- as.character(pinfo$user_specimen_id);
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
103 pi <- data.table(pinfo2);
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
104 setnames(pi, c("pinfo2"), c("user_specimen_id"));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
105
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
106 # Connect to database.
d2057e183772 Uploaded
greg
parents: 7
diff changeset
107 conn <- get_database_connection(opt$database_connection_string);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
108
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
109 # Import the sample table.
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
110 mD <- tbl(conn, "sample");
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
111
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
112 # Select user_specimen_id and mlg columns.
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
113 smlg <- mD %>% select(user_specimen_id, coral_mlg_clonal_id, symbio_mlg_clonal_id, affy_id);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
114
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
115 # Convert to dataframe.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
116 sm <- data.frame(smlg);
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
117 sm[sm==""] <- NA;
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
118
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
119 # Convert missing data into data table.
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
120 mi <-setDT(miss, keep.rownames=TRUE)[];
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
121 setnames(mi, c("rn"), c("affy_id"));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
122 setnames(mi, c("myMiss"), c("percent_missing_data_coral"));
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
123 # Round missing data to two digits.
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
124 mi$percent_missing_data_coral <- round(mi$percent_missing_data_coral, digits=2);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
125
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
126 # Convert heterozygosity data into data table.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
127 ht <-setDT(ht, keep.rownames=TRUE)[];
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
128 setnames(ht, c("rn"), c("affy_id"));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
129 setnames(ht, c("hets"), c("percent_mixed_coral"));
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
130 # Round missing data to two digits.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
131 ht$percent_mixed<-round(ht$percent_mixed, digits=2);
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
132
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
133 # Convert refA data into data.table.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
134 rA <-setDT(rA, keep.rownames=TRUE)[];
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
135 setnames(rA, c("rn"), c("affy_id"));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
136 setnames(rA, c("refA"), c("percent_reference_coral"));
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
137 # round missing data to two digits.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
138 rA$percent_reference<-round(rA$percent_reference, digits=2);
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
139
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
140 # Convert altB data into data table.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
141 aB <-setDT(aB, keep.rownames=TRUE)[];
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
142 setnames(aB, c("rn"), c("affy_id"));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
143 setnames(aB, c("altB"), c("percent_alternative_coral"));
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
144 # Round missing data to two digits.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
145 aB$percent_alternative<-round(aB$percent_alternative, digits=2);
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
146
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
147 #convert mlg id to data.table format
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
148 dt <- data.table(id, keep.rownames=TRUE);
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
149 setnames(dt, c("id"), c("affy_id"));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
150
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
151 # Transform.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
152 df3 <- dt %>%
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
153 group_by(row_number()) %>%
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
154 dplyr::rename(group='row_number()') %>%
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
155 unnest (user_specimen_id) %>%
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
156 # Join with mlg table.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
157 left_join(sm %>%
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
158 select("affy_id","coral_mlg_clonal_id"),
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
159 by='affy_id');
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
160
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
161 # If found in database, group members on previous mlg id.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
162 uniques <- unique(df3[c("group", "coral_mlg_clonal_id")]);
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
163 uniques <- uniques[!is.na(uniques$coral_mlg_clonal_id),];
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
164 na.mlg <- which(is.na(df3$coral_mlg_clonal_id));
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
165 na.group <- df3$group[na.mlg];
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
166 df3$coral_mlg_clonal_id[na.mlg] <- uniques$coral_mlg_clonal_id[match(na.group, uniques$group)];
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
167
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
168 # Determine if the sample mlg matched previous genotyped sample.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
169 df4<- df3 %>%
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
170 group_by(group) %>%
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
171 mutate(DB_match = ifelse(is.na(coral_mlg_clonal_id),"no_match","match"));
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
172
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
173 # Create new mlg id for samples that did not match those in the database.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
174 none <- unique(df4[c("group", "coral_mlg_clonal_id")]);
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
175 none <- none[is.na(none$coral_mlg_clonal_id),];
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
176 na.mlg2 <- which(is.na(df4$coral_mlg_clonal_id));
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
177 n.g <- df4$group[na.mlg2];
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
178 ct <- length(unique(n.g));
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
179
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
180 # List of new group ids, the sequence starts at the number of
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
181 # ids present in df4$coral_mlg_clonal_ids plus 1. Not sure if
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
182 # the df4 file contains all ids. If it doesn't then look below
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
183 # to change the seq() function.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
184 n.g_ids <- sprintf("HG%04d", seq((sum(!is.na(unique(df4["coral_mlg_clonal_id"]))) + 1), by=1, length=ct));
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
185 # This is a key for pairing group with new ids.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
186 rat <- cbind(unique(n.g), n.g_ids);
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
187 # this for loop assigns the new id iteratively for all that have NA.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
188 for (i in 1:length(na.mlg2)) {
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
189 df4$coral_mlg_clonal_id[na.mlg2[i]] <- n.g_ids[match(df4$group[na.mlg2[i]], unique(n.g))];
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
190 }
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
191
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
192 # subset poptab for all samples.
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
193 subpop <- poptab[c(2, 3)];
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
194
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
195 # Merge data frames for final table.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
196 report_user <- pi %>%
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
197 left_join(subpop %>%
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
198 select("affy_id", "user_specimen_id"),
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
199 by='user_specimen_id') %>%
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
200 left_join(df4 %>%
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
201 select("affy_id", "coral_mlg_clonal_id", "DB_match"),
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
202 by='affy_id') %>%
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
203 left_join(mi %>%
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
204 select("affy_id", "percent_missing_data_coral"),
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
205 by='affy_id') %>%
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
206 left_join(ht %>%
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
207 select("affy_id", "percent_mixed_coral"),
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
208 by='affy_id') %>%
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
209 left_join(rA %>%
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
210 select("affy_id", "percent_reference_coral"),
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
211 by='affy_id') %>%
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
212 left_join(aB %>%
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
213 select("affy_id", "percent_alternative_coral"),
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
214 by='affy_id') %>%
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
215 mutate(DB_match = ifelse(is.na(DB_match), "failed", DB_match))%>%
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
216 mutate(coral_mlg_clonal_id=ifelse(is.na(coral_mlg_clonal_id), "failed", coral_mlg_clonal_id))%>%
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
217 ungroup() %>%
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
218 select(-group);
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
219
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
220 write.csv(report_user, file=paste(opt$output_stag_db_report), quote=FALSE);
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
221
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
222 # Combine sample information for database.
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
223 report_db <- pinfo %>%
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
224 left_join(report_user %>%
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
225 select("user_specimen_id", "affy_id", "coral_mlg_clonal_id", "DB_match",
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
226 "percent_missing_data_coral", "percent_mixed_coral", "percent_reference_coral",
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
227 "percent_alternative_coral"),
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
228 by='user_specimen_id')
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
229
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
230 # Create vector indicating number of individuals desired
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
231 # made from affy_id collumn of report_user data table.
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
232 i <- report_user[[2]];
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
233 sub96 <- obj2[i, mlg.reset=FALSE, drop=FALSE];
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
234
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
235 # Create a phylogeny of samples based on distance matrices.
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
236 cols <- palette(brewer.pal(n=12, name='Set3'));
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
237 set.seed(999);
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
238 # Start PDF device driver.
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
239 dev.new(width=10, height=7);
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
240 file_path = get_file_path("nj_phylogeny.pdf");
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
241 pdf(file=file_path, width=10, height=7);
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
242 # Organize branches by clade.
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
243 theTree <- sub96 %>%
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
244 aboot(dist=provesti.dist, sample=1, tree="nj", cutoff=50, quiet=TRUE) %>%
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
245 ladderize();
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
246 theTree$tip.label <- report_user$user_specimen_id[match(theTree$tip.label, report_user$affy_id)];
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
247 plot.phylo(theTree, tip.color=cols[sub96$pop], label.offset=0.0125, cex=0.3, font=2, lwd=4, align.tip.label=F, no.margin=T);
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
248 # Add a scale bar showing 5% difference..
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
249 add.scale.bar(0, 0.95, length=0.05, cex=0.65, lwd=3);
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
250 nodelabels(theTree$node.label, cex=.5, adj=c(1.5, -0.1), frame="n", font=3, xpd=TRUE);
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
251 legend("topright", legend=c("Antigua", "Bahamas", "Belize", "Cuba", "Curacao", "Florida", "PuertoRico", "USVI"), text.col=cols, xpd=T, cex=0.8);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
252 dev.off();
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
253
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
254 # Missing data barplot.
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
255 poptab$miss <- report_user$percent_missing_data_coral[match(miss$affy_id, report_user$affy_id)];
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
256 test2 <- which(!is.na(poptab$miss));
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
257 miss96 <- poptab$miss[test2];
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
258 name96 <- poptab$user_specimen_id[test2];
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
259 dev.new(width=10, height=7);
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
260 file_path = get_file_path("missing_data.pdf");
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
261 pdf (file=file_path, width=10, height=7);
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
262 par(mar = c(8, 4, 4, 2));
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
263 x <- barplot(miss96, las=2, col=cols, ylim=c(0, 3), cex.axis=0.8, space=0.8, ylab="Missingness (%)", xaxt="n");
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
264 text(cex=0.6, x=x-0.25, y=-.05, name96, xpd=TRUE, srt=60, adj=1);
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
265 dev.off()
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
266
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
267 # Rarifaction curve.
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
268 # Start PDF device driver.
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
269 #dev.new(width=10, height=7);
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
270 #file_path = get_file_path("geno_rarifaction_curve.pdf");
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
271 #pdf(file=file_path, width=10, height=7);
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
272 #rarecurve(m, ylab="Number of expected MLGs", sample=min(rowSums(m)), border=NA, fill=NA, font=2, cex=1, col="blue");
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
273 #dev.off();
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
274
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
275 # Genotype accumulation curve, sample is number of
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
276 # loci randomly selected for to make the curve.
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
277 #dev.new(width=10, height=7);
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
278 #file_path = get_file_path("geno_accumulation_curve.pdf");
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
279 #pdf(file=file_path, width=10, height=7);
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
280 #genotype_curve(gind, sample=5, quiet=TRUE);
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
281 #dev.off();
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
282