annotate multilocus_genotype.R @ 17:85f8fc57eee4 draft

Uploaded
author greg
date Wed, 17 Apr 2019 09:07:04 -0400
parents c4ec8727b50c
children 1190ee1456f6
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
1 #!/usr/bin/env Rscript
725b160c91f0 Uploaded
greg
parents:
diff changeset
2
725b160c91f0 Uploaded
greg
parents:
diff changeset
3 suppressPackageStartupMessages(library("adegenet"))
725b160c91f0 Uploaded
greg
parents:
diff changeset
4 suppressPackageStartupMessages(library("ape"))
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
5 suppressPackageStartupMessages(library("data.table"))
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
6 suppressPackageStartupMessages(library("dbplyr"))
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
7 suppressPackageStartupMessages(library("dplyr"))
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
8 suppressPackageStartupMessages(library("ggplot2"))
725b160c91f0 Uploaded
greg
parents:
diff changeset
9 suppressPackageStartupMessages(library("knitr"))
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
10 suppressPackageStartupMessages(library("optparse"))
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
11 suppressPackageStartupMessages(library("poppr"))
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
12 suppressPackageStartupMessages(library("RColorBrewer"))
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
13 suppressPackageStartupMessages(library("rnaturalearth"))
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
14 suppressPackageStartupMessages(library("rnaturalearthdata"))
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
15 suppressPackageStartupMessages(library("RPostgres"))
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
16 suppressPackageStartupMessages(library("sf"))
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
17 suppressPackageStartupMessages(library(SNPRelate))
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
18 suppressPackageStartupMessages(library("tidyr"))
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
19 suppressPackageStartupMessages(library("vcfR"))
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
20 suppressPackageStartupMessages(library("vegan"))
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
21 suppressPackageStartupMessages(library("yarrr"))
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
22 theme_set(theme_bw())
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
23
725b160c91f0 Uploaded
greg
parents:
diff changeset
24 option_list <- list(
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
25 make_option(c("--database_connection_string"), action="store", dest="database_connection_string", help="Corals (stag) database connection string"),
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
26 make_option(c("--input_affy_metadata"), action="store", dest="input_affy_metadata", help="Affymetrix 96 well plate input file"),
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
27 make_option(c("--input_pop_info"), action="store", dest="input_pop_info", help="Population information input file"),
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
28 make_option(c("--input_vcf"), action="store", dest="input_vcf", help="VCF input file"),
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
29 make_option(c("--output_stag_db_report"), action="store", dest="output_stag_db_report", help="stag db report output file"),
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
30 make_option(c("--nj_tree"), action="store", dest="nj_tree", help="neighbor-joining tree output file")
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
31 )
725b160c91f0 Uploaded
greg
parents:
diff changeset
32
725b160c91f0 Uploaded
greg
parents:
diff changeset
33 parser <- OptionParser(usage="%prog [options] file", option_list=option_list);
725b160c91f0 Uploaded
greg
parents:
diff changeset
34 args <- parse_args(parser, positional_arguments=TRUE);
725b160c91f0 Uploaded
greg
parents:
diff changeset
35 opt <- args$options;
725b160c91f0 Uploaded
greg
parents:
diff changeset
36
725b160c91f0 Uploaded
greg
parents:
diff changeset
37 get_file_path = function(file_name) {
725b160c91f0 Uploaded
greg
parents:
diff changeset
38 file_path = paste("output_plots_dir", file_name, sep="/");
725b160c91f0 Uploaded
greg
parents:
diff changeset
39 return(file_path);
725b160c91f0 Uploaded
greg
parents:
diff changeset
40 }
725b160c91f0 Uploaded
greg
parents:
diff changeset
41
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
42 get_database_connection <- function(db_conn_string) {
d2057e183772 Uploaded
greg
parents: 7
diff changeset
43 # Instantiate database connection.
d2057e183772 Uploaded
greg
parents: 7
diff changeset
44 # The connection string has this format:
d2057e183772 Uploaded
greg
parents: 7
diff changeset
45 # postgresql://user:password@host/dbname
d2057e183772 Uploaded
greg
parents: 7
diff changeset
46 conn_items <- strsplit(db_conn_string, "://")[[1]];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
47 string_needed <- conn_items[2];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
48 items_needed <- strsplit(string_needed, "@")[[1]];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
49 user_pass_string <- items_needed[1];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
50 host_dbname_string <- items_needed[2];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
51 user_pass_items <- strsplit(user_pass_string, ":")[[1]];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
52 host_dbname_items <- strsplit(host_dbname_string, "/")[[1]];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
53 user <- user_pass_items[1];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
54 pass <- user_pass_items[2];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
55 host <- host_dbname_items[1];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
56 dbname <- host_dbname_items[2];
d2057e183772 Uploaded
greg
parents: 7
diff changeset
57 # FIXME: is there a way to not hard-code the port?
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
58 conn <- DBI::dbConnect(RPostgres::Postgres(), host=host, port="5432", dbname=dbname, user=user, password=pass);
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
59 return (conn);
d2057e183772 Uploaded
greg
parents: 7
diff changeset
60 }
d2057e183772 Uploaded
greg
parents: 7
diff changeset
61
3
1bc815d9c8c5 Uploaded
greg
parents: 2
diff changeset
62 # Read in VCF input file.
2
86aaadf36a4f Uploaded
greg
parents: 0
diff changeset
63 vcf <- read.vcfR(opt$input_vcf);
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
64
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
65 # Convert VCF file into a genind for the Poppr package.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
66 # TODO: probably should not hard-code 2 cores.
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
67 # changed to genind format for extracting alleles later
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
68 # trade-off is it is a bit slower to import data
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
69 # gl <- vcfR2genlight(vcf, n.cores=2)
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
70 # gind <- new("genind", (as.matrix(gl)))
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
71 gind <- vcfR2genind(vcf);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
72
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
73 # Add population information to the genind object.
10
6c93244a36e2 Uploaded
greg
parents: 9
diff changeset
74 poptab <- read.table(opt$input_pop_info, check.names=FALSE, header=F, na.strings=c("", "NA"), stringsAsFactors=FALSE, sep="\t");
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
75 colnames(poptab) <- c("row_id", "affy_id", "user_specimen_id", "region");
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
76 gind@pop <- as.factor(poptab$region);
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
77 strata(gind)<-data.frame(pop(gind));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
78
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
79 # Convert genind object to a genclone object.
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
80 obj2 <- as.genclone(gind);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
81
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
82 # Calculate the bitwise distance between individuals.
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
83 xdis <- bitwise.dist(obj2);
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
84
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
85 # Multilocus genotypes (threshold of 3.2%).
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
86 # threshold doubled because of how the data is formatted in genind compared to genlight
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
87 mlg.filter(obj2, distance=xdis) <- 0.032;
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
88 m <- mlg.table(obj2, background=TRUE, color=TRUE);
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
89
725b160c91f0 Uploaded
greg
parents:
diff changeset
90 # Create table of MLGs.
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
91 id <- mlg.id(obj2);
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
92 #dt <- data.table(id, keep.rownames=TRUE);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
93 #setnames(dt, c("id"), c("affy_id"));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
94
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
95 # Read user's Affymetrix 96 well plate tabular file.
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
96 pinfo <- read.table(opt$input_affy_metadata, header=FALSE, stringsAsFactors=FALSE, sep="\t", na.strings = c("", "NA"));
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
97 colnames(pinfo) <- c("user_specimen_id", "field_call", "bcoral_genet_id", "bsym_genet_id", "reef",
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
98 "region", "latitude", "longitude", "geographic_origin", "sample_location",
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
99 "latitude_outplant", "longitude_outplant", "depth", "disease_resist",
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
100 "bleach_resist", "mortality","tle", "spawning", "collector_last_name",
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
101 "collector_first_name", "organization", "collection_date", "email", "seq_facility",
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
102 "array_version", "public", "public_after_date", "sperm_motility", "healing_time",
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
103 "dna_extraction_method", "dna_concentration", "registry_id");
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
104 pinfo$user_specimen_id <- as.character(pinfo$user_specimen_id);
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
105 pinfo2 <- as.character(pinfo$user_specimen_id);
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
106 pi <- data.table(pinfo2, pinfo$field_call);
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
107 setnames(pi, c("pinfo2"), c("user_specimen_id"));
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
108 setnames(pi, c("V2"), c("field_call"));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
109
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
110 # Connect to database.
d2057e183772 Uploaded
greg
parents: 7
diff changeset
111 conn <- get_database_connection(opt$database_connection_string);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
112
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
113 # Import the sample table.
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
114 sample_table <- tbl(conn, "sample");
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
115
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
116 # Import the genotype table.
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
117 genotype_table <- tbl(conn, "genotype");
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
118
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
119 # Select columns from the sample table and the
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
120 # genotype table joined by genotype_id.
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
121 sample_table_columns <- sample_table %>% select(user_specimen_id, affy_id, genotype_id);
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
122 smlg <- sample_table_columns %>%
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
123 left_join(genotype_table %>%
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
124 select("id", "coral_mlg_clonal_id", "symbio_mlg_clonal_id"),
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
125 by=c("genotype_id" = "id"));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
126
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
127 # Convert to dataframe.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
128 sm <- data.frame(smlg);
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
129 # Name the columns.
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
130 colnames(sm) <- c("user_specimen_id", "affy_id", "genotype_id", "coral_mlg_clonal_id", "symbio_mlg_clonal_id");
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
131
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
132 # Missing GT in samples submitted.
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
133 gt <- extract.gt(vcf, element="GT", as.numeric=FALSE);
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
134 myMiss <- apply(gt, MARGIN=2, function(x){ sum(is.na(x))});
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
135 myMiss <- (myMiss / nrow(vcf)) * 100;
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
136 miss <- data.frame(myMiss);
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
137
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
138 # Convert missing data into data table.
8
d2057e183772 Uploaded
greg
parents: 7
diff changeset
139 mi <-setDT(miss, keep.rownames=TRUE)[];
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
140 setnames(mi, c("rn"), c("affy_id"));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
141 setnames(mi, c("myMiss"), c("percent_missing_data_coral"));
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
142 # Round missing data to two digits.
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
143 mi$percent_missing_data_coral <- round(mi$percent_missing_data_coral, digits=2);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
144
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
145 #heterozygous alleles
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
146 hets <- apply(gt, MARGIN=2, function(x) {sum(lengths(regmatches(x, gregexpr("0/1", x))))} );
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
147 hets <- (hets / nrow(vcf)) * 100;
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
148 ht <- data.frame(hets);
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
149
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
150 # Convert heterozygosity data into data table.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
151 ht <-setDT(ht, keep.rownames=TRUE)[];
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
152 setnames(ht, c("rn"), c("affy_id"));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
153 setnames(ht, c("hets"), c("percent_mixed_coral"));
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
154 # Round missing data to two digits.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
155 ht$percent_mixed<-round(ht$percent_mixed, digits=2);
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
156
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
157 #reference alleles
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
158 refA <- apply(gt, MARGIN=2, function(x) {sum(lengths(regmatches(x, gregexpr("0/0", x))))} );
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
159 refA <- (refA / nrow(vcf)) * 100;
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
160 rA <- data.frame(refA);
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
161
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
162 # Convert refA data into data.table.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
163 rA <-setDT(rA, keep.rownames=TRUE)[];
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
164 setnames(rA, c("rn"), c("affy_id"));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
165 setnames(rA, c("refA"), c("percent_reference_coral"));
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
166 # round missing data to two digits.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
167 rA$percent_reference<-round(rA$percent_reference, digits=2);
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
168
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
169 #alternative alleles
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
170 altB <- apply(gt, MARGIN=2, function(x) {sum(lengths(regmatches(x, gregexpr("1/1", x))))} );
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
171 altB <- (altB / nrow(vcf)) * 100;
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
172 aB <- data.frame(altB);
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
173
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
174 # Convert altB data into data table.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
175 aB <-setDT(aB, keep.rownames=TRUE)[];
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
176 setnames(aB, c("rn"), c("affy_id"));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
177 setnames(aB, c("altB"), c("percent_alternative_coral"));
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
178 # Round missing data to two digits.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
179 aB$percent_alternative<-round(aB$percent_alternative, digits=2);
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
180
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
181 #convert mlg id to data.table format
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
182 dt <- data.table(id, keep.rownames=TRUE);
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
183 setnames(dt, c("id"), c("affy_id"));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
184
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
185 # Transform.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
186 df3 <- dt %>%
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
187 group_by(row_number()) %>%
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
188 dplyr::rename(group="row_number()") %>%
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
189 unnest (affy_id) %>%
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
190 # Join with mlg table.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
191 left_join(sm %>%
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
192 select("affy_id","coral_mlg_clonal_id"),
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
193 by="affy_id");
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
194
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
195 # If found in database, group members on previous mlg id.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
196 uniques <- unique(df3[c("group", "coral_mlg_clonal_id")]);
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
197 uniques <- uniques[!is.na(uniques$coral_mlg_clonal_id),];
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
198 na.mlg <- which(is.na(df3$coral_mlg_clonal_id));
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
199 na.group <- df3$group[na.mlg];
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
200 df3$coral_mlg_clonal_id[na.mlg] <- uniques$coral_mlg_clonal_id[match(na.group, uniques$group)];
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
201
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
202 # Determine if the sample mlg matched previous genotyped sample.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
203 df4<- df3 %>%
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
204 group_by(group) %>%
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
205 mutate(DB_match = ifelse(is.na(coral_mlg_clonal_id),"no_match", "match"));
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
206
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
207 # Create new mlg id for samples that did not match those in the database.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
208 none <- unique(df4[c("group", "coral_mlg_clonal_id")]);
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
209 none <- none[is.na(none$coral_mlg_clonal_id),];
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
210 na.mlg2 <- which(is.na(df4$coral_mlg_clonal_id));
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
211 n.g <- df4$group[na.mlg2];
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
212 ct <- length(unique(n.g));
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
213
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
214 # List of new group ids, the sequence starts at the number of
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
215 # ids present in df4$coral_mlg_clonal_ids plus 1. Not sure if
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
216 # the df4 file contains all ids. If it doesn't then look below
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
217 # to change the seq() function.
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
218 n.g_ids <- sprintf("HG%04d", seq((sum(!is.na(unique(df4["coral_mlg_clonal_id"]))) + 1), by=1, length=ct));
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
219 # Pair group with new ids.
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
220 rat <- cbind(unique(n.g), n.g_ids);
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
221 # Assign the new id iteratively for all that have NA.
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
222 for (i in 1:length(na.mlg2)) {
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
223 df4$coral_mlg_clonal_id[na.mlg2[i]] <- n.g_ids[match(df4$group[na.mlg2[i]], unique(n.g))];
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
224 }
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
225
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
226 # Subset poptab for all samples.
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
227 subpop <- poptab[c(2, 3)];
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
228
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
229 # Merge data frames for final table.
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
230 report_user <- pi %>%
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
231 left_join(subpop %>%
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
232 select("affy_id", "user_specimen_id"),
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
233 by="user_specimen_id") %>%
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
234 left_join(df4 %>%
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
235 select("affy_id", "coral_mlg_clonal_id", "DB_match"),
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
236 by="affy_id") %>%
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
237 left_join(mi %>%
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
238 select("affy_id", "percent_missing_data_coral"),
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
239 by="affy_id") %>%
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
240 left_join(ht %>%
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
241 select("affy_id", "percent_mixed_coral"),
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
242 by="affy_id") %>%
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
243 left_join(rA %>%
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
244 select("affy_id", "percent_reference_coral"),
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
245 by="affy_id") %>%
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
246 left_join(aB %>%
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
247 select("affy_id", "percent_alternative_coral"),
16
c4ec8727b50c Uploaded
greg
parents: 15
diff changeset
248 by="affy_id") %>%
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
249 mutate(DB_match = ifelse(is.na(DB_match), "failed", DB_match))%>%
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
250 mutate(coral_mlg_clonal_id = ifelse(is.na(coral_mlg_clonal_id), "failed", coral_mlg_clonal_id)) %>%
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
251 mutate(genetic_coral_species_call=ifelse(percent_alternative_coral >= 40 & percent_alternative_coral<= 44.5,"A.palmata","other")) %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
252 mutate(genetic_coral_species_call=ifelse(percent_alternative_coral >= 45.5 & percent_alternative_coral<= 50,"A.cervicornis",genetic_coral_species_call)) %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
253 mutate(genetic_coral_species_call=ifelse(percent_heterozygous_coral > 40,"A.prolifera",genetic_coral_species_call)) %>%
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
254 ungroup() %>%
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
255 select(-group);
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
256
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
257 write.csv(report_user, file=opt$output_stag_db_report, quote=FALSE);
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
258
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
259 # Database tables
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
260 ## Sample.table
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
261 sample_db <- pinfo %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
262 left_join(
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
263 report_user %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
264 select("user_specimen_id","affy_id",
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
265 "percent_missing_data_coral","percent_heterozygous_coral","percent_reference_coral",
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
266 "percent_alternative_coral"),
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
267 by='user_specimen_id');
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
268
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
269 ###representative clone for genotype.table
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
270 cc<-clonecorrect(obj2, strata= ~pop.gind.);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
271 id_rep<-mlg.id(cc);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
272 dt_cc<-data.table(id_rep,keep.rownames = TRUE);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
273 setnames(dt_cc, c("id_rep"), c("affy_id"));
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
274
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
275 ###transform mlg data.table
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
276 df_cc <- dt_cc %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
277 group_by(row_number()) %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
278 rename(group='row_number()') %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
279 unnest(affy_id) %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
280 left_join(report_user %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
281 select("coral_mlg_clonal_id","user_specimen_id","affy_id"),
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
282 by='affy_id') %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
283 mutate(coral_mlg_rep_sample_id=ifelse(is.na(coral_mlg_clonal_id),"",affy_id)) %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
284 ungroup() %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
285 select(-group);
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
286
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
287 ##geno.table
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
288 geno_db <- df4 %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
289 left_join(df_cc %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
290 select("affy_id","coral_mlg_rep_sample_id","user_specimen_id"),
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
291 by='affy_id') %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
292 ungroup() %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
293 select(-group);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
294
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
295 ##taxonomy.table
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
296
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
297 tax_db <- report_user %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
298 select(genetic_coral_species_call, affy_id) %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
299 mutate(genus_name =ifelse(genetic_coral_species_call==
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
300 genetic_coral_species_call[grep("^A.*",genetic_coral_species_call)],"Acropora","other")) %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
301 mutate(species_name=ifelse(genetic_coral_species_call=="A.palmata","palmata","other"))%>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
302 mutate(species_name=ifelse(genetic_coral_species_call =="A.cervicornis","cervicornis",species_name))%>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
303 mutate(species_name=ifelse(genetic_coral_species_call=="A.prolifera","prolifera", species_name));
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
304
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
305
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
306 # Table of alleles for the new samples
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
307 ## subset to new plate data
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
308 ### create vector indicating number of individuals desired
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
309 ### made from affy_id collumn from report_user data table
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
310 i<-ifelse(is.na(report_user[1]),"",report_user[[1]]);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
311 i<-i[!apply(i == "", 1, all),];
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
312 sub96<-obj2[i, mlg.reset = FALSE, drop = FALSE];
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
313
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
314 # convert to data frame
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
315 at_96<-genind2df(sub96, sep="");
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
316 at_96<- at_96 %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
317 select(-pop);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
318
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
319 # allele string for Allele.table in database
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
320 uat_96<-unite(at_96, alleles, 1:19696, sep = " ", remove = TRUE);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
321 uat_96<-setDT(uat_96, keep.rownames = TRUE)[];
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
322 setnames(uat_96, c("rn"), c("user_specimen_id"));
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
323 # write.csv(uat_96,file=paste("Seed_genotype_alleles.csv",sep = ""),quote=FALSE,row.names=FALSE);
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
324
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
325 # Create a phylogeny of samples based on distance matrices.
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
326 cols <- piratepal("basel");
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
327 set.seed(999);
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
328 # Start PDF device driver.
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
329 dev.new(width=10, height=7);
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
330 file_path = get_file_path("nj_phylogeny.pdf");
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
331 pdf(file=file_path, width=10, height=7);
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
332 # Organize branches by clade.
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
333 theTree <- sub96 %>%
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
334 aboot(dist=provesti.dist, sample=100, tree="nj", cutoff=50, quiet=TRUE) %>%
7
18001e7cb199 Uploaded
greg
parents: 4
diff changeset
335 ladderize();
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
336 theTree$tip.label <- report_user$user_specimen_id[match(theTree$tip.label, report_user$affy_id)];
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
337 plot.phylo(theTree, tip.color=cols[sub96$pop], label.offset=0.0125, cex=0.3, font=2, lwd=4, align.tip.label=F, no.margin=T);
4
a7cce4091e80 Uploaded
greg
parents: 3
diff changeset
338 # Add a scale bar showing 5% difference..
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
339 add.scale.bar(0, 0.95, length=0.05, cex=0.65, lwd=3);
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
340 nodelabels(theTree$node.label, cex=.5, adj=c(1.5, -0.1), frame="n", font=3, xpd=TRUE);
17
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
341 legend("topright", legend=c(levels(sub96$pop)), text.col=cols, xpd=T, cex=0.8);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
342 dev.off()
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
343
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
344 write.tree(theTree, file =opt$nj_tree, quote=FALSE);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
345
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
346 # identity-by-state analysis
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
347 #if (!requireNamespace("BiocManager", quietly = TRUE))
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
348 # install.packages("BiocManager")
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
349 #BiocManager::install("SNPRelate", version = "3.8")
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
350
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
351 #subset VCF to the user samples
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
352 l<-length(i);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
353 n<-ncol(vcf@gt);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
354 s<-n-l;
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
355 svcf<-vcf[,s:n];
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
356 write.vcf(svcf, "subset.vcf.gz");
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
357
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
358 vcf.fn <- "subset.vcf.gz";
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
359 snpgdsVCF2GDS(vcf.fn, "test3.gds", method="biallelic.only");
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
360
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
361 genofile <- snpgdsOpen(filename="test3.gds", readonly=FALSE);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
362 hd<-read.gdsn(index.gdsn(genofile, "sample.id"));
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
363 hd<-data.frame(hd);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
364 hd<-setDT(hd, keep.rownames = FALSE)[];
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
365 setnames(hd, c("hd"), c("user_specimen_id"));
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
366
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
367 subpop2<- poptab[c(2,4)];
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
368 poptab_sub <- hd %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
369 left_join(
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
370 subpop2 %>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
371 select("affy_id","region"),
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
372 by='affy_id')%>%
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
373 drop_na();
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
374
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
375 samp.annot <- data.frame(pop.group = c(poptab_sub$region));
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
376 add.gdsn(genofile, "sample.annot", samp.annot);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
377
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
378 pop_code <- read.gdsn(index.gdsn(genofile, path="sample.annot/pop.group"));
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
379 pop.group <- as.factor(read.gdsn(index.gdsn(genofile, "sample.annot/pop.group")));
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
380
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
381 # Identity-By-State Analysis - distance matrix calculation
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
382 ibs <- snpgdsIBS(genofile, num.thread=2, autosome.only=FALSE);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
383
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
384 # cluster analysis on the genome-wide IBS pairwise distance matrix
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
385 set.seed(100);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
386 par(cex=0.6, cex.lab=1, cex.axis=1.5,cex.main=2);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
387 ibs.hc <- snpgdsHCluster(snpgdsIBS(genofile, autosome.only=FALSE));
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
388
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
389 # default clustering.
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
390 dev.new(width=10, height=7);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
391 file_path = get_file_path("IBS_default.pdf");
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
392 pdf (file=file_path, width=10, height=7);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
393 rv <- snpgdsCutTree(ibs.hc, col.list=cols, pch.list=15);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
394 snpgdsDrawTree(rv, main="Color by Cluster", leaflab="perpendicular",y.label=0.2);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
395 legend("topleft", legend=levels(rv$samp.group), xpd=T, col=cols[1:nlevels(rv$samp.group)], pch=15, ncol=4, cex=1.2);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
396 dev.off()
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
397
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
398 # color cluster by region.
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
399 dev.new(width=10, height=7);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
400 file_path = get_file_path("IBS_Region.pdf");
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
401 pdf (file=file_path, width=10, height=7);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
402 race <- as.factor(pop_code);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
403 rv2 <- snpgdsCutTree(ibs.hc,samp.group=race,col.list=cols,pch.list=15);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
404 snpgdsDrawTree(rv2, main="Color by Region", leaflab="perpendicular",y.label=0.2);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
405 legend("topleft", legend=levels(race), xpd=T, col=cols[1:nlevels(race)], pch=15, ncol=4, cex=1.2);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
406 dev.off()
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
407
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
408 #close GDS file
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
409 snpgdsClose(genofile);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
410
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
411 # Sample MLG on a map.
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
412 world <- ne_countries(scale = "medium", returnclass = "sf");
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
413 class(world);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
414
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
415 pinfo$mlg<-report_user$coral_mlg_clonal_id;
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
416 n <- nrow(pinfo);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
417
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
418 mxlat<-max(pinfo$latitude,na.rm = TRUE);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
419 mnlat<-min(pinfo$latitude,na.rm = TRUE);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
420 mxlong<-max(pinfo$longitude,na.rm = TRUE);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
421 mnlong<-min(pinfo$longitude,na.rm = TRUE);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
422
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
423 p5<-ggplot(data = world) +
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
424 geom_sf() +
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
425 coord_sf(xlim = c(mnlong-3, mxlong+3), ylim = c(mnlat-3,mxlat+3), expand = FALSE);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
426
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
427 colourCount = length(unique(pinfo$mlg));
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
428 getPalette = colorRampPalette(piratepal("basel"));
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
429 dev.new(width=10, height=7);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
430 file_path = get_file_path("mlg_map.pdf");
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
431 pdf (file=file_path, width=10, height=7);
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
432 p6<-p5+ geom_point(data = pinfo,aes(x =longitude, y=latitude, group=mlg, color = mlg), alpha=.7, size=3)+
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
433 scale_color_manual(values=getPalette(colourCount))+
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
434 theme(legend.position="bottom")+
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
435 guides(color=guide_legend(nrow=8,byrow=F));
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
436 p6;
85f8fc57eee4 Uploaded
greg
parents: 16
diff changeset
437 dev.off()
0
725b160c91f0 Uploaded
greg
parents:
diff changeset
438
9
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
439 # Missing data barplot.
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
440 poptab$miss <- report_user$percent_missing_data_coral[match(miss$affy_id, report_user$affy_id)];
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
441 test2 <- which(!is.na(poptab$miss));
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
442 miss96 <- poptab$miss[test2];
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
443 name96 <- poptab$user_specimen_id[test2];
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
444 dev.new(width=10, height=7);
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
445 file_path = get_file_path("missing_data.pdf");
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
446 pdf (file=file_path, width=10, height=7);
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
447 par(mar = c(8, 4, 4, 2));
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
448 x <- barplot(miss96, las=2, col=cols, ylim=c(0, 3), cex.axis=0.8, space=0.8, ylab="Missingness (%)", xaxt="n");
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
449 text(cex=0.6, x=x-0.25, y=-.05, name96, xpd=TRUE, srt=60, adj=1);
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
450 dev.off()
8f2f346a5e1c Uploaded
greg
parents: 8
diff changeset
451
15
62ad61eac1ff Uploaded
greg
parents: 14
diff changeset
452 # Generate a pie chart for each sample with a genotype.
62ad61eac1ff Uploaded
greg
parents: 14
diff changeset
453 # Store the numerical and user_specimen_id values from
62ad61eac1ff Uploaded
greg
parents: 14
diff changeset
454 # report_user for the charts (user_specimen_id names
62ad61eac1ff Uploaded
greg
parents: 14
diff changeset
455 # will be used to label each chart).
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
456 dt1 <- data.table(report_user);
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
457 dt1 <- report_user[c(-2, -3, -4)];
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
458 dt1 <- na.omit(dt1);
15
62ad61eac1ff Uploaded
greg
parents: 14
diff changeset
459 # Translate to N (i.e., number of samples with a
62ad61eac1ff Uploaded
greg
parents: 14
diff changeset
460 # genotype) columns and 5 rows.
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
461 tdt1 <- t(dt1);
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
462 # Make another data table and transpose it the same as dt1 to
15
62ad61eac1ff Uploaded
greg
parents: 14
diff changeset
463 # get numerics. These will feed into the creation of N vectors.
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
464 dt2 <- data.table(report_user);
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
465 dt2 <- report_user[c(-1, -2, -3, -4)];
15
62ad61eac1ff Uploaded
greg
parents: 14
diff changeset
466 # Translate to N columns and 5 rows.
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
467 tdt2 <- t(dt2);
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
468 tdt1_matrix <- as.matrix(tdt1[-1,]);
15
62ad61eac1ff Uploaded
greg
parents: 14
diff changeset
469 # The number of columns is the number of samples with genotypes.
62ad61eac1ff Uploaded
greg
parents: 14
diff changeset
470 nc <- ncol(tdt1_matrix);
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
471 mode(tdt1_matrix) <- "numeric";
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
472 spy <- rowMeans(tdt1_matrix);
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
473 dev.new(width=10, height=7);
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
474 file_path = get_file_path("percent_breakdown.pdf");
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
475 pdf(file=file_path, width=10, height=7);
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
476 # Average pie of all samples.
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
477 labels <- paste(c("missing data", "mixed", "reference", "alternative"), " (", round(spy, 1), "%)", sep="");
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
478 col <- c("GREY", "#006DDB", "#24FF24", "#920000");
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
479 main <- "Average breakdown of SNP assignments across all samples";
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
480 pie(spy, labels=labels, radius=0.60, col=col, main=main, cex.main=.75);
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
481 par(mfrow=c(3, 2));
14
96ee9122823e Uploaded
greg
parents: 12
diff changeset
482 col <- c("GREY", "#006DDB", "#24FF24", "#920000");
15
62ad61eac1ff Uploaded
greg
parents: 14
diff changeset
483 for (i in 1:nc) {
14
96ee9122823e Uploaded
greg
parents: 12
diff changeset
484 tmp_labels <- paste(labels, " (", round(tdt1_matrix[,i], 1), "%)", sep="");
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
485 main <- paste("Breakdown of SNP assignments for", tdt1[1, i]);
14
96ee9122823e Uploaded
greg
parents: 12
diff changeset
486 pie(tdt1_matrix[,i], labels=tmp_labels, radius=0.90, col=col, main=main, cex.main=.85, cex=0.75);
12
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
487 }
ea2914ddea50 Uploaded
greg
parents: 10
diff changeset
488 dev.off()