annotate retrieve_bold.R @ 0:485e1d2753a2 draft default tip

planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
author ecology
date Fri, 21 Jun 2024 08:55:36 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
1 #!/bin/Rscript
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
2
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
3 library(bold)
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
4
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
# Command-line arguments: args[1] is the taxon list file (read further down),
# args[2] to args[6] are the marker selections.
args <- commandArgs(trailingOnly = TRUE)

# Each marker argument may itself be a comma-separated list, so the arguments
# are joined and split again before de-duplicating.
# Arguments that were not supplied index as NA; they are dropped here because
# paste() would otherwise turn them into the literal marker name "NA".
marker_args <- args[2:6]
marker_args <- marker_args[!is.na(marker_args)]
raw_marker_list <- paste(marker_args, collapse = ",")
marker_list_W_none <- unique(strsplit(raw_marker_list, ",")[[1]])
# "None" is the placeholder for an unused marker slot.
marker_list <- marker_list_W_none[marker_list_W_none != "None"]
cat("researched marker(s):", marker_list, "\n\n")
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
11
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
# Search BOLD for one taxon and write the sequences available for the
# requested marker(s) to a FASTA file.
#
# taxon:    taxon name passed to bold_seqspec().
# filename: path of the FASTA file to write.
# arg_mark: character vector of marker codes; records are kept when their
#           `markercode` equals one of them.
#
# Each record is written as two lines: a header
#   >processid|taxon|markercode[|genbank_accession]
# followed by the nucleotide sequence. The taxon is the most specific
# non-empty name among species, genus, family, order, class and phylum.
# Returns NULL invisibly. Nothing is written when no record matches.
get_fasta <- function(taxon, filename, arg_mark) {
  bold_res <- bold_seqspec(taxon = taxon)
  # NOTE(review): bold_seqspec() appears to return a non-data-frame value (NA)
  # when BOLD has nothing for the taxon -- confirm against the bold package.
  if (!is.data.frame(bold_res)) {
    cat("no records were returned by BOLD for", taxon, "\n")
    return(invisible(NULL))
  }
  cat(taxon, "marker list:", unique(bold_res$markercode), "\n")

  # Row positions are collected marker by marker, so the output stays grouped
  # in the order of arg_mark. which() discards NA comparisons; indexing with
  # the raw logical vector would insert an all-NA row for every record whose
  # markercode is missing.
  keep <- unlist(lapply(arg_mark, function(mark) {
    which(bold_res$markercode == mark)
  }))
  if (length(keep) == 0) {
    cat(
      "no sequences were found with selected marker(s) for", taxon,
      "see existing marker list above", "\n"
    )
    return(invisible(NULL))
  }
  x <- bold_res[keep, , drop = FALSE]

  # Fetch a column with empty strings turned into NA, so that "no value" is
  # represented one way only.
  field <- function(name) {
    values <- x[[name]]
    values[!is.na(values) & values == ""] <- NA
    values
  }

  # Start from the coarsest rank and overwrite with every finer rank that has
  # a name, which leaves the most specific available name per record.
  b_tax <- field("phylum_name")
  finer_ranks <- c(
    "class_name", "order_name", "family_name", "genus_name", "species_name"
  )
  for (rank in finer_ranks) {
    rank_name <- field(rank)
    b_tax <- ifelse(!is.na(rank_name), rank_name, b_tax)
  }

  # The GenBank accession is appended only when the record has one.
  accession <- field("genbank_accession")
  n_acc <- ifelse(is.na(accession), "", paste0("|", accession))

  seqname <- paste(field("processid"), b_tax, field("markercode"), sep = "|")
  headers <- paste0(">", seqname, n_acc)

  # rbind() stacks headers over sequences; flattening the matrix column by
  # column interleaves them as header, sequence, header, sequence, ...
  fasta_lines <- c(rbind(headers, field("nucleotides")))
  writeLines(fasta_lines, filename)
  invisible(NULL)
}
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
40
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
41
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
42
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
# Read the taxa to query, one name per line, from the file given as the first
# command-line argument. The path is handed straight to readLines() so that R
# opens and closes the file itself; wrapping it in file() left a connection
# object behind that was never closed explicitly.
taxlist <- readLines(as.character(args[1]))
# Blank lines do not name a taxon, so they are skipped.
taxlist <- taxlist[nzchar(trimws(taxlist))]

# Download each taxon in turn into "<taxon>bold.fasta". Looping over the
# vector itself means an empty list does nothing, whereas 1:length(taxlist)
# would run for indices 1 and 0. An error for one taxon is reported and the
# loop carries on with the next one.
for (taxon in taxlist) {
  cat("Processing ", taxon, "\n")
  tryCatch(
    get_fasta(taxon, paste0(taxon, "bold", ".fasta"), marker_list),
    error = function(e) {
      cat("ERROR :", conditionMessage(e), "\n")
    }
  )
}
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
52
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
53
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
54
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
55
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
56
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
57
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
58
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
59
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
60
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
61
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
62
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
63
485e1d2753a2 planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff changeset
64