Mercurial > repos > ecology > retrieve_bold
comparison retrieve_bold.R @ 0:485e1d2753a2 draft default tip
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
| author | ecology |
|---|---|
| date | Fri, 21 Jun 2024 08:55:36 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:485e1d2753a2 |
|---|---|
| 1 #!/bin/Rscript | |
| 2 | |
| 3 library(bold) | |
| 4 | |
| 5 args = commandArgs(trailingOnly=TRUE) | |
| 6 | |
| 7 raw_marker_list <- paste(args[2],args[3],args[4],args[5],args[6], sep= ",") | |
| 8 marker_list_W_none <- unique(strsplit(raw_marker_list, ",")[[1]]) | |
| 9 marker_list <- marker_list_W_none[marker_list_W_none != "None"] | |
| 10 cat("researched marker(s):", marker_list, "\n\n") | |
| 11 | |
| 12 #Functions to retrieve the subtaxa of each family ((get)subtaxa) and search in Bold and download the available sequences of each subtaxa (get_fasta) | |
| 13 get_fasta<-function(taxon,filename,arg_mark){ | |
| 14 bold_res<-bold_seqspec(taxon=taxon) | |
| 15 cat(taxon, "marker list:", unique(bold_res$markercode), "\n") | |
| 16 x <- data.frame() | |
| 17 for (mark in arg_mark){x <- rbind(x, bold_res[bold_res$markercode == mark,])} | |
| 18 if (dim(x)[1] == 0){return(cat("no sequences were found with selected marker(s) for", taxon, "see existing marker list above", "\n"))} | |
| 19 x[x==""] <- NA | |
| 20 b_acc <- x$processid | |
| 21 b_tax <- ifelse(!is.na(x$species_name),x$species_name,ifelse(!is.na(x$genus_name),x$genus_name,ifelse( | |
| 22 !is.na(x$family_name),x$family_name,ifelse( | |
| 23 !is.na(x$order_name),x$order_name,ifelse( | |
| 24 !is.na(x$class_name),x$class_name,x$phylum_name))))) | |
| 25 b_mark <- x$markercode | |
| 26 n_acc <- ifelse(!is.na(x$genbank_accession),ifelse(!is.na(x$genbank_accession),paste0("|",x$genbank_accession),""),"") | |
| 27 | |
| 28 seq <- x$nucleotides | |
| 29 seqname <- paste(b_acc,b_tax,b_mark,sep="|") | |
| 30 seqname <- paste0(seqname,n_acc) | |
| 31 Y <- cbind(seqname,seq) | |
| 32 colnames(Y) <- c("name","seq") | |
| 33 fastaLines = c() | |
| 34 for (rowNum in 1:nrow(Y)){ | |
| 35 fastaLines = c(fastaLines, as.character(paste(">", Y[rowNum,"name"], sep = ""))) | |
| 36 fastaLines = c(fastaLines,as.character(Y[rowNum,"seq"])) | |
| 37 } | |
| 38 writeLines(fastaLines,filename) | |
| 39 } | |
| 40 | |
| 41 | |
| 42 | |
| 43 taxlist <- readLines(file(as.character(args[1]))) | |
| 44 | |
| 45 for (i in 1:length(taxlist)) { | |
| 46 cat("Processing ", taxlist[i], "\n") | |
| 47 tryCatch({ | |
| 48 get_fasta(taxlist[i],paste0(taxlist[i],"bold",".fasta"),marker_list)}, | |
| 49 error=function(e){cat("ERROR :",conditionMessage(e), "\n")} | |
| 50 ) | |
| 51 } | |
| 52 | |
| 53 | |
| 54 | |
| 55 | |
| 56 | |
| 57 | |
| 58 | |
| 59 | |
| 60 | |
| 61 | |
| 62 | |
| 63 | |
| 64 |
