Mercurial > repos > ecology > vigiechiro_idvalid
comparison IdValidTidy.R @ 1:1a77241a9fab draft default tip
planemo upload for repository https://github.com/galaxyecology/tools-ecology/tools/vigiechiro commit 7ef0e58cbcbf41088e359f00b6c86504c773c271
| author | ecology |
|---|---|
| date | Fri, 26 Apr 2019 12:15:41 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:aa3afb770148 | 1:1a77241a9fab |
|---|---|
| 1 #!/usr/bin/env Rscript | |
| 2 | |
| 3 args <- commandArgs(trailingOnly = TRUE) | |
| 4 | |
| 5 #print(args) | |
| 6 | |
| 7 library(data.table) | |
| 8 library(methods) | |
| 9 | |
| 10 | |
| 11 ValidHier=function(x,y) #used to write validator id over observer id | |
| 12 { | |
| 13 if(y==""){x}else{y} | |
| 14 } | |
| 15 | |
| 16 f2p <- function(x) #get date-time data from recording file names | |
| 17 { | |
| 18 if (is(x)[1] == "data.frame") {pretemps <- vector(length = nrow(x))} | |
| 19 op <- options(digits.secs = 3) | |
| 20 pretemps <- paste(substr(x, nchar(x) - 18, nchar(x)-4), ".", substr(x, nchar(x) - 2, nchar(x)), sep = "") | |
| 21 strptime(pretemps, "%Y%m%d_%H%M%OS",tz="UTC")-7200 | |
| 22 } | |
| 23 | |
| 24 | |
| 25 IdCorrect=fread(args[1]) | |
| 26 | |
| 27 #Step 0 :compute id score from 2nd Layer | |
| 28 IdCorrect$IdProb=IdCorrect$tadarida_probabilite | |
| 29 | |
| 30 IdCorrect$observateur_taxon[is.na(IdCorrect$observateur_taxon)]="" | |
| 31 IdCorrect$observateur_probabilite[is.na(IdCorrect$observateur_probabilite)]="" | |
| 32 IdCorrect$validateur_taxon[is.na(IdCorrect$validateur_taxon)]="" | |
| 33 IdCorrect$validateur_probabilite[is.na(IdCorrect$validateur_probabilite)]="" | |
| 34 | |
| 35 | |
| 36 | |
| 37 #Step 1 :compute id with confidence regarding a hierarchy (validator > observer) | |
| 38 IdCorrect$IdV=mapply(ValidHier,IdCorrect$observateur_taxon,IdCorrect$validateur_taxon) | |
| 39 IdCorrect$ConfV=mapply(ValidHier,IdCorrect$observateur_probabilite | |
| 40 ,IdCorrect$validateur_probabilite) | |
| 41 | |
| 42 | |
| 43 #print(paste(length(subset(IdCorrect$ConfV,IdCorrect$ConfV!="")))) | |
| 44 | |
| 45 #Step 2: Get numerictime data | |
| 46 if (substr(IdCorrect$`nom du fichier`[1],2,2)=="i") #for car/walk transects | |
| 47 { | |
| 48 FileInfo=as.data.table(tstrsplit(IdCorrect$`nom du fichier`,"-")) | |
| 49 IdCorrect$Session=as.numeric(substr(FileInfo$V4,5,nchar(FileInfo$V4))) | |
| 50 TimeSec=as.data.table(tstrsplit(FileInfo$V5,"_")) | |
| 51 TimeSec=as.data.frame(TimeSec) | |
| 52 if(sum(TimeSec[,(ncol(TimeSec)-1)]!="00000")==0) #to deal with double Kaleidoscope treatments | |
| 53 { | |
| 54 print("NOMS DE FICHIERS NON CONFORMES") | |
| 55 print("Vous les avez probablement traiter 2 fois par Kaleidoscope") | |
| 56 stop("Merci de nous signaler cette erreur par mail pour correction") | |
| 57 }else{ | |
| 58 IdCorrect$TimeNum=(IdCorrect$Session*800 | |
| 59 +as.numeric(TimeSec[,(ncol(TimeSec)-1)]) | |
| 60 +as.numeric(TimeSec[,(ncol(TimeSec))])/1000) | |
| 61 } | |
| 62 | |
| 63 }else{ | |
| 64 if(substr(IdCorrect$`nom du fichier`[1],2,2)=="a") #for stationary recordings | |
| 65 { | |
| 66 DateRec=as.POSIXlt(f2p(IdCorrect$`nom du fichier`)) | |
| 67 Nuit=format(as.Date(DateRec-43200*(DateRec$hour<12)),format="%d/%m/%Y") | |
| 68 #Nuit[is.na(Nuit)]=0 | |
| 69 IdCorrect$Session=Nuit | |
| 70 IdCorrect$TimeNum=as.numeric(DateRec) | |
| 71 | |
| 72 }else{ | |
| 73 print("NOMS DE FICHIERS NON CONFORMES") | |
| 74 stop("Ils doivent commencer par Cir (routier/pedestre) ou par Car (points fixes") | |
| 75 } | |
| 76 } | |
| 77 | |
| 78 #hist(IdCorrect$TimeNum) | |
| 79 | |
| 80 | |
| 81 | |
| 82 | |
| 83 #Step 3 :treat sequentially each species identified by Tadarida-C | |
| 84 IdExtrap=vector() #to store the id extrapolated from validations | |
| 85 IdC2=IdCorrect[0,] #to store data in the right order | |
| 86 TypeE=vector() #to store the type of extrapolation made | |
| 87 for (j in 1:nlevels(as.factor(IdCorrect$tadarida_taxon))) | |
| 88 { | |
| 89 IdSp=subset(IdCorrect | |
| 90 ,IdCorrect$tadarida_taxon==levels(as.factor(IdCorrect$tadarida_taxon))[j]) | |
| 91 if(sum(IdSp$IdV=="")==(nrow(IdSp))) #case 1 : no validation no change | |
| 92 { | |
| 93 IdC2=rbind(IdC2,IdSp) | |
| 94 IdExtrap=c(IdExtrap,rep(IdSp$tadarida_taxon[1],nrow(IdSp))) | |
| 95 TypeE=c(TypeE,rep(0,nrow(IdSp))) | |
| 96 }else{ #case 2: some validation | |
| 97 Vtemp=subset(IdSp,IdSp$IdV!="") | |
| 98 #case2A: validations are homogeneous | |
| 99 if(nlevels(as.factor(Vtemp$IdV))==1) | |
| 100 { | |
| 101 IdC2=rbind(IdC2,IdSp) | |
| 102 IdExtrap=c(IdExtrap,rep(Vtemp$IdV[1],nrow(IdSp))) | |
| 103 TypeE=c(TypeE,rep(2,nrow(IdSp))) | |
| 104 }else{ | |
| 105 #case 2B: validations are heterogeneous | |
| 106 #case 2B1: some validations confirms the species identified by Tadarida and highest confidence are confirmed | |
| 107 subVT=subset(Vtemp,Vtemp$IdV==levels(as.factor(IdCorrect$tadarida_taxon))[j]) | |
| 108 subVF=subset(Vtemp,Vtemp$IdV!=levels(as.factor(IdCorrect$tadarida_taxon))[j]) | |
| 109 if((nrow(subVT)>0)&(max(subVT$IdProb)>max(subVF$IdProb))) | |
| 110 { | |
| 111 Vtemp=Vtemp[order(Vtemp$IdProb),] | |
| 112 test=(Vtemp$IdV!=Vtemp$tadarida_taxon) | |
| 113 Fr1=max(which(test == TRUE)) #find the error with highest indices | |
| 114 Thr1=mean(Vtemp$IdProb[(Fr1):(Fr1+1)]) #define first threshold as the median confidence between the first error and the confirmed ID right over it | |
| 115 #id over this threshold are considered right | |
| 116 IdHC=subset(IdSp,IdSp$IdProb>Thr1) | |
| 117 IdC2=rbind(IdC2,IdHC) | |
| 118 IdExtrap=c(IdExtrap,rep(Vtemp$IdV[nrow(Vtemp)],nrow(IdHC))) | |
| 119 TypeE=c(TypeE,rep(2,nrow(IdHC))) | |
| 120 #id under this threshold are attributed to validated id closest in time | |
| 121 Vtemp=Vtemp[order(Vtemp$TimeNum),] | |
| 122 cuts <- c(-Inf, Vtemp$TimeNum[-1]-diff(Vtemp$TimeNum)/2, Inf) | |
| 123 CorrV=findInterval(IdSp$TimeNum, cuts) | |
| 124 IdE=Vtemp$IdV[CorrV] | |
| 125 IdEL=subset(IdE,IdSp$IdProb<=Thr1) | |
| 126 IdLC=subset(IdSp,IdSp$IdProb<=Thr1) | |
| 127 IdExtrap=c(IdExtrap,IdEL) | |
| 128 TypeE=c(TypeE,rep(1,length(IdEL))) | |
| 129 IdC2=rbind(IdC2,IdLC) | |
| 130 | |
| 131 | |
| 132 }else{ | |
| 133 #case 2B2: all validations concerns errors | |
| 134 #id are extrapolated on time only | |
| 135 Vtemp=Vtemp[order(Vtemp$TimeNum),] | |
| 136 cuts <- c(-Inf, Vtemp$TimeNum[-1]-diff(Vtemp$TimeNum)/2, Inf) | |
| 137 CorrV=findInterval(IdSp$TimeNum, cuts) | |
| 138 IdE=Vtemp$IdV[CorrV] | |
| 139 IdExtrap=c(IdExtrap,IdE) | |
| 140 TypeE=c(TypeE,rep(1,length(IdE))) | |
| 141 IdC2=rbind(IdC2,IdSp) | |
| 142 } | |
| 143 } | |
| 144 | |
| 145 | |
| 146 } | |
| 147 | |
| 148 #print(paste(j,nrow(IdC2),length(IdExtrap))) | |
| 149 | |
| 150 } | |
| 151 test1=(nrow(IdC2)==length(IdExtrap)) | |
| 152 test2=(nrow(IdC2)==nrow(IdCorrect)) | |
| 153 if((test1==F)|(test2==F)) | |
| 154 { | |
| 155 (stop("Erreur de traitement !!!")) | |
| 156 } | |
| 157 | |
| 158 IdC2$IdExtrap=IdExtrap | |
| 159 IdC2$TypeE=TypeE | |
| 160 | |
| 161 | |
| 162 IdC2=IdC2[order(IdC2$IdProb,decreasing=T),] | |
| 163 IdC2=IdC2[order(IdC2$ConfV,decreasing=T),] | |
| 164 IdC2=IdC2[order(IdC2$`nom du fichier`),] | |
| 165 #discard duplicated species within the same files (= false positives corrected by 2nd layer) | |
| 166 IdC2=unique(IdC2,by=c("nom du fichier","IdExtrap")) | |
| 167 | |
| 168 | |
| 169 | |
| 170 write.table(IdC2,"IdValidTidy.tabular",row.names=F,sep="\t") |
