From a4aef26e556e82d06723d187de6c19db2ce60c7e Mon Sep 17 00:00:00 2001 From: Etienne Rifa <etienne.rifa[at]insa-toulouse.fr> Date: Tue, 11 May 2021 12:42:40 +0200 Subject: [PATCH 1/5] allow assign with more than 2 DBs --- R/assign_fasta_fun.R | 103 +++++++++++++++++++----------- R/idtaxaDB_formatting_functions.R | 4 +- 2 files changed, 70 insertions(+), 37 deletions(-) diff --git a/R/assign_fasta_fun.R b/R/assign_fasta_fun.R index c536a80..70972f6 100644 --- a/R/assign_fasta_fun.R +++ b/R/assign_fasta_fun.R @@ -45,39 +45,54 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co } if(length(db_list)>1){ - flog.info('Merging both annotation...') + flog.info('Merging all annotations...') + + Fannot = list() for (seq_name in names(dna)){ - #for each seq - if(length(taxid_list[[1]][[seq_name]]$taxon) == 0 & length(taxid_list[[2]][[seq_name]]$taxon) == 0){ - taxid_list[[3]][[seq_name]]$taxon = rep("unassigned", 8) - taxid_list[[3]][[seq_name]]$confidence = rep(0, 8) + print(seq_name) + # create list + L1=list(); L2=list() + for (i in 1:length(db_list)){ + L1[[i]] = taxid_list[[i]][[seq_name]]$taxon + L2[[i]] = taxid_list[[i]][[seq_name]]$confidence + } + + depth1 = sapply(L1, length) + if ( all(depth1 == 0) ){ + print("unassigned") + Fannot[[seq_name]]$taxon = rep("unassigned", 8) + Fannot[[seq_name]]$confidence = rep(0, 8) } else { - if(length(taxid_list[[1]][[seq_name]]$taxon) == length(taxid_list[[2]][[seq_name]]$taxon)){ - #if same assignation depth, test on confidence - last_rank <- length(taxid_list[[1]][[seq_name]]$taxon) - if(taxid_list[[1]][[seq_name]]$confidence[last_rank] > taxid_list[[2]][[seq_name]]$confidence[last_rank]){ - taxid_list[[3]][[seq_name]] <- taxid_list[[1]][[seq_name]] #DB1 - } else{ - taxid_list[[3]][[seq_name]] <- taxid_list[[2]][[seq_name]] #DB2 - } + if(length(unique(depth1)) == 1){ + print("best conf") + #if same assignation depth, test on confidence + last_rank <- length(taxid_list[[1]][[seq_name]]$taxon) + conf1 = sapply(L2, function(x){x[last_rank]}) + Fannot[[seq_name]] <- taxid_list[[which(conf1 == max(conf1))[1]]][[seq_name]] # ..[1] in case of same depth / same conf. } else { - #if different assignation depth - if(length(taxid_list[[1]][[seq_name]]$taxon) > length(taxid_list[[2]][[seq_name]]$taxon)){ - taxid_list[[3]][[seq_name]] <- taxid_list[[1]][[seq_name]] #DB1 - } else{ - taxid_list[[3]][[seq_name]] <- taxid_list[[2]][[seq_name]] #DB2 + + if( any( table(depth1) > 1 ) ){ + # if some equal depth + print("some equal depth, best conf") + last_rank <- max(sapply(L1, length)) + maxDepth_list = L2[which(depth1 == max(depth1))] + conf1 = sapply(L2, function(x){x[last_rank]}); conf1[is.na(conf1)] = 0 + Fannot[[seq_name]] <- taxid_list[[which(conf1 == max(conf1))]][[seq_name]] + }else{ + #if all different assignation depth + print("best depth") + Fannot[[seq_name]] <- taxid_list[[which(depth1 == max(depth1))]][[seq_name]] } } - } - + print(length(Fannot)) } flog.info('Done.') flog.info('Output table...') #Process output table with different assignations. all_annot_tab = data.frame(row.names=names(taxid_list[[1]])) - for (i in 1:3){ + for (i in 1:length(db_list)){ if(i<3){print(db_list[i])}else{print("Final")} ALLassign <- sapply(taxid_list[[i]],function (id) { paste(id$taxon," (",round(id$confidence, digits=1),"%)",sep="",collapse="; ") @@ -85,17 +100,21 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co all_annot_tab[,i] <- ALLassign if(verbose == 3){print(head(all_annot_tab))} } + FINALassign <- sapply(Fannot,function (id) { + paste(id$taxon," (",round(id$confidence, digits=1),"%)",sep="",collapse="; ") + }) + all_annot_tab$FINAL = FINALassign seq_tab <- data.frame(sequences=as.character(dna)) row.names(seq_tab) <- names(dna) final_tax_table <- merge(all_annot_tab, seq_tab, by = "row.names") - colnames(final_tax_table)=c("ASV",db_list[1], db_list[2], "FINAL", "sequences") + colnames(final_tax_table)=c("ASV",db_list, "FINAL", "sequences") write.table(final_tax_table, paste(output,"/allDB_tax_table.csv",sep=""), quote = FALSE, sep = "\t", row.names = FALSE) # Keep merged assignment (taxid_list[[3]]) - taxid <- sapply(taxid_list[[3]], function(x) { + taxid <- sapply(Fannot, function(x) { taxa <- rep(NA,7) assign <- x$taxon # print(assign) @@ -133,7 +152,7 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co taxid[[nn]] = taxid[[nn]][1:7] } } - taxid <- t(as.data.frame(taxid)) + taxid <- as.data.frame(t(taxid)) flog.info('Done.') if(verbose == 3){ @@ -145,33 +164,45 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co flog.info("Filling missing taxonomy ranks...") PREFIX = c("k__","p__","c__","o__","f__","g__","s__") - if( length(grep("p__",taxid[,2])) == 0 ){ - taxid = fill_tax_fun(taxid, prefix = FALSE) - taxid = as.data.frame( t(apply(taxid, 1, function(x){ paste(PREFIX, x, sep="")})), stringAsFactors = FALSE) - }else{ - taxid = fill_tax_fun(taxid, prefix = TRUE) - } + noprefix_taxid = taxid[grep("k__",taxid[,1], invert = TRUE),] + prefix_taxid = taxid[grep("k__",taxid[,1]),] + + noprefix_taxid = fill_tax_fun(noprefix_taxid, prefix = FALSE) + noprefix_taxid = as.data.frame( t(apply(noprefix_taxid, 1, function(x){ paste(PREFIX, x, sep="")})), stringAsFactors = FALSE) + names(noprefix_taxid) = names(taxid) + + prefix_taxid = fill_tax_fun(prefix_taxid, prefix = TRUE) + + filled_taxid = rbind.data.frame(noprefix_taxid, prefix_taxid) + tax.table = filled_taxid[names(dna),] + + # if( length(grep("p__",taxid[,2])) == 0 ){ + # taxid = fill_tax_fun(taxid, prefix = FALSE) + # taxid = as.data.frame( t(apply(taxid, 1, function(x){ paste(PREFIX, x, sep="")})), stringAsFactors = FALSE) + # }else{ + # taxid = fill_tax_fun(taxid, prefix = TRUE) + # } flog.info('Done.') - names(taxid) <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species") - tax.table <- tax_table(as.matrix(taxid)) + # names(taxid) <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species") + # tax.table <- tax_table(as.matrix(taxid)) flog.info("Check taxonomy consistency...") - tax.table = check_tax_fun(tax.table, output = NULL) + tax.tablecheck = check_tax_fun(tax.table, output = NULL, verbose = 3) flog.info("Done.") #Output table 2 - Tab2 = apply( as.data.frame(tax.table, stringsAsFactors=FALSE) , 1 , paste , collapse = ";" ) + Tab2 = apply( as.data.frame(tax.tablecheck, stringsAsFactors=FALSE) , 1 , paste , collapse = ";" ) Tab2 <- cbind(Tab2, seq=as.character(dna)) write.table(Tab2, paste(output,"/final_tax_table.csv",sep=""), quote = FALSE, sep = "\t", col.names = FALSE, row.names = TRUE) flog.info('Saving R objects.') - save(tax.table, file=paste(output,'/robjects.Rdata',sep='')) + save(tax.tablecheck, file=paste(output,'/robjects.Rdata',sep='')) flog.info('Finish.') - if(returnval){return(tax.table)} + if(returnval){return(tax.tablecheck)} } diff --git a/R/idtaxaDB_formatting_functions.R b/R/idtaxaDB_formatting_functions.R index 7cb1156..049095b 100644 --- a/R/idtaxaDB_formatting_functions.R +++ b/R/idtaxaDB_formatting_functions.R @@ -101,15 +101,17 @@ check_tax_fun <- function(taxtable = taxtable, output = NULL, rank = 7, verbose= ftax <- names(uniqTax2[order(uniqTax2,decreasing=TRUE)])[1] ftax <- unlist(strsplit(ftax,";")) + cat( glue::glue( "CORRECTED : {paste(ftax, collapse = ';')}" ) ); cat("\n") #Change taxonomy with final ftax. the most common in taxtable for(j in row.names(taxtable[taxtable[,rk]==i,])){ if(rk == 7){ + # print(ftax) taxtable[j,] = ftax }else{ - taxtable[j,] = c(ftax, taxtable[j,(rk+1):ncol(taxtable)]) + taxtable[j,] = c(ftax, as.matrix(taxtable[j,(rk+1):ncol(taxtable)]) ) } } } -- GitLab From a62f83094a89fd745ac90be9b70bd8f2e8cd6c02 Mon Sep 17 00:00:00 2001 From: Etienne Rifa <etienne.rifa[at]insa-toulouse.fr> Date: Wed, 12 May 2021 10:29:43 +0200 Subject: [PATCH 2/5] debug prefix assign --- R/assign_fasta_fun.R | 44 +++++++++++++------------------ R/idtaxaDB_formatting_functions.R | 1 - 2 files changed, 19 insertions(+), 26 deletions(-) diff --git a/R/assign_fasta_fun.R b/R/assign_fasta_fun.R index 70972f6..6569517 100644 --- a/R/assign_fasta_fun.R +++ b/R/assign_fasta_fun.R @@ -45,11 +45,12 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co } if(length(db_list)>1){ + # Multiple references flog.info('Merging all annotations...') Fannot = list() for (seq_name in names(dna)){ - print(seq_name) + if(verbose == 3){message(seq_name)} # create list L1=list(); L2=list() for (i in 1:length(db_list)){ @@ -59,12 +60,12 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co depth1 = sapply(L1, length) if ( all(depth1 == 0) ){ - print("unassigned") + if(verbose == 3){message("unassigned")} Fannot[[seq_name]]$taxon = rep("unassigned", 8) Fannot[[seq_name]]$confidence = rep(0, 8) } else { if(length(unique(depth1)) == 1){ - print("best conf") + if(verbose == 3){message("best conf")} #if same assignation depth, test on confidence last_rank <- length(taxid_list[[1]][[seq_name]]$taxon) conf1 = sapply(L2, function(x){x[last_rank]}) @@ -73,19 +74,19 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co if( any( table(depth1) > 1 ) ){ # if some equal depth - print("some equal depth, best conf") + if(verbose == 3){message("some equal depth, best conf")} last_rank <- max(sapply(L1, length)) maxDepth_list = L2[which(depth1 == max(depth1))] conf1 = sapply(L2, function(x){x[last_rank]}); conf1[is.na(conf1)] = 0 Fannot[[seq_name]] <- taxid_list[[which(conf1 == max(conf1))]][[seq_name]] }else{ #if all different assignation depth - print("best depth") + if(verbose == 3){message("best depth")} Fannot[[seq_name]] <- taxid_list[[which(depth1 == max(depth1))]][[seq_name]] } } } - print(length(Fannot)) + if(verbose == 3){message(length(Fannot))} } flog.info('Done.') @@ -113,7 +114,7 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co colnames(final_tax_table)=c("ASV",db_list, "FINAL", "sequences") write.table(final_tax_table, paste(output,"/allDB_tax_table.csv",sep=""), quote = FALSE, sep = "\t", row.names = FALSE) - # Keep merged assignment (taxid_list[[3]]) + # Keep merged assignment (Fannot) taxid <- sapply(Fannot, function(x) { taxa <- rep(NA,7) assign <- x$taxon @@ -127,7 +128,6 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co return(taxa) }) - # taxid <- as.data.frame(taxid) }else{ # One DB assignment @@ -163,33 +163,27 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co names(taxid) <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species") flog.info("Filling missing taxonomy ranks...") PREFIX = c("k__","p__","c__","o__","f__","g__","s__") - + # handling possible prefix noprefix_taxid = taxid[grep("k__",taxid[,1], invert = TRUE),] prefix_taxid = taxid[grep("k__",taxid[,1]),] - noprefix_taxid = fill_tax_fun(noprefix_taxid, prefix = FALSE) - noprefix_taxid = as.data.frame( t(apply(noprefix_taxid, 1, function(x){ paste(PREFIX, x, sep="")})), stringAsFactors = FALSE) - names(noprefix_taxid) = names(taxid) - - prefix_taxid = fill_tax_fun(prefix_taxid, prefix = TRUE) + if(nrow(noprefix_taxid) != 0){ + noprefix_taxid = fill_tax_fun(noprefix_taxid, prefix = FALSE) + noprefix_taxid = as.data.frame( t(apply(noprefix_taxid, 1, function(x){ paste(PREFIX, x, sep="")})), stringAsFactors = FALSE) + names(noprefix_taxid) = names(taxid) + } + if(nrow(prefix_taxid) != 0){ + prefix_taxid = fill_tax_fun(prefix_taxid, prefix = TRUE) + } filled_taxid = rbind.data.frame(noprefix_taxid, prefix_taxid) + tax.table = filled_taxid[names(dna),] - # if( length(grep("p__",taxid[,2])) == 0 ){ - # taxid = fill_tax_fun(taxid, prefix = FALSE) - # taxid = as.data.frame( t(apply(taxid, 1, function(x){ paste(PREFIX, x, sep="")})), stringAsFactors = FALSE) - # }else{ - # taxid = fill_tax_fun(taxid, prefix = TRUE) - # } flog.info('Done.') - # names(taxid) <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species") - # tax.table <- tax_table(as.matrix(taxid)) - - flog.info("Check taxonomy consistency...") - tax.tablecheck = check_tax_fun(tax.table, output = NULL, verbose = 3) + tax.tablecheck = check_tax_fun(tax.table, output = NULL, rank = 6, verbose = 3) flog.info("Done.") #Output table 2 diff --git a/R/idtaxaDB_formatting_functions.R b/R/idtaxaDB_formatting_functions.R index 049095b..b0ce69a 100644 --- a/R/idtaxaDB_formatting_functions.R +++ b/R/idtaxaDB_formatting_functions.R @@ -67,7 +67,6 @@ fill_tax_fun <- function(taxtable = taxtable, prefix = TRUE){ check_tax_fun <- function(taxtable = taxtable, output = NULL, rank = 7, verbose=3, returnval = TRUE){ RANKS = c("_domain","_phylum","_class","_order","_family","_genus","_species") - print("Check taxonomy consistency...") # Check for multiple ancestors at each rank, choose first occurence for each problematic taxon sink(paste('./check_tax_fun.log', sep=""), split = TRUE) for(rk in rank:2){ -- GitLab From e525fcf667a064b729748aad639255c736c14e98 Mon Sep 17 00:00:00 2001 From: Etienne Rifa <etienne.rifa[at]insa-toulouse.fr> Date: Tue, 8 Jun 2021 16:40:08 +0200 Subject: [PATCH 3/5] fix redundant codes integrate idtaxa_assign_fasta_fun into assign_taxo,same process --- R/assign_fasta_fun.R | 2 +- R/assign_taxo_fun.R | 155 ++----------------------------------------- 2 files changed, 5 insertions(+), 152 deletions(-) diff --git a/R/assign_fasta_fun.R b/R/assign_fasta_fun.R index 6569517..2a49fa5 100644 --- a/R/assign_fasta_fun.R +++ b/R/assign_fasta_fun.R @@ -21,7 +21,7 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co if(!dir.exists(output)){ flog.debug('Creating output directory...') - dir.create(output) + dir.create(output, recursive = TRUE) flog.debug('Done.') } diff --git a/R/assign_taxo_fun.R b/R/assign_taxo_fun.R index fc427d3..2de2499 100644 --- a/R/assign_taxo_fun.R +++ b/R/assign_taxo_fun.R @@ -32,162 +32,15 @@ assign_taxo_fun <- function(dada_res = dada_res, output = "./idtaxa/", id_db = } - if(!dir.exists(output)){ - flog.debug('Creating output directory...') - dir.create(output) - flog.debug('Done.') - } ## IDATAXA assignment - flog.info(paste('Taxonomy assignation with IDTAXA',sep='')) dna <- DNAStringSet(getSequences(dada_res$seqtab.nochim)) + names(dna) <- sapply(colnames(dada_res$seqtab.nochim), digest::digest, algo="md5") + if(!all( names(dna) == rownames(dada_res$otu.table) )){stop("Seq names and ASV table row names are different")} - db_list <- unlist(strsplit(id_db,",")) - taxid_list=vector("list", length(db_list)+1) - for (i in 1:length(db_list)){ - db_file <- db_list[i] - taxid_list[[i]] <- idTaxa_assign(db_file, dna, colnames(dada_res$seqtab.export), confidence) - } - - if(verbose == 3){ - save(taxid_list, file = "./annot_taxid_list_debug.rdata") - } - - if(length(db_list)>1){ - flog.info('Merging both annotation...') - for (seq_name in colnames(dada_res$seqtab.export)){ - #for each seq - if(length(taxid_list[[1]][[seq_name]]$taxon) == 0 & length(taxid_list[[2]][[seq_name]]$taxon) == 0){ - taxid_list[[3]][[seq_name]]$taxon = rep("unassigned", 8) - taxid_list[[3]][[seq_name]]$confidence = rep(0, 8) - } else { - if(length(taxid_list[[1]][[seq_name]]$taxon) == length(taxid_list[[2]][[seq_name]]$taxon)){ - #if same assignation depth, test on confidence - last_rank <- length(taxid_list[[1]][[seq_name]]$taxon) - if(taxid_list[[1]][[seq_name]]$confidence[last_rank] > taxid_list[[2]][[seq_name]]$confidence[last_rank]){ - taxid_list[[3]][[seq_name]] <- taxid_list[[1]][[seq_name]] #DB1 - } else{ - taxid_list[[3]][[seq_name]] <- taxid_list[[2]][[seq_name]] #DB2 - } - } else { - #if different assignation depth - if(length(taxid_list[[1]][[seq_name]]$taxon) > length(taxid_list[[2]][[seq_name]]$taxon)){ - taxid_list[[3]][[seq_name]] <- taxid_list[[1]][[seq_name]] #DB1 - } else{ - taxid_list[[3]][[seq_name]] <- taxid_list[[2]][[seq_name]] #DB2 - } - } - - } - - } - flog.info('Done.') - - flog.info('Output table...') - #Process output table with different assignations. - all_annot_tab = data.frame(row.names=names(taxid_list[[1]])) - for (i in 1:3){ - if(i<3){print(db_list[i])}else{print("Final")} - ALLassign <- sapply(taxid_list[[i]],function (id) { - paste(id$taxon," (",round(id$confidence, digits=1),"%)",sep="",collapse="; ") - }) - all_annot_tab[,i] <- ALLassign - if(verbose == 3){print(head(all_annot_tab))} - } - - seq_tab <- data.frame(sequences=as.character(dna)) - row.names(seq_tab) <- colnames(dada_res$seqtab.export) - final_tax_table <- merge(all_annot_tab, seq_tab, by = "row.names") - - - colnames(final_tax_table)=c("ASV",db_list[1], db_list[2], "FINAL", "sequences") - write.table(final_tax_table, paste(output,"/allDB_tax_table.csv",sep=""), quote = FALSE, sep = "\t", row.names = FALSE) - - # Keep merged assignment (taxid_list[[3]]) - taxid <- sapply(taxid_list[[3]], function(x) { - taxa <- rep(NA,7) - assign <- x$taxon - # print(assign) - if(length(assign[-1])==0){ - taxa <- rep("unassigned",7) - } else { - taxa[1:length(assign[-1])] <- assign[-1] - } - - return(taxa) - }) - - # taxid <- as.data.frame(taxid) - - }else{ - # One DB assignment - taxid <- sapply(taxid_list[[1]], function(x) { - taxa <- rep(NA,7) - assign <- x$taxon - # print(assign) - if(length(assign[-1])==0){ - taxa <- rep("unassigned",7) - } else { - taxa[1:length(assign[-1])] <- assign[-1] - } - - return(taxa) - }) - } - - if(max(sapply(taxid, length)) > 7){ - LL=sapply(taxid, length) - Tnames=names(LL[LL>7]) - for(nn in Tnames){ - taxid[[nn]] = taxid[[nn]][1:7] - } - } - FeatNames = colnames(taxid); if(is.null(FeatNames)){FeatNames = names(taxid)} - taxid <- t(as.data.frame(taxid)) - row.names(taxid) = FeatNames - flog.info('Done.') - - if(verbose == 3){ - save(taxid, file = "./annot_taxid_debug.rdata") - } - - - # Filling taxonomy with last assigned rank. - names(taxid) <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species") - flog.info("Filling missing taxonomy ranks...") - PREFIX = c("k__","p__","c__","o__","f__","g__","s__") - - if( length(grep("p__",taxid[,2])) == 0 ){ - taxid = fill_tax_fun(taxid, prefix = FALSE) - taxid = as.data.frame( t(apply(taxid, 1, function(x){ paste(PREFIX, x, sep="")})), stringAsFactors = FALSE) - }else{ - taxid = fill_tax_fun(taxid, prefix = TRUE) - } - - - flog.info('Done.') - - names(taxid) <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species") - tax.table <- tax_table(as.matrix(taxid)) - - - flog.info("Check taxonomy consistency...") - tax.table = check_tax_fun(tax.table, output = NULL) - flog.info("Done.") - - #Output table 2 - Tab2 = apply( as.data.frame(tax.table, stringsAsFactors=FALSE) , 1 , paste , collapse = ";" ) - Tab2 <- cbind(Tab2, seq=colnames(dada_res$seqtab.nochim)) - write.table(Tab2, paste(output,"/final_tax_table.csv",sep=""), quote = FALSE, sep = "\t", - col.names = FALSE, row.names = TRUE) - - flog.info('Saving R objects.') - save(tax.table, file=paste(output,'/robjects.Rdata',sep='')) - flog.info('Finish.') - - if(returnval){return(tax.table)} - + tt2 = idtaxa_assign_fasta_fun(fasta = dna, id_db = id_db, + output = output, confidence = confidence, verbose = verbose, returnval = returnval) } -- GitLab From bb3e4f2ae1bc6df7055c55960cf6026781484b5d Mon Sep 17 00:00:00 2001 From: Sebastien Theil <sebastien.theil@inra.fr> Date: Tue, 8 Jun 2021 15:54:55 +0000 Subject: [PATCH 4/5] Update assign_fasta_fun.R Using logger instead of base message() function. Remove some prints. --- R/assign_fasta_fun.R | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/R/assign_fasta_fun.R b/R/assign_fasta_fun.R index 2a49fa5..969c30b 100644 --- a/R/assign_fasta_fun.R +++ b/R/assign_fasta_fun.R @@ -18,7 +18,9 @@ #' @export idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", confidence = 50, verbose = 1, returnval = TRUE){ - + if(verbose == 3){ + flog.threshold(DEBUG) + } if(!dir.exists(output)){ flog.debug('Creating output directory...') dir.create(output, recursive = TRUE) @@ -50,7 +52,8 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co Fannot = list() for (seq_name in names(dna)){ - if(verbose == 3){message(seq_name)} + flog.debug(seq_name) + # create list L1=list(); L2=list() for (i in 1:length(db_list)){ @@ -60,33 +63,35 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co depth1 = sapply(L1, length) if ( all(depth1 == 0) ){ - if(verbose == 3){message("unassigned")} + flog.debug("unassigned") Fannot[[seq_name]]$taxon = rep("unassigned", 8) Fannot[[seq_name]]$confidence = rep(0, 8) - } else { + } + else { if(length(unique(depth1)) == 1){ - if(verbose == 3){message("best conf")} - #if same assignation depth, test on confidence - last_rank <- length(taxid_list[[1]][[seq_name]]$taxon) - conf1 = sapply(L2, function(x){x[last_rank]}) - Fannot[[seq_name]] <- taxid_list[[which(conf1 == max(conf1))[1]]][[seq_name]] # ..[1] in case of same depth / same conf. - } else { - + flog.debug("best conf") + #if same assignation depth, test on confidence + last_rank <- length(taxid_list[[1]][[seq_name]]$taxon) + conf1 = sapply(L2, function(x){x[last_rank]}) + Fannot[[seq_name]] <- taxid_list[[which(conf1 == max(conf1))[1]]][[seq_name]] # ..[1] in case of same depth / same conf. + } + else { if( any( table(depth1) > 1 ) ){ # if some equal depth - if(verbose == 3){message("some equal depth, best conf")} + flog.debug("some equal depth, best conf") last_rank <- max(sapply(L1, length)) maxDepth_list = L2[which(depth1 == max(depth1))] conf1 = sapply(L2, function(x){x[last_rank]}); conf1[is.na(conf1)] = 0 Fannot[[seq_name]] <- taxid_list[[which(conf1 == max(conf1))]][[seq_name]] - }else{ + } + else{ #if all different assignation depth - if(verbose == 3){message("best depth")} + flog.debug("best depth") Fannot[[seq_name]] <- taxid_list[[which(depth1 == max(depth1))]][[seq_name]] } } } - if(verbose == 3){message(length(Fannot))} + flog.debug(paste('length Fannot:',length(Fannot))) } flog.info('Done.') @@ -94,23 +99,24 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co #Process output table with different assignations. all_annot_tab = data.frame(row.names=names(taxid_list[[1]])) for (i in 1:length(db_list)){ - if(i<3){print(db_list[i])}else{print("Final")} + ALLassign <- sapply(taxid_list[[i]],function (id) { paste(id$taxon," (",round(id$confidence, digits=1),"%)",sep="",collapse="; ") }) all_annot_tab[,i] <- ALLassign - if(verbose == 3){print(head(all_annot_tab))} + } + FINALassign <- sapply(Fannot,function (id) { paste(id$taxon," (",round(id$confidence, digits=1),"%)",sep="",collapse="; ") }) + all_annot_tab$FINAL = FINALassign seq_tab <- data.frame(sequences=as.character(dna)) row.names(seq_tab) <- names(dna) final_tax_table <- merge(all_annot_tab, seq_tab, by = "row.names") - colnames(final_tax_table)=c("ASV",db_list, "FINAL", "sequences") write.table(final_tax_table, paste(output,"/allDB_tax_table.csv",sep=""), quote = FALSE, sep = "\t", row.names = FALSE) -- GitLab From 926a669c1b803dfaa865c7c63bf296cf1aa74e07 Mon Sep 17 00:00:00 2001 From: Etienne Rifa <etienne.rifa[at]insa-toulouse.fr> Date: Tue, 8 Jun 2021 18:11:07 +0200 Subject: [PATCH 5/5] small change --- R/idtaxa_assign_fun.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/idtaxa_assign_fun.R b/R/idtaxa_assign_fun.R index 7e81d36..26e08b4 100644 --- a/R/idtaxa_assign_fun.R +++ b/R/idtaxa_assign_fun.R @@ -15,7 +15,7 @@ #' @export -idTaxa_assign = function(db_file, dna, asv_names, confidence, ncpu = NULLS){ +idTaxa_assign = function(db_file, dna, asv_names, confidence, ncpu = NULL){ flog.info(paste('Using database ',db_file,sep='')) toto <- load(db_file) ids <- IdTaxa(dna, trainingSet, strand="both", processors=ncpu, verbose=TRUE) -- GitLab