From a4aef26e556e82d06723d187de6c19db2ce60c7e Mon Sep 17 00:00:00 2001
From: Etienne Rifa <etienne.rifa[at]insa-toulouse.fr>
Date: Tue, 11 May 2021 12:42:40 +0200
Subject: [PATCH 1/5] allow assign with more than 2 DBs

---
 R/assign_fasta_fun.R              | 103 +++++++++++++++++++-----------
 R/idtaxaDB_formatting_functions.R |   4 +-
 2 files changed, 70 insertions(+), 37 deletions(-)

diff --git a/R/assign_fasta_fun.R b/R/assign_fasta_fun.R
index c536a80..70972f6 100644
--- a/R/assign_fasta_fun.R
+++ b/R/assign_fasta_fun.R
@@ -45,39 +45,54 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co
   }
 
   if(length(db_list)>1){
-    flog.info('Merging both annotation...')
+    flog.info('Merging all annotations...')
+
+    Fannot = list()
     for (seq_name in names(dna)){
-      #for each seq
-      if(length(taxid_list[[1]][[seq_name]]$taxon) == 0 & length(taxid_list[[2]][[seq_name]]$taxon) == 0){
-        taxid_list[[3]][[seq_name]]$taxon = rep("unassigned", 8)
-        taxid_list[[3]][[seq_name]]$confidence = rep(0, 8)
+      print(seq_name)
+      # create list
+      L1=list(); L2=list()
+      for (i in 1:length(db_list)){
+        L1[[i]] =  taxid_list[[i]][[seq_name]]$taxon
+        L2[[i]] =  taxid_list[[i]][[seq_name]]$confidence
+      }
+
+      depth1 = sapply(L1, length)
+      if ( all(depth1 == 0) ){
+        print("unassigned")
+        Fannot[[seq_name]]$taxon = rep("unassigned", 8)
+        Fannot[[seq_name]]$confidence = rep(0, 8)
       } else {
-        if(length(taxid_list[[1]][[seq_name]]$taxon) == length(taxid_list[[2]][[seq_name]]$taxon)){
-          #if same assignation depth, test on confidence
-          last_rank <- length(taxid_list[[1]][[seq_name]]$taxon)
-          if(taxid_list[[1]][[seq_name]]$confidence[last_rank] > taxid_list[[2]][[seq_name]]$confidence[last_rank]){
-            taxid_list[[3]][[seq_name]] <- taxid_list[[1]][[seq_name]] #DB1
-          } else{
-            taxid_list[[3]][[seq_name]] <- taxid_list[[2]][[seq_name]] #DB2
-          }
+        if(length(unique(depth1)) == 1){
+          print("best conf")
+        #if same assignation depth, test on confidence
+        last_rank <- length(taxid_list[[1]][[seq_name]]$taxon)
+        conf1 = sapply(L2, function(x){x[last_rank]})
+        Fannot[[seq_name]] <- taxid_list[[which(conf1 == max(conf1))[1]]][[seq_name]] # ..[1] in case of same depth / same conf.
         } else {
-          #if different assignation depth
-          if(length(taxid_list[[1]][[seq_name]]$taxon) > length(taxid_list[[2]][[seq_name]]$taxon)){
-            taxid_list[[3]][[seq_name]] <- taxid_list[[1]][[seq_name]] #DB1
-          } else{
-            taxid_list[[3]][[seq_name]] <- taxid_list[[2]][[seq_name]] #DB2
+
+          if( any( table(depth1) > 1 ) ){
+            # if some equal depth
+            print("some equal depth, best conf")
+            last_rank <- max(sapply(L1, length))
+            maxDepth_list = L2[which(depth1 == max(depth1))]
+            conf1 = sapply(L2, function(x){x[last_rank]}); conf1[is.na(conf1)] = 0
+            Fannot[[seq_name]] <- taxid_list[[which(conf1 == max(conf1))]][[seq_name]]
+          }else{
+            #if all different assignation depth
+            print("best depth")
+            Fannot[[seq_name]] <- taxid_list[[which(depth1 == max(depth1))]][[seq_name]]
           }
         }
-
       }
-
+      print(length(Fannot))
     }
     flog.info('Done.')
 
     flog.info('Output table...')
     #Process output table with different assignations.
     all_annot_tab = data.frame(row.names=names(taxid_list[[1]]))
-    for (i in 1:3){
+    for (i in 1:length(db_list)){
       if(i<3){print(db_list[i])}else{print("Final")}
       ALLassign <- sapply(taxid_list[[i]],function (id) {
         paste(id$taxon," (",round(id$confidence, digits=1),"%)",sep="",collapse="; ")
@@ -85,17 +100,21 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co
       all_annot_tab[,i] <- ALLassign
       if(verbose == 3){print(head(all_annot_tab))}
     }
+    FINALassign <- sapply(Fannot,function (id) {
+      paste(id$taxon," (",round(id$confidence, digits=1),"%)",sep="",collapse="; ")
+    })
+    all_annot_tab$FINAL = FINALassign
 
     seq_tab <- data.frame(sequences=as.character(dna))
     row.names(seq_tab) <- names(dna)
     final_tax_table <- merge(all_annot_tab, seq_tab, by = "row.names")
 
 
-    colnames(final_tax_table)=c("ASV",db_list[1], db_list[2], "FINAL", "sequences")
+    colnames(final_tax_table)=c("ASV",db_list, "FINAL", "sequences")
     write.table(final_tax_table, paste(output,"/allDB_tax_table.csv",sep=""), quote = FALSE, sep = "\t", row.names = FALSE)
 
     # Keep merged assignment (taxid_list[[3]])
-    taxid <- sapply(taxid_list[[3]], function(x) {
+    taxid <- sapply(Fannot, function(x) {
       taxa <- rep(NA,7)
       assign <- x$taxon
       # print(assign)
@@ -133,7 +152,7 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co
       taxid[[nn]] = taxid[[nn]][1:7]
     }
   }
-  taxid <- t(as.data.frame(taxid))
+  taxid <- as.data.frame(t(taxid))
   flog.info('Done.')
 
   if(verbose == 3){
@@ -145,33 +164,45 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co
   flog.info("Filling missing taxonomy ranks...")
   PREFIX = c("k__","p__","c__","o__","f__","g__","s__")
 
-  if( length(grep("p__",taxid[,2])) == 0 ){
-    taxid = fill_tax_fun(taxid, prefix = FALSE)
-    taxid = as.data.frame( t(apply(taxid, 1, function(x){ paste(PREFIX, x, sep="")})), stringAsFactors = FALSE)
-  }else{
-    taxid = fill_tax_fun(taxid, prefix = TRUE)
-  }
+  noprefix_taxid = taxid[grep("k__",taxid[,1], invert = TRUE),]
+  prefix_taxid = taxid[grep("k__",taxid[,1]),]
+
+  noprefix_taxid = fill_tax_fun(noprefix_taxid, prefix = FALSE)
+  noprefix_taxid = as.data.frame( t(apply(noprefix_taxid, 1, function(x){ paste(PREFIX, x, sep="")})), stringAsFactors = FALSE)
+  names(noprefix_taxid) = names(taxid)
+
+  prefix_taxid = fill_tax_fun(prefix_taxid, prefix = TRUE)
+
+  filled_taxid = rbind.data.frame(noprefix_taxid, prefix_taxid)
+  tax.table = filled_taxid[names(dna),]
+
+  # if( length(grep("p__",taxid[,2])) == 0 ){
+  #   taxid = fill_tax_fun(taxid, prefix = FALSE)
+  #   taxid = as.data.frame( t(apply(taxid, 1, function(x){ paste(PREFIX, x, sep="")})), stringAsFactors = FALSE)
+  # }else{
+  #   taxid = fill_tax_fun(taxid, prefix = TRUE)
+  # }
   flog.info('Done.')
 
-  names(taxid) <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species")
-  tax.table <- tax_table(as.matrix(taxid))
+  # names(taxid) <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species")
+  # tax.table <- tax_table(as.matrix(taxid))
 
 
   flog.info("Check taxonomy consistency...")
-  tax.table = check_tax_fun(tax.table, output = NULL)
+  tax.tablecheck = check_tax_fun(tax.table, output = NULL, verbose = 3)
   flog.info("Done.")
 
   #Output table 2
-  Tab2 = apply( as.data.frame(tax.table, stringsAsFactors=FALSE) , 1 , paste , collapse = ";" )
+  Tab2 = apply( as.data.frame(tax.tablecheck, stringsAsFactors=FALSE) , 1 , paste , collapse = ";" )
   Tab2 <- cbind(Tab2, seq=as.character(dna))
   write.table(Tab2, paste(output,"/final_tax_table.csv",sep=""), quote = FALSE, sep = "\t",
               col.names = FALSE, row.names = TRUE)
 
   flog.info('Saving R objects.')
-  save(tax.table,  file=paste(output,'/robjects.Rdata',sep=''))
+  save(tax.tablecheck,  file=paste(output,'/robjects.Rdata',sep=''))
   flog.info('Finish.')
 
-  if(returnval){return(tax.table)}
+  if(returnval){return(tax.tablecheck)}
 
 
 }
diff --git a/R/idtaxaDB_formatting_functions.R b/R/idtaxaDB_formatting_functions.R
index 7cb1156..049095b 100644
--- a/R/idtaxaDB_formatting_functions.R
+++ b/R/idtaxaDB_formatting_functions.R
@@ -101,15 +101,17 @@ check_tax_fun <- function(taxtable = taxtable, output = NULL, rank = 7, verbose=
           ftax <- names(uniqTax2[order(uniqTax2,decreasing=TRUE)])[1]
           ftax <- unlist(strsplit(ftax,";"))
 
+
           cat( glue::glue( "CORRECTED :
                       {paste(ftax, collapse = ';')}" )
           ); cat("\n")
           #Change taxonomy with final ftax. the most common in taxtable
           for(j in row.names(taxtable[taxtable[,rk]==i,])){
             if(rk == 7){
+              # print(ftax)
               taxtable[j,] = ftax
             }else{
-              taxtable[j,] = c(ftax, taxtable[j,(rk+1):ncol(taxtable)])
+              taxtable[j,] = c(ftax, as.matrix(taxtable[j,(rk+1):ncol(taxtable)]) )
             }
           }
         }
-- 
GitLab


From a62f83094a89fd745ac90be9b70bd8f2e8cd6c02 Mon Sep 17 00:00:00 2001
From: Etienne Rifa <etienne.rifa[at]insa-toulouse.fr>
Date: Wed, 12 May 2021 10:29:43 +0200
Subject: [PATCH 2/5] debug prefix assign

---
 R/assign_fasta_fun.R              | 44 +++++++++++++------------------
 R/idtaxaDB_formatting_functions.R |  1 -
 2 files changed, 19 insertions(+), 26 deletions(-)

diff --git a/R/assign_fasta_fun.R b/R/assign_fasta_fun.R
index 70972f6..6569517 100644
--- a/R/assign_fasta_fun.R
+++ b/R/assign_fasta_fun.R
@@ -45,11 +45,12 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co
   }
 
   if(length(db_list)>1){
+    # Multiple references
     flog.info('Merging all annotations...')
 
     Fannot = list()
     for (seq_name in names(dna)){
-      print(seq_name)
+      if(verbose == 3){message(seq_name)}
       # create list
       L1=list(); L2=list()
       for (i in 1:length(db_list)){
@@ -59,12 +60,12 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co
 
       depth1 = sapply(L1, length)
       if ( all(depth1 == 0) ){
-        print("unassigned")
+        if(verbose == 3){message("unassigned")}
         Fannot[[seq_name]]$taxon = rep("unassigned", 8)
         Fannot[[seq_name]]$confidence = rep(0, 8)
       } else {
         if(length(unique(depth1)) == 1){
-          print("best conf")
+          if(verbose == 3){message("best conf")}
         #if same assignation depth, test on confidence
         last_rank <- length(taxid_list[[1]][[seq_name]]$taxon)
         conf1 = sapply(L2, function(x){x[last_rank]})
@@ -73,19 +74,19 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co
 
           if( any( table(depth1) > 1 ) ){
             # if some equal depth
-            print("some equal depth, best conf")
+            if(verbose == 3){message("some equal depth, best conf")}
             last_rank <- max(sapply(L1, length))
             maxDepth_list = L2[which(depth1 == max(depth1))]
             conf1 = sapply(L2, function(x){x[last_rank]}); conf1[is.na(conf1)] = 0
             Fannot[[seq_name]] <- taxid_list[[which(conf1 == max(conf1))]][[seq_name]]
           }else{
             #if all different assignation depth
-            print("best depth")
+            if(verbose == 3){message("best depth")}
             Fannot[[seq_name]] <- taxid_list[[which(depth1 == max(depth1))]][[seq_name]]
           }
         }
       }
-      print(length(Fannot))
+      if(verbose == 3){message(length(Fannot))}
     }
     flog.info('Done.')
 
@@ -113,7 +114,7 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co
     colnames(final_tax_table)=c("ASV",db_list, "FINAL", "sequences")
     write.table(final_tax_table, paste(output,"/allDB_tax_table.csv",sep=""), quote = FALSE, sep = "\t", row.names = FALSE)
 
-    # Keep merged assignment (taxid_list[[3]])
+    # Keep merged assignment (Fannot)
     taxid <- sapply(Fannot, function(x) {
       taxa <- rep(NA,7)
       assign <- x$taxon
@@ -127,7 +128,6 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co
       return(taxa)
     })
 
-    # taxid <- as.data.frame(taxid)
 
   }else{
     # One DB assignment
@@ -163,33 +163,27 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co
   names(taxid) <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species")
   flog.info("Filling missing taxonomy ranks...")
   PREFIX = c("k__","p__","c__","o__","f__","g__","s__")
-
+  # handling possible prefix
   noprefix_taxid = taxid[grep("k__",taxid[,1], invert = TRUE),]
   prefix_taxid = taxid[grep("k__",taxid[,1]),]
 
-  noprefix_taxid = fill_tax_fun(noprefix_taxid, prefix = FALSE)
-  noprefix_taxid = as.data.frame( t(apply(noprefix_taxid, 1, function(x){ paste(PREFIX, x, sep="")})), stringAsFactors = FALSE)
-  names(noprefix_taxid) = names(taxid)
-
-  prefix_taxid = fill_tax_fun(prefix_taxid, prefix = TRUE)
+  if(nrow(noprefix_taxid) != 0){
+    noprefix_taxid = fill_tax_fun(noprefix_taxid, prefix = FALSE)
+    noprefix_taxid = as.data.frame( t(apply(noprefix_taxid, 1, function(x){ paste(PREFIX, x, sep="")})), stringAsFactors = FALSE)
+    names(noprefix_taxid) = names(taxid)
+  }
 
+  if(nrow(prefix_taxid) != 0){
+    prefix_taxid = fill_tax_fun(prefix_taxid, prefix = TRUE)
+  }
   filled_taxid = rbind.data.frame(noprefix_taxid, prefix_taxid)
+
   tax.table = filled_taxid[names(dna),]
 
-  # if( length(grep("p__",taxid[,2])) == 0 ){
-  #   taxid = fill_tax_fun(taxid, prefix = FALSE)
-  #   taxid = as.data.frame( t(apply(taxid, 1, function(x){ paste(PREFIX, x, sep="")})), stringAsFactors = FALSE)
-  # }else{
-  #   taxid = fill_tax_fun(taxid, prefix = TRUE)
-  # }
   flog.info('Done.')
 
-  # names(taxid) <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species")
-  # tax.table <- tax_table(as.matrix(taxid))
-
-
   flog.info("Check taxonomy consistency...")
-  tax.tablecheck = check_tax_fun(tax.table, output = NULL, verbose = 3)
+  tax.tablecheck = check_tax_fun(tax.table, output = NULL, rank = 6, verbose = 3)
   flog.info("Done.")
 
   #Output table 2
diff --git a/R/idtaxaDB_formatting_functions.R b/R/idtaxaDB_formatting_functions.R
index 049095b..b0ce69a 100644
--- a/R/idtaxaDB_formatting_functions.R
+++ b/R/idtaxaDB_formatting_functions.R
@@ -67,7 +67,6 @@ fill_tax_fun <- function(taxtable = taxtable, prefix = TRUE){
 
 check_tax_fun <- function(taxtable = taxtable, output = NULL, rank = 7, verbose=3, returnval = TRUE){
   RANKS = c("_domain","_phylum","_class","_order","_family","_genus","_species")
-  print("Check taxonomy consistency...")
   # Check for multiple ancestors at each rank, choose first occurence for each problematic taxon
   sink(paste('./check_tax_fun.log', sep=""), split = TRUE)
   for(rk in rank:2){
-- 
GitLab


From e525fcf667a064b729748aad639255c736c14e98 Mon Sep 17 00:00:00 2001
From: Etienne Rifa <etienne.rifa[at]insa-toulouse.fr>
Date: Tue, 8 Jun 2021 16:40:08 +0200
Subject: [PATCH 3/5] fix redundant codes integrate idtaxa_assign_fasta_fun
 into assign_taxo,same process

---
 R/assign_fasta_fun.R |   2 +-
 R/assign_taxo_fun.R  | 155 ++-----------------------------------------
 2 files changed, 5 insertions(+), 152 deletions(-)

diff --git a/R/assign_fasta_fun.R b/R/assign_fasta_fun.R
index 6569517..2a49fa5 100644
--- a/R/assign_fasta_fun.R
+++ b/R/assign_fasta_fun.R
@@ -21,7 +21,7 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co
 
   if(!dir.exists(output)){
     flog.debug('Creating output directory...')
-    dir.create(output)
+    dir.create(output, recursive = TRUE)
     flog.debug('Done.')
   }
 
diff --git a/R/assign_taxo_fun.R b/R/assign_taxo_fun.R
index fc427d3..2de2499 100644
--- a/R/assign_taxo_fun.R
+++ b/R/assign_taxo_fun.R
@@ -32,162 +32,15 @@ assign_taxo_fun <- function(dada_res = dada_res,  output = "./idtaxa/", id_db =
   }
 
 
-  if(!dir.exists(output)){
-    flog.debug('Creating output directory...')
-    dir.create(output)
-    flog.debug('Done.')
-  }
 
   ## IDATAXA assignment
-  flog.info(paste('Taxonomy assignation with IDTAXA',sep=''))
   dna <- DNAStringSet(getSequences(dada_res$seqtab.nochim))
+  names(dna) <- sapply(colnames(dada_res$seqtab.nochim), digest::digest, algo="md5")
 
+  if(!all( names(dna) == rownames(dada_res$otu.table) )){stop("Seq names and ASV table row names are different")}
 
-  db_list <- unlist(strsplit(id_db,","))
-  taxid_list=vector("list", length(db_list)+1)
-  for (i in 1:length(db_list)){
-    db_file <- db_list[i]
-    taxid_list[[i]] <- idTaxa_assign(db_file, dna, colnames(dada_res$seqtab.export), confidence)
-  }
-
-  if(verbose == 3){
-    save(taxid_list, file = "./annot_taxid_list_debug.rdata")
-  }
-
-  if(length(db_list)>1){
-    flog.info('Merging both annotation...')
-    for (seq_name in colnames(dada_res$seqtab.export)){
-      #for each seq
-      if(length(taxid_list[[1]][[seq_name]]$taxon) == 0 & length(taxid_list[[2]][[seq_name]]$taxon) == 0){
-        taxid_list[[3]][[seq_name]]$taxon = rep("unassigned", 8)
-        taxid_list[[3]][[seq_name]]$confidence = rep(0, 8)
-      } else {
-        if(length(taxid_list[[1]][[seq_name]]$taxon) == length(taxid_list[[2]][[seq_name]]$taxon)){
-          #if same assignation depth, test on confidence
-          last_rank <- length(taxid_list[[1]][[seq_name]]$taxon)
-          if(taxid_list[[1]][[seq_name]]$confidence[last_rank] > taxid_list[[2]][[seq_name]]$confidence[last_rank]){
-            taxid_list[[3]][[seq_name]] <- taxid_list[[1]][[seq_name]] #DB1
-          } else{
-            taxid_list[[3]][[seq_name]] <- taxid_list[[2]][[seq_name]] #DB2
-          }
-        } else {
-          #if different assignation depth
-          if(length(taxid_list[[1]][[seq_name]]$taxon) > length(taxid_list[[2]][[seq_name]]$taxon)){
-            taxid_list[[3]][[seq_name]] <- taxid_list[[1]][[seq_name]] #DB1
-          } else{
-            taxid_list[[3]][[seq_name]] <- taxid_list[[2]][[seq_name]] #DB2
-          }
-        }
-
-      }
-
-    }
-    flog.info('Done.')
-
-    flog.info('Output table...')
-    #Process output table with different assignations.
-    all_annot_tab = data.frame(row.names=names(taxid_list[[1]]))
-    for (i in 1:3){
-      if(i<3){print(db_list[i])}else{print("Final")}
-      ALLassign <- sapply(taxid_list[[i]],function (id) {
-        paste(id$taxon," (",round(id$confidence, digits=1),"%)",sep="",collapse="; ")
-      })
-      all_annot_tab[,i] <- ALLassign
-      if(verbose == 3){print(head(all_annot_tab))}
-    }
-
-    seq_tab <- data.frame(sequences=as.character(dna))
-    row.names(seq_tab) <- colnames(dada_res$seqtab.export)
-    final_tax_table <- merge(all_annot_tab, seq_tab, by = "row.names")
-
-
-    colnames(final_tax_table)=c("ASV",db_list[1], db_list[2], "FINAL", "sequences")
-    write.table(final_tax_table, paste(output,"/allDB_tax_table.csv",sep=""), quote = FALSE, sep = "\t", row.names = FALSE)
-
-    # Keep merged assignment (taxid_list[[3]])
-    taxid <- sapply(taxid_list[[3]], function(x) {
-      taxa <- rep(NA,7)
-      assign <- x$taxon
-      # print(assign)
-      if(length(assign[-1])==0){
-        taxa <- rep("unassigned",7)
-      } else {
-        taxa[1:length(assign[-1])] <- assign[-1]
-      }
-
-      return(taxa)
-    })
-
-    # taxid <- as.data.frame(taxid)
-
-  }else{
-    # One DB assignment
-    taxid <- sapply(taxid_list[[1]], function(x) {
-      taxa <- rep(NA,7)
-      assign <- x$taxon
-      # print(assign)
-      if(length(assign[-1])==0){
-        taxa <- rep("unassigned",7)
-      } else {
-        taxa[1:length(assign[-1])] <- assign[-1]
-      }
-
-      return(taxa)
-    })
-  }
-
-  if(max(sapply(taxid, length)) > 7){
-    LL=sapply(taxid, length)
-    Tnames=names(LL[LL>7])
-    for(nn in Tnames){
-      taxid[[nn]] = taxid[[nn]][1:7]
-    }
-  }
-  FeatNames = colnames(taxid); if(is.null(FeatNames)){FeatNames = names(taxid)}
-  taxid <- t(as.data.frame(taxid))
-  row.names(taxid) = FeatNames
-  flog.info('Done.')
-
-  if(verbose == 3){
-    save(taxid, file = "./annot_taxid_debug.rdata")
-  }
-
-
-  # Filling taxonomy with last assigned rank.
-  names(taxid) <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species")
-  flog.info("Filling missing taxonomy ranks...")
-  PREFIX = c("k__","p__","c__","o__","f__","g__","s__")
-
-  if( length(grep("p__",taxid[,2])) == 0 ){
-    taxid = fill_tax_fun(taxid, prefix = FALSE)
-    taxid = as.data.frame( t(apply(taxid, 1, function(x){ paste(PREFIX, x, sep="")})), stringAsFactors = FALSE)
-  }else{
-    taxid = fill_tax_fun(taxid, prefix = TRUE)
-  }
-
-
-  flog.info('Done.')
-
-  names(taxid) <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species")
-  tax.table <- tax_table(as.matrix(taxid))
-
-
-  flog.info("Check taxonomy consistency...")
-  tax.table = check_tax_fun(tax.table, output = NULL)
-  flog.info("Done.")
-
-  #Output table 2
-  Tab2 = apply( as.data.frame(tax.table, stringsAsFactors=FALSE) , 1 , paste , collapse = ";" )
-  Tab2 <- cbind(Tab2, seq=colnames(dada_res$seqtab.nochim))
-  write.table(Tab2, paste(output,"/final_tax_table.csv",sep=""), quote = FALSE, sep = "\t",
-              col.names = FALSE, row.names = TRUE)
-
-  flog.info('Saving R objects.')
-  save(tax.table,  file=paste(output,'/robjects.Rdata',sep=''))
-  flog.info('Finish.')
-
-  if(returnval){return(tax.table)}
-
+  tt2 = idtaxa_assign_fasta_fun(fasta = dna, id_db = id_db,
+        output = output, confidence = confidence, verbose = verbose, returnval = returnval)
 
 
 }
-- 
GitLab


From bb3e4f2ae1bc6df7055c55960cf6026781484b5d Mon Sep 17 00:00:00 2001
From: Sebastien Theil <sebastien.theil@inra.fr>
Date: Tue, 8 Jun 2021 15:54:55 +0000
Subject: [PATCH 4/5] Update assign_fasta_fun.R

Using logger instead of base message() function. Remove some prints.
---
 R/assign_fasta_fun.R | 42 ++++++++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/R/assign_fasta_fun.R b/R/assign_fasta_fun.R
index 2a49fa5..969c30b 100644
--- a/R/assign_fasta_fun.R
+++ b/R/assign_fasta_fun.R
@@ -18,7 +18,9 @@
 #' @export
 
 idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", confidence = 50, verbose = 1, returnval = TRUE){
-
+  if(verbose == 3){
+    flog.threshold(DEBUG)
+  }
   if(!dir.exists(output)){
     flog.debug('Creating output directory...')
     dir.create(output, recursive = TRUE)
@@ -50,7 +52,8 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co
 
     Fannot = list()
     for (seq_name in names(dna)){
-      if(verbose == 3){message(seq_name)}
+      flog.debug(seq_name)
+      
       # create list
       L1=list(); L2=list()
       for (i in 1:length(db_list)){
@@ -60,33 +63,35 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co
 
       depth1 = sapply(L1, length)
       if ( all(depth1 == 0) ){
-        if(verbose == 3){message("unassigned")}
+        flog.debug("unassigned")
         Fannot[[seq_name]]$taxon = rep("unassigned", 8)
         Fannot[[seq_name]]$confidence = rep(0, 8)
-      } else {
+      }
+      else {
         if(length(unique(depth1)) == 1){
-          if(verbose == 3){message("best conf")}
-        #if same assignation depth, test on confidence
-        last_rank <- length(taxid_list[[1]][[seq_name]]$taxon)
-        conf1 = sapply(L2, function(x){x[last_rank]})
-        Fannot[[seq_name]] <- taxid_list[[which(conf1 == max(conf1))[1]]][[seq_name]] # ..[1] in case of same depth / same conf.
-        } else {
-
+          flog.debug("best conf")
+          #if same assignation depth, test on confidence
+          last_rank <- length(taxid_list[[1]][[seq_name]]$taxon)
+          conf1 = sapply(L2, function(x){x[last_rank]})
+          Fannot[[seq_name]] <- taxid_list[[which(conf1 == max(conf1))[1]]][[seq_name]] # ..[1] in case of same depth / same conf.
+        }
+        else {
           if( any( table(depth1) > 1 ) ){
             # if some equal depth
-            if(verbose == 3){message("some equal depth, best conf")}
+            flog.debug("some equal depth, best conf")
             last_rank <- max(sapply(L1, length))
             maxDepth_list = L2[which(depth1 == max(depth1))]
             conf1 = sapply(L2, function(x){x[last_rank]}); conf1[is.na(conf1)] = 0
             Fannot[[seq_name]] <- taxid_list[[which(conf1 == max(conf1))]][[seq_name]]
-          }else{
+          }
+          else{
             #if all different assignation depth
-            if(verbose == 3){message("best depth")}
+            flog.debug("best depth")
             Fannot[[seq_name]] <- taxid_list[[which(depth1 == max(depth1))]][[seq_name]]
           }
         }
       }
-      if(verbose == 3){message(length(Fannot))}
+      flog.debug(paste('length Fannot:',length(Fannot)))
     }
     flog.info('Done.')
 
@@ -94,23 +99,24 @@ idtaxa_assign_fasta_fun <- function(fasta, id_db, output = "./assign_fasta/", co
     #Process output table with different assignations.
     all_annot_tab = data.frame(row.names=names(taxid_list[[1]]))
     for (i in 1:length(db_list)){
-      if(i<3){print(db_list[i])}else{print("Final")}
+
       ALLassign <- sapply(taxid_list[[i]],function (id) {
         paste(id$taxon," (",round(id$confidence, digits=1),"%)",sep="",collapse="; ")
       })
       all_annot_tab[,i] <- ALLassign
-      if(verbose == 3){print(head(all_annot_tab))}
+
     }
+
     FINALassign <- sapply(Fannot,function (id) {
       paste(id$taxon," (",round(id$confidence, digits=1),"%)",sep="",collapse="; ")
     })
+
     all_annot_tab$FINAL = FINALassign
 
     seq_tab <- data.frame(sequences=as.character(dna))
     row.names(seq_tab) <- names(dna)
     final_tax_table <- merge(all_annot_tab, seq_tab, by = "row.names")
 
-
     colnames(final_tax_table)=c("ASV",db_list, "FINAL", "sequences")
     write.table(final_tax_table, paste(output,"/allDB_tax_table.csv",sep=""), quote = FALSE, sep = "\t", row.names = FALSE)
 
-- 
GitLab


From 926a669c1b803dfaa865c7c63bf296cf1aa74e07 Mon Sep 17 00:00:00 2001
From: Etienne Rifa <etienne.rifa[at]insa-toulouse.fr>
Date: Tue, 8 Jun 2021 18:11:07 +0200
Subject: [PATCH 5/5] small change

---
 R/idtaxa_assign_fun.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/idtaxa_assign_fun.R b/R/idtaxa_assign_fun.R
index 7e81d36..26e08b4 100644
--- a/R/idtaxa_assign_fun.R
+++ b/R/idtaxa_assign_fun.R
@@ -15,7 +15,7 @@
 #' @export
 
 
-idTaxa_assign = function(db_file, dna, asv_names, confidence, ncpu = NULLS){
+idTaxa_assign = function(db_file, dna, asv_names, confidence, ncpu = NULL){
   flog.info(paste('Using database ',db_file,sep=''))
   toto <- load(db_file)
   ids <- IdTaxa(dna, trainingSet, strand="both", processors=ncpu, verbose=TRUE)
-- 
GitLab