MetaTaxoOPU R tool
library("ape")
library("tidyverse")
#MNp4nj <- read.tree("p4_NJ_copy.tree")
MNp4nj <- read.tree("p4nj_sub_exract1.tree")
distval <- dist.nodes(MNp4nj)
distval[1:5, 1:5]
o <- plot(MNp4nj)
plot(MNp4nj, show.tip.label = F, x.lim = o$x.lim)
tiplabels(MNp4nj$tip.label[grep("Otu*", MNp4nj$tip.label)], grep("Otu*", MNp4nj$tip.label), adj = 0, col = "red", frame = 'none', bg = "none", font = 3, cex = 0.8)
tiplabels(MNp4nj$tip.label[grep("Otu*", MNp4nj$tip.label, invert = T)], grep("Otu*", MNp4nj$tip.label, invert = T), adj = 0, col = "black", frame = 'none', bg = "none", font = 1, cex = 0.8)
OPUnummark <- 1
OPUmark <- "OPU_"
NodeOPU <- paste(OPUmark, OPUnummark, sep = "")
MNp4nj$tip.label[MNp4nj$edge[,][which(MNp4nj$edge[, 1] == 112),]]
## 每个Otu序列所在的节点和节点路径
otu_tip_id <- grep("Otu*", MNp4nj$tip.label)
otutip_nodepath <- nodepath(MNp4nj)[otu_tip_id]
## 获取指定节点(191)下的所有tips label
test1 <- extract.clade(MNp4nj, 191)
## 获取Otu tip所在的上级节点
otutips_secondlevel_node <- lapply(otutip_nodepath, function(el) {
el[length(el) - 1]
})
library("ape")
library("tidyverse")
#MNp4nj <- read.tree("p4_NJ_copy.tree")
MNp4nj <- read.tree("p4nj_sub_exract1.tree")
MNp4nj2 <- read.tree("p4nj_sub_exract3.ntree")
## 每个Otu序列所在的节点和节点路径
otu_tip_id <- grep("Otu*", MNp4nj$tip.label)
otutip_nodepath <- nodepath(MNp4nj2)[otu_tip_id]
## 获取指定节点(191)下的所有tips label
test1 <- extract.clade(MNpMNp4nj24nj, 191)
## 获取Otu tip所在的上级节点
otutips_secondlevel_node <- lapply(otutip_nodepath, function(el) {
el[length(el) - 1]
})
o2 <- plot(MNp4nj2)
plot(MNp4nj2, show.tip.label = F, x.lim = o2$x.lim)
tiplabels(MNp4nj2$tip.label[grep("Otu*", MNp4nj$tip.label)], grep("Otu*", MNp4nj$tip.label), adj = 0, col = "red", frame = 'none', bg = "none", font = 3, cex = 0.8)
tiplabels(MNp4nj2$tip.label[grep("Otu*", MNp4nj$tip.label, invert = T)], grep("Otu*", MNp4nj$tip.label, invert = T), adj = 0, col = "black", frame = 'none', bg = "none", font = 1, cex = 0.8)
nodelabels(node = c(unlist(otutips_secondlevel_node)), pch = 21, bg = "black", cex = 1)
nodelabels(MNp4nj2$node.label, adj = c(0.5), col = "black", frame = 'none', bg = "none", font = 1, cex = 0.8)
taxo_name_decidor <- function(consensus_linegae, unculted = FALSE) {
levels_mark <- c("bacteria", "phylum", "class", "order", "family", "genus", "species")
#unclasified <- "Uncult"
consensus_linegae_level <- length(consensus_linegae)
suffix <- ifelse(consensus_linegae_level < length(levels_mark),
ifelse(unculted == TRUE,
str_c("uncult", levels_mark[consensus_linegae_level], consensus_linegae[consensus_linegae_level], sep = "_"),
str_c(levels_mark[consensus_linegae_level], consensus_linegae[consensus_linegae_level], sep = "_")),
consensus_linegae[consensus_linegae_level])
return(suffix)
}
NodeOPU_namer <- function(prefix = "OPU", OPUnummark = " ", suffix = " ") {
NodeOPU_nam <- str_c(prefix = "OPU", OPUnummark, suffix, sep = "_")
NodeOPU_nam <- str_replace(NodeOPU_nam, " ", "_")
return(NodeOPU_nam)
}
OPUnummark <- 0
Node_OPU_Names <- c()
for (ele in unlist(otutips_secondlevel_node)) {
uncult_mark <- FALSE
temp_node_tipslabel_lineage <- c() #用来临时记录某节点在的分类的 Lineage
OPUnummark <- OPUnummark + 1
OPUnummarkstr <- ifelse(OPUnummark <= 9, paste("00", OPUnummark, sep = ""), ifelse(OPUnummark <= 99, paste("0", OPUnummark, sep = ""), OPUnummark))
tmptre <- extract.clade(MNp4nj2, ele, root.edge = 0, collapse.singles = TRUE, interactive = FALSE)
#cat("Node no. has :\n", paste(tmptre$tip.label, sep = "\n"), "\n")
for (tip in tmptre$tip.label) {
tmptiptax <- NULL
len <- NULL
if (str_detect(tip, "Unclassified")) {
tmptiptax <- str_split(tip, "___", simplify = TRUE)
len <- length(tmptiptax)
lineage <- str_split(tip, "___", simplify = TRUE)[len]
#cat(lineage, "\n")
uncult_mark <- TRUE
temp_node_tipslabel_lineage <- c(temp_node_tipslabel_lineage, lineage)
} else {
tmptiptax <- str_split(tip, "__", simplify = TRUE)
len <- length(tmptiptax)
#last_levelname <- str_split(tmptiptax, " ", simplify = TRUE)[2]
#cat(tmptiptax,"\n")
if (len > 2) {
if (str_split(tmptiptax, " ", simplify = TRUE)[3] != "Unclassified") {
last_levelname <- str_split(tmptiptax, " ", simplify = TRUE)[2]
last_levelname <- str_replace(last_levelname, "_", " ")
lineage <- str_split(tmptiptax, " ", simplify = TRUE)[len]
lineage <- str_c(lineage, last_levelname)
#uncult_mark <- str_split(tmptiptax, " ", simplify = TRUE)[2]
uncult_mark <- FALSE
temp_node_tipslabel_lineage <- c(temp_node_tipslabel_lineage, lineage)
} else {
#last_levelname <- str_split(tmptiptax, " ", simplify = TRUE)[2]
#last_levelname <- str_replace(last_levelname, "_", " ")
lineage <- str_split(tmptiptax, " ", simplify = TRUE)[len]
uncult_mark <- TRUE
temp_node_tipslabel_lineage <- c(temp_node_tipslabel_lineage, lineage)
}
} else {
last_levelname <- str_split(tmptiptax, " ", simplify = TRUE)[2]
last_levelname <- str_replace(last_levelname, "_", " ")
lineage <- str_split(tmptiptax, " ", simplify = TRUE)[len]
lineage <- str_c(lineage, last_levelname)
uncult_mark <- FALSE
temp_node_tipslabel_lineage <- c(temp_node_tipslabel_lineage, lineage)
}
}
}
# cat(temp_node_tipslabel_lineage, "\n")
temp_consensus_linegae <- c()
for (ele in temp_node_tipslabel_lineage) {
if (!str_detect(ele, "^OTU_")) {
## 排除 OTU 序列的 lineage 信息
tmplineage <- str_split(ele, "_", simplify = TRUE)
if (tmplineage[length(tmplineage)] == "uncultured") {
tmplineage = tmplineage[1:(length(tmplineage) - 1)]
}
#cat(tmplineage, "\n")
if (is.null(temp_consensus_linegae)) {
temp_consensus_linegae = tmplineage
} else {
## 用交集的形式获取 一致性 lineage 信息
temp_consensus_linegae <- intersect(temp_consensus_linegae, tmplineage)
}
}
#tmplineage <- str_replace(tmplineage, " ", "")
#cat(temp_consensus_linegae, "\n")
}
taxo_name <- taxo_name_decidor(temp_consensus_linegae, uncult_mark)
NodeOPU_namerstr <- NodeOPU_namer(prefix = "OPU", OPUnummark = OPUnummarkstr, suffix = taxo_name)
#cat(NodeOPU_namerstr, "\n")
Node_OPU_Names <- c(Node_OPU_Names, NodeOPU_namerstr)
temp_node_tipslabel_lineage <- c() #清除临时记录的 lineage
}
o2 <- plot(MNp4nj2)
plot(MNp4nj2, show.tip.label = F, x.lim = o2$x.lim)
tiplabels(MNp4nj2$tip.label[grep("Otu*", MNp4nj$tip.label)], grep("Otu*", MNp4nj$tip.label), adj = 0, col = "red", frame = 'none', bg = "none", font = 3, cex = 0.7)
tiplabels(MNp4nj2$tip.label[grep("Otu*", MNp4nj$tip.label, invert = T)], grep("Otu*", MNp4nj$tip.label, invert = T), adj = 0, col = "black", frame = 'none', bg = "none", font = 1, cex = 0.7)
nodelabels(node = c(unlist(otutips_secondlevel_node)), pch = 21, bg = "#0000FF55", cex = 0.8)
nodelabels(text = Node_OPU_Names, node = c(unlist(otutips_secondlevel_node)), adj = c(0.5), col = "black", frame = 'none', bg = "none", font = 1, cex = 0.7)
Written on January 1, 2021