Skip to content

Commit

Permalink
many updates
Browse files Browse the repository at this point in the history
  • Loading branch information
noriakis committed Sep 26, 2024
1 parent 18f67dd commit 1f2a7d9
Show file tree
Hide file tree
Showing 59 changed files with 202 additions and 107 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ Version: 0.99.0
License: GPL-3
Description: The package aims to analyze intra-species diversity from metagenomics. The package supports the loading functions for single nucleotide variants and copy number variant profiles, filtering, and statistical analysis.
Authors@R: person("Noriaki", "Sato", email = "[email protected]", role = c("cre", "aut"))
Depends: ggplot2, ggstar, ggraph, igraph
Imports: GetoptLong, BiocFileCache, RCurl, vegan, methods, data.table, phangorn, RColorBrewer, ggtree, circlize, ComplexHeatmap, ggkegg, ape, dplyr, exactRankTests, ggblend, ggh4x, scales, tidygraph, ggplotify, ggtreeExtra, ggnewscale, scico, MKmisc, NMF, pillar, cowplot, patchwork, reshape2, Boruta, tidyr, stringr, clusterProfiler, grDevices, stats, utils
Suggests: simplifyEnrichment, knitr, rmarkdown, NNLM, shiny, BiocStyle, matrixStats
Depends: ggplot2, ggraph, igraph, vegan, phangorn
Imports: GetoptLong, BiocFileCache, RCurl, methods, data.table, RColorBrewer, ggtree, circlize, ComplexHeatmap, ggkegg, ape, dplyr, exactRankTests, ggblend, ggh4x, scales, tidygraph, ggplotify, ggtreeExtra, ggnewscale, scico, MKmisc, NMF, pillar, cowplot, patchwork, reshape2, Boruta, tidyr, stringr, clusterProfiler, grDevices, stats, utils, BiocStyle, ggstar
Suggests: simplifyEnrichment, knitr, rmarkdown, NNLM, shiny, matrixStats
RoxygenNote: 7.3.2
biocViews: Microbiome
VignetteBuilder: knitr
9 changes: 5 additions & 4 deletions R/NMF.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,10 @@
#' @param nnlm_na_perc NA frequency when the cross-validation is performed.
#' @param tss perform total sum scaling to the matrix
#' @param remove_na_perc remove features with too many NA
#' (features with NA below {remove_na_perc} * overall sample numbers will be retained)
#' (features with NA below remove_na_perc * overall sample numbers will be retained)
#' @importFrom NMF nmf nmfEstimateRank
#' @export
#' @return stana object
NMF <- function(stana, species, rank=3, target="kos", seed=53, method="snmf/r",
deleteZeroDepth=FALSE, beta=0.01, estimate=FALSE, estimate_range=1:6, nnlm_flag=FALSE,
nnlm_args=list(), nnlm_na_perc=0.3, tss=FALSE, remove_na_perc=NULL) {
Expand Down Expand Up @@ -106,7 +107,7 @@ NMF <- function(stana, species, rank=3, target="kos", seed=53, method="snmf/r",

err <- sapply(X = estimate_range,
FUN = function(k) {
z <- nnmf(A2, k);
z <- NNLM::nnmf(A2, k);
c(mean((with(z, W %*% H)[ind] - A[ind])^2), tail(z$mse, 1));
}
);
Expand Down Expand Up @@ -158,7 +159,7 @@ NMF <- function(stana, species, rank=3, target="kos", seed=53, method="snmf/r",
res <- NMF::nmf(mat, rank = rank, seed = seed, method=method)
}
coefMat <- coef(res)
basisMat <- basis(res)
basisMat <- NMF::basis(res)
}

stana@coefMat[[species]] <- data.frame(coefMat)
Expand Down Expand Up @@ -207,7 +208,7 @@ plotStackedBarPlot <- function(stana, sp, by="NMF") {
## Not showing sample label, instead facet by group
ggplot(melted, aes(fill=variable, y=value, x=sample)) +
geom_col(position="fill")+
facet_grid(. ~ group, scale="free")+
facet_grid(. ~ group, scales="free")+
scale_y_continuous(expand = expansion(mult = c(0, 0.05)))+
cowplot::theme_cowplot()+ cowplot::panel_border()+
theme(axis.text.x = element_blank())
Expand Down
50 changes: 50 additions & 0 deletions R/addGeneAbundance.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#' addGeneAbundance
#'
#' Add the specified gene copy number to metadata
#'
#' @param stana stana object
#' @param candSp candidate species
#' @param IDs gene IDs to add
#' @param target KO or genes
#' @param how how to combine multiple IDs
#' @param newCol new column name
#' @param discNumeric convert discrete value to numeric
#' @param disc discretize the abundance by the threshold. function for calculating
#' threshold, like median
#' @param convert conversion such as log10
#' @export
#' @return stana object
addGeneAbundance <- function(stana, candSp, IDs,
target="KO", how=sum, newCol="gene",
disc=NULL, discNumeric=TRUE, convert=NULL) {
if (target=="KO") {
ints <- intersect(row.names(stana@kos[[candSp]]), IDs)
subMat <- stana@kos[[candSp]][ints, ]
} else {
ints <- intersect(row.names(stana@genes[[candSp]]), IDs)
subMat <- stana@genes[[candSp]][ints, ]
}
if (length(IDs)>1) {
adda <- apply(subMat, 2, how)
} else {
adda <- subMat
}
nm <- names(adda)
if (!is.null(disc)) {
thresh <- do.call(disc, list("x"=adda))
if (discNumeric) {
adda <- as.numeric(adda > thresh)
} else {
adda <- adda > thresh
}
names(adda) <- nm
}
meta <- stana@meta
if (!is.null(convert)) {
adda <- do.call(convert, list(x=adda))
}
meta[[newCol]] <- adda[row.names(stana@meta)]
stana@meta <- meta
return(stana)
}

2 changes: 0 additions & 2 deletions R/alleleStat.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

#' @noRd
alleleStat_metaSNV <- function(stana, sp, cl, deleteZeroDepth) {
statList <- list()
Expand Down Expand Up @@ -43,7 +42,6 @@ alleleStat_metaSNV <- function(stana, sp, cl, deleteZeroDepth) {
#' @param deleteZeroDepth delete snvs with zero depth
#' @export
#' @return list of statistical values
#'
alleleStat <- function(stana, sp=NULL, cl=NULL, base="maj",
deleteZeroDepth=FALSE) {
if (is.null(sp)) {sp <- stana@ids[1]}
Expand Down
2 changes: 1 addition & 1 deletion R/annot.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ anno_eggNOG_keywords <- function(split, genes, tib,
})

lt <- lapply(gene_list, function(x) {
(tib |> dplyr::filter(ID %in% x))$V2
(tib |> dplyr::filter(.data$ID %in% x))$V2
})

names(lt) <- names(gene_list)
Expand Down
6 changes: 4 additions & 2 deletions R/calcGF.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@


#' calcGF
#'
#' Calculate gene family profile
#'
#' @param stana stana object
#' @param candSp candidate species ID
#' @param how how to summarize multiple gene CN assigned to the same KO
#' @param annot eggNOG or manual
#' @param column When eggNOG, which family to summarize, default to KEGG_ko
#' @export
#' @return stana object
calcGF <- function(stana, candSp=NULL, how=sum, annot="eggNOG", column="KEGG_ko") {
checkID(stana, candSp)
if (is.null(candSp)) {cat("Species not specified, the first ID will be used:", stana@ids[1]);
Expand Down
14 changes: 7 additions & 7 deletions R/checkEGGNOG.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@
#' "KEGG_rclass" "BRITE" "KEGG_TC" "CAZy" "BiGG_Reaction"
#' "PFAMs"
#' @export
#' @return list containing ggplot2 object and graph
#'
drawEGGNOG <- function(annot_file, geneIDs, candPlot) {
retList <- list()
egng <- checkEGGNOG(annot_file,"all", geneIDs)
# delete map* entry (in KEGG)
egng <- egng[!grepl("map",egng$value),]
print(egng)
ap <- NULL
for (i in geneIDs) {
tmp <- egng[egng$ID == i,]
Expand Down Expand Up @@ -87,15 +87,15 @@ checkEGGNOG <- function(annot_file, ret="all", checkIDs=NULL, fill=TRUE) {
if (ret!="all") {
parsed <- ann[,c("#query",ret), with=FALSE] |>
tidyr::pivot_longer(-1) |>
dplyr::filter(value!="-") |>
dplyr::mutate(value = strsplit(as.character(value), ",")) |>
tidyr::unnest(value)
dplyr::filter(.data$value!="-") |>
dplyr::mutate(value = strsplit(as.character(.data$value), ",")) |>
tidyr::unnest(.data$value)
} else {
parsed <- ann[, !c("evalue","score"), with=FALSE] |>
tidyr::pivot_longer(-1) |>
dplyr::filter(value!="-") |>
dplyr::mutate(value = strsplit(as.character(value), ",")) |>
tidyr::unnest(value)
dplyr::filter(.data$value!="-") |>
dplyr::mutate(value = strsplit(as.character(.data$value), ",")) |>
tidyr::unnest(.data$value)
}
parsed <- parsed |> `colnames<-`(c("ID","name","value"))
if (!is.null(checkIDs)) {
Expand Down
2 changes: 2 additions & 0 deletions R/checkPATRIC.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#' @import igraph ggraph BiocFileCache RCurl ggplot2
#' @importFrom data.table fread
#' @export
#' @return checkPATRIC results
drawPATRIC <- function(genes,
whichToCount="ec_description",
delSize=0,
Expand Down Expand Up @@ -51,6 +52,7 @@ drawPATRIC <- function(genes,
#' @param skipGraph skip graph plotting
#' @import igraph ggraph BiocFileCache RCurl ggplot2
#' @export
#' @return list of annotations
checkPATRIC <- function(genes,
whichToCount="ec_description",
delSize=0,
Expand Down
1 change: 1 addition & 0 deletions R/checkPATRICSimple.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#' @param whichToCount ec_description, ec_number, pathway_name, pathway_id
#' @import BiocFileCache RCurl
#' @export
#' @return list of annotation
checkPATRICSimple <- function(genes,
whichToCount="ec_description") {
allGenes <- unlist(genes)
Expand Down
2 changes: 1 addition & 1 deletion R/combineGenes.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ combineGenes <- function(stana_list, species) {
if (ovlgr>0) {qqcat("Duplicate label found in group\n")}

new_stana@ids <- species
new_stana@type <- stana@type
new_stana@type <- "Combined"
new_stana@cl <- cls
new_stana@colors <- getColors(cls)

Expand Down
2 changes: 1 addition & 1 deletion R/compareGenes.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#' @param verbose_zero show zero abundance genes
#' @import ggplot2
#' @export
#' @return ggplot
#' @return list of statistical results from wilcox.exact
#'
compareGenes <- function(stana, species=NULL, geneID=NULL, cl=NULL, argList=list(),
verbose_zero=FALSE) {
Expand Down
1 change: 1 addition & 0 deletions R/consensusSeq.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#' @param argList parameters, passed to corresponding functions
#' @export
#' @rdname consensusseq
#' @return stana object
#'
consensusSeq <- function(stana,
species=NULL, argList=list()){
Expand Down
12 changes: 9 additions & 3 deletions R/consensusSeqFast.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#' @param allele_columns columns specifying major and minor allele
#' @rdname consensusseq
#' @export
#' @return stana object
consensusSeqGeneral <- function(
stana,
species=NULL,
Expand Down Expand Up @@ -69,7 +70,7 @@ consensusSeqGeneral <- function(

if (return_mat) {
mat <- do.call(cbind, allele_list)
row.names(mat) <- names(site_filters)
# row.names(mat) <- names(site_filters)
return(mat)
}

Expand Down Expand Up @@ -133,10 +134,12 @@ consensusSeqGeneral <- function(
#' @param output_seq whether to output actual FASTA file
#' @param locus_type locus type to be included, default to CDS
#' @param site_list site list to be included
#' @param site_type site types to be included
#' @param return_mat return matrix of characters
#' @param output_seq whether to output actual FASTA file
#' @rdname consensusseq
#' @export
#' @return stana object
consensusSeqMIDAS2 <- function(
stana,
species=NULL,
Expand All @@ -150,6 +153,7 @@ consensusSeqMIDAS2 <- function(
site_prev=0.9,
locus_type="CDS",
site_list=NULL,
site_type=c("1D","2D","3D","4D"),
cl=NULL,
max_sites=Inf,
tree=FALSE,
Expand Down Expand Up @@ -287,7 +291,7 @@ consensusSeqMIDAS2 <- function(
sum(c(
(length(keep_samples) / length(names(SAMPLES))) >= max(c(1e-6, site_prev)),
pooled_maf >= site_maf,
sp_site_type[i] %in% c("1D","2D","3D","4D"),
sp_site_type[i] %in% site_type,
sp_locus_type[i] %in% locus_type))
return(keep_site_filter)
}) |> unlist()
Expand Down Expand Up @@ -391,6 +395,7 @@ consensusSeqMIDAS2 <- function(
#' @importFrom phangorn read.phyDat
#' @rdname consensusseq
#' @export
#' @return stana object
consensusSeqMIDAS1 <- function(
stana,
species=NULL,
Expand All @@ -405,6 +410,7 @@ consensusSeqMIDAS1 <- function(
max_sites=Inf,
tree=FALSE,
locus_type="CDS",
site_type=c("1D","2D","3D","4D"),
keep_samples=NULL,
verbose=FALSE,
site_list=NULL,
Expand Down Expand Up @@ -517,7 +523,7 @@ consensusSeqMIDAS1 <- function(
(length(keep_samples) / length(names(SAMPLES))) >= max(c(1e-6, site_prev)),
pooled_maf >= site_maf,
sp_ref_allele[i] %in% c("A","T","G","C"),
sp_site_type[i] %in% c("1D","2D","3D","4D"),
sp_site_type[i] %in% site_type,
sp_locus_type[i] %in% locus_type))
return(keep_site_filter)
}) |> unlist()
Expand Down
11 changes: 6 additions & 5 deletions R/convertWideToMaf.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ convertWideToMaf <- function(df) {
})
sp <- unique(df$species_id)
ret <- lapply(sp, function(s) {
tmp <- df %>% dplyr::filter(species_id == s)
tmp <- df %>% dplyr::filter(.data$species_id == s)
tmp <- tmp[,c("snv_id", "maf")]
row.names(tmp) <- tmp$snv_id
tmp$snv_id <- NULL
Expand All @@ -43,7 +43,7 @@ convertWideToMaf <- function(df) {
names(ret) <- sp

summary <- lapply(sp, function(s) {
tmp <- df %>% dplyr::filter(species_id == s)
tmp <- df %>% dplyr::filter(.data$species_id == s)
tmp <- tmp[,c("snv_id", "major", "minor")]
row.names(tmp) <- tmp$snv_id
tmp$snv_id <- NULL
Expand All @@ -62,6 +62,7 @@ convertWideToMaf <- function(df) {
#' combine the maf table produced by convertWideToMaf()
#' @param mafs named list of the results of convertWideToMaf()
#' @export
#' @return stana object
combineMaf <- function(mafs) {
if (is.null(names(mafs))) {stop("The list must be named")}
sps <- do.call(union, lapply(names(mafs), function(s) {
Expand All @@ -77,9 +78,9 @@ combineMaf <- function(mafs) {
return(tmp)
}))
tbl <- tidyr::pivot_wider(tmpdf,
id_cols=snv_id,
names_from=sample,
values_from = maf)
id_cols="snv_id",
names_from="sample",
values_from = "maf")
tbl <- data.frame(tbl)
row.names(tbl) <- tbl$snv_id
tbl$snv_id <- NULL
Expand Down
2 changes: 1 addition & 1 deletion R/doAdonis.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#' @importFrom stats as.formula dist
#' @importFrom utils read.table
#' @export
#' @return stana object
doAdonis <- function(stana, specs, cl=NULL,
target="snps", formula=NULL,
distMethod="manhattan", pcoa=FALSE,
Expand Down Expand Up @@ -144,7 +145,6 @@ doAdonis <- function(stana, specs, cl=NULL,
} else {
argList[["data"]] <- met
}

adores <- do.call("adonis2", argList)

if (pr) {
Expand Down
1 change: 1 addition & 0 deletions R/doBoruta.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#' @return list of Boruta object and plot
#' @import ggplot2
#' @export
#' @return list containing Boruta results
doBoruta <- function(stana, sp, cl=NULL, doFix=TRUE,
target="genes", mat=NULL, whichToCount="ec_description", argList=list(),
deleteZeroDepth=FALSE) {
Expand Down
Loading

0 comments on commit 1f2a7d9

Please sign in to comment.