Skip to content

Commit

Permalink
snv summary
Browse files Browse the repository at this point in the history
  • Loading branch information
noriakis committed Apr 29, 2024
1 parent acc6727 commit b30d7eb
Show file tree
Hide file tree
Showing 8 changed files with 157 additions and 254 deletions.
1 change: 1 addition & 0 deletions R/consensusSeq.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#' @param species species vectors
#' @param argList parameters, passed to corresponding functions
#' @export
#' @rdname consensusseq
#'
consensusSeq <- function(stana,
species=NULL, argList=list()){
Expand Down
73 changes: 22 additions & 51 deletions R/consensusSeqFast.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,7 @@

#' consensusSeqGeneral
#' @param stana stana obj
#' @param species candidate species vector
#' @param cl cluster, if plot cladogram
#' @param keep_samples the samples to keep
#' @param tree if TRUE, perform tree inference
#' @param verbose output current status
#' @param output_seq whether to output actual FASTA file
#' @param return_mat return matrix of characters
#' @param allele_columns columns specifying major and minor allele
#' @rdname consensusseq
#' @export
consensusSeqGeneral <- function(
stana,
Expand Down Expand Up @@ -116,27 +109,33 @@ consensusSeqGeneral <- function(


#' consensusSeqMIDAS2
#' @param stana stana obj
#' @param stana stana object
#' @param species candidate species vector
#' @param mean_depth parameter for filtering
#' @param fract_cov parameter for filtering
#' @param site_depth parameter for filtering
#' @param site_ratio parameter for filtering
#' @param site_maf parameter for filtering
#' @param allele_support parameter for filtering
#' @param site_prev parameter for filtering
#' @param mean_depth parameter for sample filtering
#' mean coverage across the sample
#' @param fract_cov parameter for sample filtering
#' fraction of coverage per sample
#' @param site_depth parameter for site filtering
#' minimum site depth
#' @param site_ratio parameter for site filtering
#' depth divided by mean coverage
#' @param site_maf parameter for site filtering
#' the minimum maf to be included
#' @param site_prev parameter for site filtering
#' site prevalence across samples
#' @param allele_support parameter for site filtering
#' @param cl cluster, if plot cladogram
#' @param max_sites default to Inf
#' @param keep_samples currently not implemented
#' @param exclude_samples currently not implemented
#' @param rand_samples currently not implemented
#' @param tree if perform tree inference
#' @param max_samples currently not implemented
#' @param max_sites default to Inf, max sites to be retained
#' @param keep_samples samples to keep in inference
#' @param tree whether or not to perform tree inference using dist.ml()
#' and NJ() in default parameters
#' @param verbose output current status
#' @param output_seq whether to output actual FASTA file
#' @param locus_type locus type to be included, default to CDS
#' @param site_list site list to be included
#' @param return_mat return matrix of characters
#' @param output_seq whether to output actual FASTA file
#' @rdname consensusseq
#' @export
consensusSeqMIDAS2 <- function(
stana,
Expand All @@ -154,10 +153,7 @@ consensusSeqMIDAS2 <- function(
cl=NULL,
max_sites=Inf,
tree=FALSE,
max_samples=Inf,
keep_samples=NULL,
exclude_samples=NULL,
rand_samples=NULL,
return_mat=FALSE,
verbose=FALSE) {

Expand Down Expand Up @@ -390,32 +386,10 @@ consensusSeqMIDAS2 <- function(


#' consensusSeqMIDAS1
#'
#' @param stana stana obj
#' @param species candidate species vector
#' @param mean_depth parameter for filtering
#' @param fract_cov parameter for filtering
#' @param site_depth parameter for filtering
#' @param site_ratio parameter for filtering
#' @param site_maf parameter for filtering
#' @param allele_support parameter for filtering
#' @param site_prev parameter for filtering
#' @param cl cluster, if plot cladogram
#' @param max_sites currently not implemented
#' @param keep_samples currently not implemented
#' @param exclude_samples currently not implemented
#' @param rand_samples currently not implemented
#' @param tree if perform tree inference using dist.ml()
#' and NJ() in default parameters
#' @param max_samples currently not implemented
#' @param verbose print output
#' @param output_seq output the FASTA file
#' @param return_mat return character matrix
#' @param locus_type locus type to be included
#' @param site_list site list to be included
#' @importFrom phangorn read.phyDat dist.ml NJ
#' @import ggtree ggplot2
#' @importFrom phangorn read.phyDat
#' @rdname consensusseq
#' @export
consensusSeqMIDAS1 <- function(
stana,
Expand All @@ -431,10 +405,7 @@ consensusSeqMIDAS1 <- function(
max_sites=Inf,
tree=FALSE,
locus_type="CDS",
max_samples=Inf,
keep_samples=NULL,
exclude_samples=NULL,
rand_samples=NULL,
verbose=FALSE,
site_list=NULL,
return_mat=FALSE,
Expand Down
38 changes: 31 additions & 7 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,18 +89,42 @@ plotSNVInfo <- function(stana, sp) {
#' @param stana stana object
#' @param sp species_id
#' @param param parameter to plot
#' @param perSample if TRUE, plot one bar per sample with the median drawn as a dashed line; if FALSE (default), plot a boxplot
#' @export
#' @return ggplot object
plotSNVSummary <- function(stana, sp, param="mean_coverage", perSample=FALSE) {
    # Plot one column (`param`) of the SNV summary table for species `sp`.
    # perSample=TRUE  -> one bar per sample plus a dashed line at the median.
    # perSample=FALSE -> a boxplot, split by group when groups are set.
    # Returns a ggplot object.
    df <- stana@snpsSummary
    if (nrow(df) == 0) {stop("SNV summary not available")}
    df <- df %>% dplyr::filter(species_id == sp)

    # Group labels can only be attached when stana@cl is non-empty;
    # listToNV() is used here to look up a label per sample name.
    hasGroup <- length(stana@cl) != 0
    if (hasGroup) {
        df[["group"]] <- listToNV(stana@cl)[df$sample_name]
    }

    if (perSample) {
        # Median across the samples of this species, shown as a reference line
        med <- median(df[[param]], na.rm=TRUE)
        p <- ggplot(df, aes(x=sample_name, y=.data[[param]]))
        if (hasGroup) {
            p <- p + geom_col(aes(fill=group)) +
                scale_fill_manual(values=stana@colors)
        } else {
            p <- p + geom_col()
        }
        p +
            # Bars start at the axis; keep a small margin at the top only
            scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
            cowplot::theme_cowplot() +
            # Sample labels are hidden on the x axis
            theme(axis.text.x = element_blank()) +
            geom_hline(yintercept = med, lty=2)
    } else if (hasGroup) {
        # Fill must be mapped to the per-row `group` column. Mapping it to
        # stana@colors treats the palette vector as data; the palette is
        # applied through scale_fill_manual(), as in the per-sample branch.
        ggplot(df, aes(x=group, y=.data[[param]])) +
            geom_boxplot(aes(fill=group), alpha=0.5) +
            scale_fill_manual(values=stana@colors) +
            cowplot::theme_cowplot()
    } else {
        ggplot(df, aes(y=.data[[param]])) +
            geom_boxplot() + cowplot::theme_cowplot()
    }
}
102 changes: 100 additions & 2 deletions man/consensusSeq.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 0 additions & 40 deletions man/consensusSeqGeneral.Rd

This file was deleted.

Loading

0 comments on commit b30d7eb

Please sign in to comment.