Skip to content

Commit

Permalink
Merge pull request #78 from randrescastaneda/improve_efficiency
Browse files Browse the repository at this point in the history
Improve_efficiency
  • Loading branch information
randrescastaneda authored Nov 1, 2024
2 parents c05ab27 + 4fa5e98 commit 83b5ed4
Show file tree
Hide file tree
Showing 13 changed files with 1,189 additions and 175 deletions.
35 changes: 24 additions & 11 deletions R/freq_table.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ if (getRversion() >= '2.15.1')
#' @param byvar character: name of variable to tabulate. Use Standard evaluation.
#' @param digits numeric: number of decimal places to display. Default is 1.
#' @param na.rm logical: if TRUE, NA values are excluded from the frequencies. Default is FALSE.
#' @param freq_var_name character: name for frequency variable. Default is "n"
#'
#' @return data.table with frequencies.
#' @export
Expand All @@ -26,33 +27,45 @@ if (getRversion() >= '2.15.1')
freq_table <- function(x,
                       byvar,
                       digits = 1,
                       na.rm = FALSE,
                       freq_var_name = "n") {
  # Tabulate the observed combinations of the `byvar` columns of `x`.
  #
  # x             data frame (converted to data.table when it is not one).
  # byvar         character vector with the names of the columns to tabulate.
  # digits        number of decimals used when rounding the percentages.
  # na.rm         forwarded to `qtab()` as `na.exclude`.
  # freq_var_name name given to the frequency column in the result.
  #
  # Returns a data.table with one row per observed combination, a frequency
  # column, a character `percent` column, and a final "total" row.

  # deparse1() keeps the label a single string even when `x` is an
  # expression such as `df[1:3, ]`.
  x_name <- deparse1(substitute(x))
  if (!is.data.frame(x)) {
    cli::cli_abort("Argument {.arg x} ({.field {x_name}}) must be a data frame")
  }
  if (isFALSE(is.data.table(x))) {
    x <- qDT(x)
  }

  # Fail early, with a clear message, on unusable `byvar` values instead of
  # deferring to the lower-level column-selection error.
  if (!is.character(byvar) || length(byvar) == 0L) {
    cli::cli_abort("Argument {.arg byvar} must be a non-empty character vector")
  }
  missing_vars <- setdiff(byvar, names(x))
  if (length(missing_vars) > 0L) {
    cli::cli_abort("Column(s) {.field {missing_vars}} not found in {.arg x}")
  }

  # `..byvar` makes data.table evaluate `byvar` in the calling scope rather
  # than look for a column literally named "byvar".
  fq <- qtab(x[, ..byvar], na.exclude = na.rm, dnn = byvar)

  # Long format, with the count column renamed from "N" to "n"; combinations
  # with a zero count are dropped.
  ft <- fq |>
    as.data.table() |>
    setnames("N", "n") |>
    fsubset(n > 0)

  N <- fsum(ft$n)
  ft <- ft |>
    ftransform(percent = paste0(round(n / N * 100, digits), "%"))

  # Row with totals: every `byvar` column is labelled "total"
  total_row <- rep("total", length(byvar)) |>
    as.list() |>
    as.data.table() |>
    setnames(new = byvar) |>
    ftransform(n = N,
               percent = "100%")

  ft <- rowbind(ft, total_row)

  # Rename the frequency column to the name requested by the caller
  setrename(ft,
            n = freq_var_name,
            .nse = FALSE)
}



#' Report frequencies from attributes in report var
#'
#' @param x dataframe from [joyn_workhorse]
Expand Down
48 changes: 25 additions & 23 deletions R/is_id.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,40 +35,42 @@ if (getRversion() >= '2.15.1')
#' is_id(y1, by = "id")
is_id <- function(dt,
                  by,
                  verbose = getOption("joyn.verbose", default = FALSE),
                  return_report = FALSE) {
  # Check whether the columns named in `by` uniquely identify the rows of `dt`.
  #
  # dt            data frame or data.table to check.
  # by            character vector with the names of the candidate id columns.
  # verbose       if TRUE, report the outcome (and the duplicates report,
  #               when requested) through cli messages / printing.
  # return_report if TRUE, return (invisibly) the frequency table of the `by`
  #               combinations built by `freq_table()`, with the count column
  #               named "copies"; otherwise return a single logical.

  # Ensure dt is a data.table. No defensive copy is taken because nothing
  # below modifies `dt` by reference.
  if (!is.data.table(dt)) {
    dt <- as.data.table(dt)
  }

  # anyDuplicated() returns 0 when no combination of `by` is repeated
  unique_key <- !(anyDuplicated(dt, by = by) > 0)

  if (verbose) {
    if (unique_key) {
      cli::cli_alert_success("No duplicates found by {.code {by}}")
    } else {
      cli::cli_alert_warning("Duplicates found by: {.code {by}}")
    }
  }

  if (return_report) {
    if (verbose) cli::cli_h3("Duplicates in terms of {.code {by}}")

    d <- freq_table(x = dt,
                    byvar = by,
                    freq_var_name = "copies")

    # Only combinations appearing more than once are printed; the full
    # table is still what gets returned.
    if (verbose) {
      d |>
        fsubset(copies > 1) |>
        print()
    }

    if (verbose) cli::cli_rule(right = "End of {.field is_id()} report")
    return(invisible(d))
  } else {
    return(unique_key)
  }

}

Loading

0 comments on commit 83b5ed4

Please sign in to comment.