diff --git a/.gitignore b/.gitignore index 2eb7b45..ce41520 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ -.Rproj.user -.Rhistory -.RData -.Ruserdata -inst/doc -wrappedtools.Rproj +.Rproj.user +.Rhistory +.RData +.Ruserdata +inst/doc +wrappedtools.Rproj +.DS_Store diff --git a/DESCRIPTION b/DESCRIPTION index 60da9d4..afb75d1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,6 +22,11 @@ Authors@R: c( family = "Asser", role = c("aut"), email = "billyasser@hotmail.co.uk", + comment = ""), + person(given = "Franziska", + family = "Eidloth", + role = c("aut"), + email = "franziska.eidloth@gmail.com", comment = "")) Maintainer: Andreas Busjahn License: GPL-3 diff --git a/NAMESPACE b/NAMESPACE index 04286df..3ed7df5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -28,6 +28,7 @@ export(logrange_12357) export(logrange_15) export(logrange_5) export(markSign) +export(mean_cl_boot) export(meansd) export(meanse) export(median_cl_boot) diff --git a/NEWS.md b/NEWS.md index 6e49277..8d8dbc9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ #wrappedtools 0.9.7 - function identical_cols to find and remove duplicated columns +- function compare2numvars can now additionally calculate confidence intervals +- function compare2numvars now has the additional option for a singleline or stacked display +- new function mean_cl_boot which calculates the mean and confidence intervals +- function median_cl_boot now has an additional round option #wrappedtools 0.9.6 - function ksnormal now uses Lilliefors test by default diff --git a/R/descriptives.R b/R/descriptives.R index 70b2ebf..3d88a37 100644 --- a/R/descriptives.R +++ b/R/descriptives.R @@ -255,6 +255,8 @@ se_median <- function(x) { #' @param conf confidence interval with default 95%. #' @param type type for function boot.ci. #' @param nrepl number of bootstrap replications, defaults to 1000. +#' @param round logical, applies [roundR] function to results. Output is character. 
+#' @param roundDig number of relevant digits for function [roundR]. #' #' @return A tibble with one row and three columns: Median, CIlow, CIhigh. #' @@ -262,18 +264,26 @@ se_median <- function(x) { #' # basic usage of median_cl_boot #' median_cl_boot(x = mtcars$wt) #' @export -median_cl_boot <- function(x, conf = 0.95, type = "basic", nrepl = 10^3) { +median_cl_boot <- function(x, conf = 0.95, type = "basic", nrepl = 10^3, round = FALSE, roundDig = 2) { x <- na.omit(x) lconf <- (1 - conf) / 2 uconf <- 1 - lconf bmedian <- function(x, ind) median(x[ind], na.rm = TRUE) bt <- boot::boot(x, bmedian, nrepl) bb <- boot::boot.ci(bt, type = type) - tibble( - Median = median(x, na.rm = TRUE), - CIlow = quantile(bt$t, lconf), - CIhigh = quantile(bt$t, uconf) - ) + if (round) { + return(tibble( + Median = roundR(median(x, na.rm = TRUE), level = roundDig), + CIlow = roundR(quantile(bt$t, lconf), level = roundDig), + CIhigh = roundR(quantile(bt$t, uconf), level = roundDig) + )) + } else { + return(tibble( + Median = median(x, na.rm = TRUE), + CIlow = quantile(bt$t, lconf), + CIhigh = quantile(bt$t, uconf) + )) + } } #' Rename output from \link{median_cl_boot} for use in ggplot. #' @@ -296,6 +306,47 @@ median_cl_boot_gg <- function(x){ rename(y="Median",ymin="CIlow",ymax="CIhigh") return(out) } + +#' Compute confidence interval of mean by bootstrapping. +#' +#' \code{mean_cl_boot} computes lower and upper confidence limits for the +#' estimated mean, based on bootstrapping. +#' +#' @param x Data for computation. +#' @param conf confidence interval with default 95%. +#' @param type type for function boot.ci. +#' @param nrepl number of bootstrap replications, defaults to 1000. +#' @param round logical, applies [roundR] function to results. Output is character. +#' @param roundDig Number of relevant digits for function [roundR]. +#' +#' @return A tibble with one row and three columns: Mean, CIlow, CIhigh. 
+#' +#' @examples +#' # basic usage of mean_cl_boot +#' mean_cl_boot(x = mtcars$wt) +#' @export +mean_cl_boot <- function(x, conf = 0.95, type = "basic", nrepl = 10^3, + round = FALSE, roundDig = 2) ## +{ + x <- na.omit(x) + lconf <- (1 - conf)/2 + uconf <- 1 - lconf + bmean <- function(x, ind) mean(x[ind], na.rm = TRUE) + bt <- boot::boot(x, bmean, nrepl) + bb <- boot::boot.ci(bt, type = type) + + if(round){ + tibble(Mean = roundR(mean(x, na.rm = TRUE), level = roundDig), + CIlow = roundR(quantile(bt$t, lconf), level = roundDig), + CIhigh = roundR(quantile(bt$t, uconf), level = roundDig) + ) + } else{ + tibble(Mean = mean(x, na.rm = TRUE), + CIlow = quantile(bt$t, lconf), + CIhigh = quantile(bt$t, uconf) + ) + } +} #' Compute absolute and relative frequencies. #' #' \code{cat_desc_stats} computes absolute and relative frequencies for diff --git a/R/pkgstart.R b/R/pkgstart.R index 79ecacb..c245213 100644 --- a/R/pkgstart.R +++ b/R/pkgstart.R @@ -40,7 +40,7 @@ NULL #' @importFrom rlist list.append NULL -#' @importFrom forcats fct_lump_n fct_drop +#' @importFrom forcats fct_lump_n fct_drop fct_inorder NULL #' @importFrom grDevices boxplot.stats diff --git a/R/tests.R b/R/tests.R index 6e446a9..ccf061c 100644 --- a/R/tests.R +++ b/R/tests.R @@ -401,7 +401,10 @@ t_var_test <- function(data, formula, cutoff = .05) { #' @param pretext for function [formatP]. #' @param mark for function [formatP]. #' @param n create columns for n per group? -#' @param add_n add n to descriptive statistics? +#' @param add_n add n to descriptive statistics. Will automatically be set to TRUE, if singleline = FALSE and n = TRUE to keep it for the long table format. +#' @param singleline Put all group levels in a single line (default) or below each other. +#' @param indentor Optional text element to indent descriptive stats when using singleline = FALSE. Defaults to " ". +#' @param ci Computes lower and upper confidence limits for the estimated mean/median, based on bootstrapping. 
#' #' @return #' A tibble with variable names, descriptive statistics, and p-value, @@ -428,14 +431,21 @@ compare2numvars <- function(data, dep_vars, indep_var, range = FALSE, rangesep = " ", pretext = FALSE, mark = FALSE, - n = FALSE, add_n = FALSE) { + n = FALSE, add_n = FALSE, + singleline = TRUE, indentor = " ", ci = FALSE) { `.` <- Group <- Value <- Variable <- desc_groups <- NULL if (gaussian) { DESC <- meansd COMP <- t_var_test + DESC_CI <- mean_cl_boot + string <- "(\\d+\\s*±\\s*\\d+)\\s*(\\[\\d+\\s*->\\s*\\d+\\])\\s*(\\[n=\\d+\\])\\s*(\\[\\d+\\s*;\\s*\\d+\\])" + order <- "\\1 \\4 \\2 \\3" } else { DESC <- median_quart COMP <- wilcox.test + DESC_CI <- median_cl_boot + string <- "(\\d+)\\s*\\((\\d+/\\d+)\\)\\s*(\\[\\d+\\s*->\\s*\\d+\\])\\s*(\\[n=\\d+\\])\\s*(\\[\\d+\\s*;\\s*\\d+\\])" + order <- "\\1 (\\2) \\5 \\3 \\4" } # descnames <- names(formals(DESC)) # pnames <- names(formals(COMP)) @@ -445,67 +455,178 @@ compare2numvars <- function(data, dep_vars, indep_var, Group = all_of(indep_var), all_of(dep_vars) ) |> - mutate(Group = factor(Group) |> fct_drop()) |> - pivot_longer(-Group,names_to = 'Variable',values_to = 'Value') |> - # gather(key = Variable, value = Value, -Group) |> + mutate(Group = fct_drop(factor(Group))) |> + pivot_longer(-Group,names_to = 'Variable',values_to = 'Value') |> mutate(Variable = forcats::fct_inorder(Variable)) |> # na.omit() |> as_tibble() + if(nlevels(data_l$Group)!=2){ stop(paste0('Other than 2 groups provided for ',indep_var,': ', paste(levels(data_l$Group),collapse='/'), ". 
Look into function compare_n_numvars.")) } + + if (!singleline && n && !add_n){ + add_n = TRUE + print(glue::glue("add_n will be set to TRUE to calculate n for long table format (singleline = FALSE)")) + } + data_l <- data_l |> - filter(!is.na(Group)) - out <- data_l |> - group_by(Variable) |> - do(summarise( - .data = ., - n_groups = paste(table(.$Group[which(!is.na(.$Value))]), collapse = ":"), - desc_all = DESC(.$Value, - roundDig = round_desc, - range = range, rangesep = rangesep, - add_n = add_n - ), - desc_groups = paste(try( - DESC( - x = .$Value, groupvar = .$Group, - roundDig = round_desc, range = range, - rangesep = rangesep, add_n = add_n - ), - silent = TRUE - ), - collapse = ":" - ), - p = formatP(try( - suppressWarnings(COMP(formula = as.formula("Value~Group"), data = .)$p.value), - silent = TRUE - ), - ndigits = round_p, pretext = pretext, - mark = mark - ) |> as.character() - )) |> - ungroup() - out$desc_groups[!str_detect(out$desc_groups, ":")] <- " : " - out <- separate(out, - col = desc_groups, - into = glue::glue("{indep_var} {levels(data_l$Group)}"), - sep = ":" - ) - out <- separate(out, - col = n_groups, - into = glue::glue("n {indep_var} {levels(data_l$Group)}"), - sep = ":" - ) - out$n <- apply(out[, 2:3], 1, function(x) { - sum(as.numeric(x)) - }) - out <- out |> dplyr::select(1, n, starts_with("n "), everything()) + dplyr::filter(!is.na(Group)) + + out <- data_l |> + group_by(Variable) |> + summarise( + n_groups = paste(table(Group[!is.na(Value)]), collapse = ":"), + desc_all = DESC(Value, roundDig = round_desc, + range = range, + rangesep = rangesep, + add_n = add_n), + all_CI = DESC_CI(Value, round = TRUE) |> + transmute(ci = paste0("[", CIlow, "; ", CIhigh, "]")) |> + pull(ci), + desc_groups = try(DESC(Value, groupvar = Group, + roundDig = round_desc, + range = range, rangesep = + rangesep, add_n = add_n), + silent = TRUE) |> + paste(collapse = ":"), + p = try(suppressWarnings(COMP(Value ~ Group, data = cur_data())$p.value), + silent = 
TRUE) |> + formatP(ndigits = round_p, + pretext = pretext, + mark = mark) |> as.character(), + .groups = "drop" + ) + + group_ci <- data_l |> + group_by(Variable, Group) |> + summarise(ci = DESC_CI(Value, round = TRUE) |> + transmute(ci = paste0("[", CIlow, "; ", CIhigh, "]")) |> + pull(ci), + .groups = "drop") |> + pivot_wider(names_from = Group, values_from = ci, names_prefix = "CI_") - if (n == FALSE) { - out <- dplyr::select(out, -n, -starts_with("n ")) + out <- left_join(out, group_ci, by = "Variable") + out <- out |> + separate(desc_groups, into = c("g1", "g2"), + sep = ":", fill = "right") |> + separate(n_groups, into = glue::glue("n {indep_var} {levels(data_l$Group)}"), + sep = ":") |> + mutate(n = rowSums(across(starts_with("n "), as.numeric), + na.rm = TRUE)) |> + dplyr::select(1, n, starts_with("n "), everything()) + + if (ci){ + out <- out |> + mutate( + desc_all = paste(desc_all, all_CI) |> + str_replace(string, order), + g1 = paste(g1, out[[10]]) |> + str_replace(string, order), + g2 = paste(g2, out[[11]]) |> + str_replace(string, order) + ) |> + dplyr::select(-contains("CI")) + } + else{ + out <- out |> + dplyr::select(-contains("CI")) + } + + if (!n) { + out <- dplyr::select(out, -starts_with("n")) } + + if (!singleline) { + out_tmp <- + out |> + dplyr::select(-starts_with("n")) |> + pivot_longer(cols = -c(Variable, p), + names_to = "group", + values_to = "stats") |> + mutate( + n = str_extract(stats, "\\[n=\\d+\\]") |> + str_extract("\\d+") |> + as.character(), + "Mean (95% CI)" = if (gaussian) { + paste0(str_extract(stats, "^\\d+")," (", + str_extract(stats, "\\[\\d+; \\d+\\]") |> + str_remove_all("[\\[\\]]") |> + str_replace(";", "/"), + ")") + } else {NA_character_}, + SD = if (gaussian) { + str_extract(stats, "(\\d+)\\s*±\\s*(\\d+)") |> + str_extract("\\d+$") |> + as.character()} + else {NA_character_}, + "Median (95% CI)" = if (!gaussian) { + paste0(str_extract(stats, "^\\d+")," (", + str_extract(stats, "\\[\\d+; \\d+\\]") |> + 
str_remove_all("[\\[\\]]") |> + str_replace(";", "/"), + ")")} + else {NA_character_}, + Quartiles = if (!gaussian) { + str_extract(stats, "\\(\\d+/\\d+\\)") |> + str_remove_all("[\\(\\)]") |> + as.character() + } else { + NA_character_}, + "min -> max" = str_extract(stats, "\\[\\d+\\s*->\\s*\\d+\\]") |> + str_remove_all("[\\[\\]]") + ) |> + select(-stats) |> + select_if(~ !any(is.na(.))) |> + pivot_longer(-c(Variable, group, p), + names_to = "stats", + values_to = "values") |> + pivot_wider(names_from = "group", + values_from = "values") + + for (var_i in dep_vars){ + row <- filter(out_tmp, Variable == var_i) |> + mutate(stats = "", + desc_all = "", + "g1" = "", + "g2" = "") |> + unique() + out_tmp <- dplyr::add_row(out_tmp, row) + } + + out <- out_tmp |> + arrange(Variable) |> + group_by(Variable) |> + arrange(stats != "", .by_group = TRUE) |> + ungroup() |> + mutate(Variable = case_when( + stats == "n" ~ paste0(indentor, "n"), + stats == "Mean (95% CI)" ~ paste0(indentor, "Mean (95% CI)"), + stats == "Median (95% CI)" ~ paste0(indentor, "Median (95% CI)"), + stats == "Quartiles" ~ paste0(indentor, "Quartiles"), + stats == "SD" ~ paste0(indentor, "SD"), + stats == "min -> max" ~ paste0(indentor, "min -> max"), + TRUE ~ Variable), + p = case_when( + stats == "Mean (95% CI)" ~ "", + stats == "Median (95% CI)" ~ "", + stats == "SD" ~ "", + stats == "Quartiles" ~ "", + stats == "min -> max" ~ "", + stats == "n" ~ "", + TRUE ~ p) + ) |> + select(Variable, desc_all, g1, g2, p) + } + + out <- out |> + rename( + !!glue::glue("{indep_var} {levels(data_l$Group)[1]}") := "g1", + !!glue::glue("{indep_var} {levels(data_l$Group)[2]}") := "g2" + ) + return(out) } diff --git a/man/compare2numvars.Rd b/man/compare2numvars.Rd index d7c969a..04026b4 100644 --- a/man/compare2numvars.Rd +++ b/man/compare2numvars.Rd @@ -16,7 +16,10 @@ compare2numvars( pretext = FALSE, mark = FALSE, n = FALSE, - add_n = FALSE + add_n = FALSE, + singleline = TRUE, + indentor = " ", + ci = FALSE ) } 
\arguments{ @@ -43,6 +46,12 @@ compare2numvars( \item{n}{create columns for n per group?} \item{add_n}{add n to descriptive statistics?} + +\item{singleline}{Put all group levels in a single line (default) or below each other.} + +\item{indentor}{Optional text element to indent descriptive stats when using singleline = FALSE. Defaults to " ".} + +\item{ci}{Computes lower and upper confidence limits for the estimated mean/median, based on bootstrapping.} } \value{ A tibble with variable names, descriptive statistics, and p-value, diff --git a/man/mean_cl_boot.Rd b/man/mean_cl_boot.Rd new file mode 100644 index 0000000..1e17db6 --- /dev/null +++ b/man/mean_cl_boot.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/descriptives.R +\name{mean_cl_boot} +\alias{mean_cl_boot} +\title{Compute confidence interval of mean by bootstrapping.} +\usage{ +mean_cl_boot(x, conf = 0.95, type = "basic", nrepl = 10^3, roundDig = 2) +} +\arguments{ +\item{x}{Data for computation.} + +\item{conf}{confidence interval with default 95\%.} + +\item{type}{type for function boot.ci.} + +\item{nrepl}{number of bootstrap replications, defaults to 1000.} + +\item{roundDig}{Number of relevant digits for function \link{roundR}.} +} +\value{ +A tibble with one row and three columns: Mean, CIlow, CIhigh. +} +\description{ +\code{mean_cl_boot} computes lower and upper confidence limits for the +estimated mean, based on bootstrapping. 
+} +\examples{ +# basic usage of mean_cl_boot +mean_cl_boot(x = mtcars$wt) +} diff --git a/man/median_cl_boot.Rd b/man/median_cl_boot.Rd index 5c5934c..6379ed5 100644 --- a/man/median_cl_boot.Rd +++ b/man/median_cl_boot.Rd @@ -4,7 +4,7 @@ \alias{median_cl_boot} \title{Compute confidence interval of median by bootstrapping.} \usage{ -median_cl_boot(x, conf = 0.95, type = "basic", nrepl = 10^3) +median_cl_boot(x, conf = 0.95, type = "basic", nrepl = 10^3, roundDig = 2) } \arguments{ \item{x}{Data for computation.} @@ -14,6 +14,8 @@ median_cl_boot(x, conf = 0.95, type = "basic", nrepl = 10^3) \item{type}{type for function boot.ci.} \item{nrepl}{number of bootstrap replications, defaults to 1000.} + +\item{roundDig}{Number of relevant digits for function \link{roundR}.} } \value{ A tibble with one row and three columns: Median, CIlow, CIhigh. diff --git a/wrappedtools.Rproj b/wrappedtools.Rproj deleted file mode 100644 index 4667186..0000000 --- a/wrappedtools.Rproj +++ /dev/null @@ -1,19 +0,0 @@ -Version: 1.0 -ProjectId: 0b996309-24a4-4d60-98ab-89fa57cdd5af - -RestoreWorkspace: Default -SaveWorkspace: Default -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: Sweave -LaTeX: pdfLaTeX - -BuildType: Package -PackageUseDevtools: Yes -PackageInstallArgs: --no-multiarch --with-keep.source -PackageRoxygenize: rd,collate,namespace,vignette