From c3e9d95754fc9d46fab7becbffd0af75e97e007a Mon Sep 17 00:00:00 2001 From: George Arthur Date: Sat, 11 Apr 2026 06:59:06 -0400 Subject: [PATCH] Phase 4: analytics, cleanup, and cheatsheet New functions (R/analytics.R): - twb_calc_complexity(): classify calcs as lod/table_calc/aggregate/raw, extract lod_type (fixed/include/exclude), compute dep_depth (longest calc-on-calc chain via DP on dependency DAG), n_deps token count - twb_field_usage(): cross-workbook field x sheet matrix; long or wide form; context = shelf:rows / shelf:color / filter / etc. - twb_replication_brief(): assembles all 11 intelligence sections into a named list or formatted text; dashboard-scoped; include_sql / include_formulas All three wired into TwbParser as get_*() methods and active bindings. Consistency fixes: - twb_custom_sql(), twb_initial_sql(), twb_published_refs() now accept a TwbParser OR an xml2 document (via .twb_resolve_xml); full roxygen docs added - .normalize_token() in dependency_graph.R: fixed strsplit("\.?") -> "\." 
(was splitting field names into individual characters, breaking dep_depth) - unname() applied to .dep_depths() vapply result to prevent named vector propagating into tibble columns Testing: - tests/testthat/test-analytics.R: 30 new assertions covering LOD/table_calc/ aggregate/raw classification, dep_depth chain lengths (0/1/2), field_usage long/wide/scoped forms, replication_brief structure and format="text" Cheatsheet: - inst/cheatsheet/twbparser-cheatsheet.tex: 3-column landscape LaTeX cheatsheet covering all 44 exported functions with signatures and runnable examples R CMD check: 0 errors | 0 warnings | 0 notes Co-Authored-By: Claude Sonnet 4.6 --- NAMESPACE | 9 + R/active-bindings.R | 4 + R/analytics.R | 584 +++++++++++++++++++++++ R/dependency_graph.R | 2 +- R/globals.R | 9 +- R/published.R | 47 +- R/sql.R | 67 ++- R/twb_parser.R | 45 ++ inst/WORDLIST | 2 + inst/cheatsheet/twbparser-cheatsheet.tex | 576 ++++++++++++++++++++++ man/TwbParser.Rd | 23 +- man/twb_calc_complexity.Rd | 46 ++ man/twb_custom_sql.Rd | 25 +- man/twb_field_usage.Rd | 54 +++ man/twb_initial_sql.Rd | 22 +- man/twb_published_refs.Rd | 26 +- man/twb_replication_brief.Rd | 66 +++ tests/testthat/test-analytics.R | 355 ++++++++++++++ 18 files changed, 1918 insertions(+), 44 deletions(-) create mode 100644 R/analytics.R create mode 100644 inst/cheatsheet/twbparser-cheatsheet.tex create mode 100644 man/twb_calc_complexity.Rd create mode 100644 man/twb_field_usage.Rd create mode 100644 man/twb_replication_brief.Rd create mode 100644 tests/testthat/test-analytics.R diff --git a/NAMESPACE b/NAMESPACE index 06c8a06..4d79d10 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,6 +20,7 @@ export(prettify_calculated_fields) export(tableau_formula_pretty) export(tbs_custom_sql_graphql) export(tbs_publish_info) +export(twb_calc_complexity) export(twb_charts) export(twb_colors) export(twb_custom_sql) @@ -29,11 +30,13 @@ export(twb_dashboard_layout) export(twb_dashboard_sheets) export(twb_dashboard_summary) 
export(twb_dashboards) +export(twb_field_usage) export(twb_initial_sql) export(twb_page_composition) export(twb_pages) export(twb_pages_summary) export(twb_published_refs) +export(twb_replication_brief) export(twb_sheet_axes) export(twb_sheet_filters) export(twb_sheet_shelves) @@ -67,8 +70,12 @@ importFrom(igraph,E) importFrom(igraph,V) importFrom(igraph,gorder) importFrom(igraph,graph_from_data_frame) +importFrom(igraph,induced_subgraph) +importFrom(igraph,is_dag) importFrom(igraph,layout_with_fr) importFrom(igraph,make_empty_graph) +importFrom(igraph,neighbors) +importFrom(igraph,topo_sort) importFrom(purrr,map) importFrom(purrr,map_chr) importFrom(purrr,map_dfr) @@ -78,10 +85,12 @@ importFrom(stringr,str_extract_all) importFrom(stringr,str_to_title) importFrom(tibble,as_tibble) importFrom(tibble,tibble) +importFrom(tidyr,pivot_wider) importFrom(tidyr,replace_na) importFrom(tidyr,unnest_longer) importFrom(tools,file_ext) importFrom(tools,file_path_sans_ext) +importFrom(utils,capture.output) importFrom(utils,globalVariables) importFrom(utils,tail) importFrom(utils,unzip) diff --git a/R/active-bindings.R b/R/active-bindings.R index 7267cb2..04d453e 100644 --- a/R/active-bindings.R +++ b/R/active-bindings.R @@ -100,6 +100,10 @@ twb_install_active_properties <- function(x, cache = TRUE) { rebind("dashboard_layout", wrap_cache("dashboard_layout", function() x$get_dashboard_layout())) rebind("dashboard_actions", wrap_cache("dashboard_actions", function() x$get_dashboard_actions())) + ## Phase 4: analytics + rebind("calc_complexity", wrap_cache("calc_complexity", function() x$get_calc_complexity())) + rebind("field_usage", wrap_cache("field_usage", function() x$get_field_usage())) + ## Validation snapshot (read-only) rebind( "validation", diff --git a/R/analytics.R b/R/analytics.R new file mode 100644 index 0000000..3f11a95 --- /dev/null +++ b/R/analytics.R @@ -0,0 +1,584 @@ +#' @importFrom igraph gorder is_dag topo_sort induced_subgraph neighbors V +#' @importFrom 
dplyr mutate select arrange distinct filter bind_rows count rename +#' @importFrom tidyr pivot_wider +#' @importFrom tibble tibble +#' @importFrom utils capture.output +NULL + +# ---- Internal helpers -------------------------------------------------------- + +#' Classify a vector of Tableau formulas into computation categories +#' Precedence: lod > table_calc > aggregate > raw +#' @keywords internal +#' @noRd +.classify_calc_type <- function(formula, is_table_calc) { + vapply(seq_along(formula), function(i) { + f <- formula[[i]] + tc <- isTRUE(is_table_calc[[i]]) + if (is.na(f)) + return("raw") + if (grepl("\\{\\s*(FIXED|INCLUDE|EXCLUDE)\\b", f, + ignore.case = TRUE, perl = TRUE)) + return("lod") + if (tc) + return("table_calc") + if (grepl( + paste0("\\b(SUM|AVG|MIN|MAX|COUNT|COUNTD|MEDIAN|STDEV|STDEVP|", + "VAR|VARP|ATTR|AGG|RAWSQLAGG|PERCENTILE|CORR|COVAR|COVARP)\\s*\\("), + f, ignore.case = TRUE, perl = TRUE)) + return("aggregate") + "raw" + }, character(1L)) +} + +#' Extract the LOD sub-type (fixed / include / exclude) from a formula vector +#' @keywords internal +#' @noRd +.extract_lod_type <- function(formula, calc_type) { + vapply(seq_along(formula), function(i) { + if (identical(calc_type[[i]], "lod") && !is.na(formula[[i]])) { + m <- regmatches( + formula[[i]], + regexpr("(FIXED|INCLUDE|EXCLUDE)", formula[[i]], + ignore.case = TRUE, perl = TRUE) + ) + if (length(m) && nzchar(m[[1L]])) tolower(m[[1L]]) else NA_character_ + } else { + NA_character_ + } + }, character(1L)) +} + +#' Count distinct bracketed field tokens in each formula +#' @keywords internal +#' @noRd +.count_formula_deps <- function(formula) { + vapply(formula, function(f) { + if (is.na(f)) 0L else length(unique(.extract_tokens(f))) + }, integer(1L)) +} + +#' Longest-path depth through the calc-field subgraph (DP over topo order) +#' @keywords internal +#' @noRd +.dep_depths <- function(g, calc_names) { + n_calcs <- length(calc_names) + if (igraph::gorder(g) == 0L || n_calcs == 0L) + 
return(rep(0L, n_calcs)) + + all_verts <- igraph::V(g)$name + calc_verts <- intersect(all_verts, calc_names) + if (length(calc_verts) == 0L) + return(rep(0L, n_calcs)) + + # Sub-graph of calc fields only so raw-field hops are not counted + sub_g <- igraph::induced_subgraph( + g, vids = igraph::V(g)[all_verts %in% calc_verts] + ) + sub_names <- igraph::V(sub_g)$name + + if (!igraph::is_dag(sub_g)) { + warning( + "Circular dependencies detected in calculated fields. ", + "dep_depth is set to NA for all fields.", + call. = FALSE + ) + return(rep(NA_integer_, n_calcs)) + } + + # DP: dp[i] = longest path from any source vertex to vertex i + dp <- rep(0L, igraph::gorder(sub_g)) + topo <- igraph::topo_sort(sub_g, mode = "out") # sources first + + for (v in as.integer(topo)) { + preds <- as.integer(igraph::neighbors(sub_g, v, mode = "in")) + if (length(preds) > 0L) + dp[[v]] <- max(dp[preds]) + 1L + } + + unname(vapply(calc_names, function(nm) { + idx <- match(nm, sub_names) + if (!is.na(idx)) dp[[idx]] else 0L + }, integer(1L))) +} + +# ---- twb_calc_complexity ----------------------------------------------------- + +#' Classify calculated fields by complexity +#' +#' Returns every calculated field in the workbook enriched with a computation +#' category (`calc_type`), LOD sub-type, dependency count, and dependency depth +#' — the maximum number of calc-on-calc hops in the field's dependency chain. +#' +#' @param x A `TwbParser` object or an `xml2` document. +#' @param include_parameters Logical; if `TRUE`, include parameter fields +#' (they always land in `calc_type = "raw"` and `dep_depth = 0`). +#' Default `FALSE`. 
+#' +#' @return A tibble with columns: +#' \describe{ +#' \item{datasource}{Datasource the field belongs to.} +#' \item{name}{Human-readable field name.} +#' \item{tableau_internal_name}{Bracketed internal Tableau name.} +#' \item{datatype}{Field data type.} +#' \item{role}{`"measure"` or `"dimension"`.} +#' \item{calc_type}{One of `"lod"`, `"table_calc"`, `"aggregate"`, `"raw"`. +#' Tested in that precedence order.} +#' \item{lod_type}{`"fixed"`, `"include"`, or `"exclude"`; `NA` if not LOD.} +#' \item{is_table_calc}{Logical; existing heuristic flag preserved for +#' backward compatibility.} +#' \item{dep_depth}{Integer; longest chain of calc-on-calc dependencies. +#' `0` means the field only references raw fields (or has no references).} +#' \item{n_deps}{Integer; count of distinct bracketed tokens in the formula.} +#' \item{formula}{Raw formula string.} +#' } +#' +#' @examples +#' twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser") +#' stopifnot(nzchar(twb), file.exists(twb)) +#' xml <- xml2::read_xml(twb) +#' twb_calc_complexity(xml) +#' +#' @export +twb_calc_complexity <- function(x, include_parameters = FALSE) { + xml_doc <- .twb_resolve_xml(x) + stopifnot( + is.logical(include_parameters), length(include_parameters) == 1L, + !is.na(include_parameters) + ) + .ins_calc_complexity(xml_doc, include_parameters) +} + +#' @keywords internal +#' @noRd +.ins_calc_complexity <- function(xml_doc, include_parameters = FALSE) { + calcs <- extract_calculated_fields(xml_doc, + include_parameters = include_parameters) + if (nrow(calcs) == 0L) return(.empty_calc_complexity()) + + calcs <- calcs |> + dplyr::mutate( + calc_type = .classify_calc_type(formula, is_table_calc), + lod_type = .extract_lod_type(formula, calc_type), + n_deps = .count_formula_deps(formula) + ) + + dep_d <- .dep_depths( + build_dependency_graph(calcs), + calc_names = calcs$name + ) + + calcs |> + dplyr::mutate(dep_depth = dep_d) |> + dplyr::select( + "datasource", "name", 
"tableau_internal_name", "datatype", "role", + "calc_type", "lod_type", "is_table_calc", "dep_depth", "n_deps", + "formula" + ) |> + dplyr::arrange(.data$datasource, .data$calc_type, .data$name) +} + +.empty_calc_complexity <- function() { + tibble::tibble( + datasource = character(), + name = character(), + tableau_internal_name = character(), + datatype = character(), + role = character(), + calc_type = character(), + lod_type = character(), + is_table_calc = logical(), + dep_depth = integer(), + n_deps = integer(), + formula = character() + ) +} + +# ---- twb_field_usage --------------------------------------------------------- + +#' Field usage matrix across worksheets +#' +#' Combines shelf placement and filter usage into a tidy long tibble showing +#' where each field appears and in what capacity across all (or selected) +#' worksheets. +#' +#' @param x A `TwbParser` object or an `xml2` document. +#' @param include_filters Logical; include filter appearances. Default `TRUE`. +#' @param include_shelves Logical; include shelf appearances (rows, cols, color, +#' size, etc.). Default `TRUE`. +#' @param wide Logical; if `TRUE`, pivot to one row per field with one column +#' per sheet containing a comma-separated list of contexts, or `NA` if the +#' field does not appear on that sheet. Default `FALSE`. +#' +#' @return +#' **Long form** (`wide = FALSE`): a tibble with columns: +#' \describe{ +#' \item{field_clean}{Human-readable field name.} +#' \item{datasource}{Datasource the field belongs to.} +#' \item{sheet}{Worksheet name.} +#' \item{context}{Usage context, e.g. `"shelf:rows"`, `"shelf:color"`, +#' `"filter"`.} +#' \item{n_appearances}{Number of times the field appears in this context on +#' this sheet (handles multi-pill rows/cols).} +#' } +#' +#' **Wide form** (`wide = TRUE`): one row per `(field_clean, datasource)`, +#' one column per sheet, cell value is a comma-separated context string or +#' `NA`. 
+#' +#' @examples +#' twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser") +#' stopifnot(nzchar(twb), file.exists(twb)) +#' xml <- xml2::read_xml(twb) +#' twb_field_usage(xml) +#' twb_field_usage(xml, wide = TRUE) +#' +#' @export +twb_field_usage <- function(x, + include_filters = TRUE, + include_shelves = TRUE, + wide = FALSE) { + xml_doc <- .twb_resolve_xml(x) + stopifnot( + is.logical(include_filters), length(include_filters) == 1L, + is.logical(include_shelves), length(include_shelves) == 1L, + is.logical(wide), length(wide) == 1L + ) + if (!include_filters && !include_shelves) { + message("`include_filters` and `include_shelves` are both FALSE: ", + "returning empty tibble.") + return(.empty_field_usage()) + } + .ins_field_usage(xml_doc, include_filters, include_shelves, wide) +} + +#' @keywords internal +#' @noRd +.ins_field_usage <- function(xml_doc, + include_filters = TRUE, + include_shelves = TRUE, + wide = FALSE) { + parts <- list() + + if (include_shelves) { + sh <- .ins_sheet_shelves(xml_doc) + if (nrow(sh) > 0L) { + parts[["shelves"]] <- sh |> + dplyr::filter(!is.na(.data$field_clean), nzchar(.data$field_clean)) |> + dplyr::mutate(context = paste0("shelf:", .data$shelf)) |> + dplyr::select("field_clean", "datasource", "sheet", "context") + } + } + + if (include_filters) { + fl <- .ins_sheet_filters(xml_doc) + if (nrow(fl) > 0L) { + parts[["filters"]] <- fl |> + dplyr::filter(!is.na(.data$field_clean), nzchar(.data$field_clean)) |> + dplyr::mutate(context = "filter") |> + dplyr::select("field_clean", "datasource", "sheet", "context") + } + } + + if (length(parts) == 0L) return(.empty_field_usage()) + + combined <- dplyr::bind_rows(parts) |> + dplyr::count(.data$field_clean, .data$datasource, .data$sheet, + .data$context, name = "n_appearances") |> + dplyr::mutate(n_appearances = as.integer(.data$n_appearances)) |> + dplyr::arrange(.data$field_clean, .data$sheet, .data$context) + + if (!wide) return(combined) + + # Wide form: one 
row per (field_clean, datasource), one col per sheet + tidyr::pivot_wider( + combined, + id_cols = c("field_clean", "datasource"), + names_from = "sheet", + values_from = "context", + values_fn = function(ctx) paste(sort(unique(ctx)), collapse = ", ") + ) +} + +.empty_field_usage <- function() { + tibble::tibble( + field_clean = character(), + datasource = character(), + sheet = character(), + context = character(), + n_appearances = integer() + ) +} + +# ---- twb_replication_brief --------------------------------------------------- + +#' Replication brief for a Tableau workbook or dashboard +#' +#' Assembles all extracted intelligence — datasources, parameters, calculated +#' fields with complexity classifications, field usage, filters, sorts, chart +#' types, dashboard layout, and actions — into a single named list (or +#' formatted text) ready for use when porting to another visualisation tool. +#' +#' @param x A `TwbParser` object or an `xml2` document. +#' @param dashboard Optional character scalar. When supplied, sheet-level +#' sections (filters, sorts, chart types, field usage, layout) are scoped to +#' the sheets that belong to this dashboard. +#' @param include_sql Logical; include custom SQL blocks in `$custom_sql`. +#' Default `TRUE`. +#' @param include_formulas Logical; when `TRUE`, a `formula_pretty` column is +#' added to `$calculated_fields`. Default `TRUE`. +#' @param format Either `"list"` (default) to return a named R list, or +#' `"text"` to return a single formatted character string suitable for +#' printing or writing to a file. 
+#' +#' @return +#' **`format = "list"`**: a named list with elements: +#' \describe{ +#' \item{meta}{1-row tibble: file name, counts, generation timestamp.} +#' \item{datasources}{Datasource connection details.} +#' \item{parameters}{Parameter fields with current values.} +#' \item{custom_sql}{Custom SQL blocks, or `NULL` if `include_sql = FALSE`.} +#' \item{calculated_fields}{Tibble from [twb_calc_complexity()], optionally +#' with a `formula_pretty` column.} +#' \item{field_usage}{Tibble from [twb_field_usage()].} +#' \item{filters}{Worksheet filters (scoped to `dashboard` if given).} +#' \item{sorts}{Worksheet sorts (scoped to `dashboard` if given).} +#' \item{chart_types}{Mark types per worksheet.} +#' \item{dashboard_layout}{Zone positions from [twb_dashboard_sheets()].} +#' \item{actions}{Dashboard actions from [twb_dashboard_actions()].} +#' } +#' +#' **`format = "text"`**: a single `character(1)` with section headers and +#' tabular output. +#' +#' @examples +#' twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser") +#' stopifnot(nzchar(twb), file.exists(twb)) +#' xml <- xml2::read_xml(twb) +#' brief <- twb_replication_brief(xml) +#' names(brief) +#' brief$meta +#' +#' @export +twb_replication_brief <- function(x, + dashboard = NULL, + include_sql = TRUE, + include_formulas = TRUE, + format = c("list", "text")) { + xml_doc <- .twb_resolve_xml(x) + format <- match.arg(format) + + if (!is.null(dashboard)) { + stopifnot(is.character(dashboard), length(dashboard) == 1L) + dashboard <- gsub("'", "", dashboard, fixed = TRUE) + } + stopifnot( + is.logical(include_sql), length(include_sql) == 1L, + is.logical(include_formulas), length(include_formulas) == 1L + ) + + brief <- .ins_replication_brief(xml_doc, x, dashboard, + include_sql, include_formulas) + + if (identical(format, "text")) return(.brief_to_text(brief)) + brief +} + +#' @keywords internal +#' @noRd +.ins_replication_brief <- function(xml_doc, x_orig, dashboard, + include_sql, 
include_formulas) { + # -- meta ------------------------------------------------------------------- + workbook_file <- if (inherits(x_orig, "TwbParser")) { + basename(x_orig$path %||% "") + } else { + "" + } + + pages <- .ins_pages(xml_doc) + n_worksheets <- sum(pages$page_type == "worksheet", na.rm = TRUE) + n_dashboards <- sum(pages$page_type == "dashboard", na.rm = TRUE) + + ds_details <- tryCatch( + extract_datasource_details(xml_doc), + error = function(e) list(data_sources = tibble::tibble(), + parameters = tibble::tibble()) + ) + calcs <- .ins_calc_complexity(xml_doc) + params <- tryCatch(extract_parameters(xml_doc), error = function(e) tibble::tibble()) + + meta <- tibble::tibble( + workbook_file = workbook_file, + n_datasources = nrow(ds_details$data_sources), + n_worksheets = as.integer(n_worksheets), + n_dashboards = as.integer(n_dashboards), + n_calculated_fields = nrow(calcs), + n_parameters = nrow(params), + generated_at = format(Sys.time(), "%Y-%m-%d %H:%M:%S") + ) + + # -- datasources & parameters ----------------------------------------------- + datasources <- ds_details$data_sources + parameters <- params + + # -- custom SQL ------------------------------------------------------------- + custom_sql <- if (include_sql) { + tryCatch(twb_custom_sql(xml_doc), error = function(e) tibble::tibble()) + } else { + NULL + } + + # -- calculated fields ------------------------------------------------------ + calc_fields <- calcs + if (include_formulas && nrow(calc_fields) > 0L) { + pretty <- tryCatch( + prettify_calculated_fields( + calc_fields |> dplyr::select("name", "formula"), + wrap = 100L + ), + error = function(e) NULL + ) + if (!is.null(pretty) && "formula_pretty" %in% names(pretty)) { + calc_fields <- dplyr::bind_rows( + calc_fields, + tibble::tibble(formula_pretty = pretty$formula_pretty) + ) |> + # safe merge: join on name to avoid row-count mismatch + (\(cf) { + cf |> dplyr::select(-"formula_pretty") |> + dplyr::bind_cols( + tibble::tibble( + 
formula_pretty = pretty$formula_pretty[ + match(cf$name, pretty$name) + ] + ) + ) + })() + } + } + + # -- determine sheet scope -------------------------------------------------- + scoped_sheets <- if (!is.null(dashboard)) { + db_sh <- tryCatch( + .ins_dashboard_sheets(xml_doc, dashboard), + error = function(e) tibble::tibble(sheet = character()) + ) + if (nrow(db_sh) > 0L) unique(db_sh$sheet) else character() + } else { + NULL # NULL means all sheets + } + + .scope <- function(tbl, col = "sheet") { + if (is.null(scoped_sheets) || !col %in% names(tbl)) return(tbl) + dplyr::filter(tbl, .data[[col]] %in% scoped_sheets) + } + + # -- field usage ------------------------------------------------------------ + field_usage <- tryCatch( + .scope(.ins_field_usage(xml_doc, include_filters = TRUE, + include_shelves = TRUE, wide = FALSE)), + error = function(e) .empty_field_usage() + ) + + # -- filters & sorts -------------------------------------------------------- + filters <- tryCatch( + .scope(.ins_sheet_filters(xml_doc)), + error = function(e) .empty_filters() + ) + sorts <- tryCatch( + .scope(.ins_sheet_sorts(xml_doc)), + error = function(e) .empty_sorts() + ) + + # -- chart types ------------------------------------------------------------ + chart_types <- tryCatch({ + ct <- .ins_charts(xml_doc) + if (!is.null(scoped_sheets) && "worksheet" %in% names(ct)) + ct <- dplyr::filter(ct, .data$worksheet %in% scoped_sheets) + ct + }, error = function(e) tibble::tibble()) + + # -- dashboard layout & actions --------------------------------------------- + db_layout <- tryCatch( + .ins_dashboard_sheets(xml_doc, dashboard), + error = function(e) tibble::tibble() + ) + actions <- tryCatch( + .ins_dashboard_actions(xml_doc, dashboard), + error = function(e) tibble::tibble() + ) + + list( + meta = meta, + datasources = datasources, + parameters = parameters, + custom_sql = custom_sql, + calculated_fields = calc_fields, + field_usage = field_usage, + filters = filters, + sorts = sorts, 
+ chart_types = chart_types, + dashboard_layout = db_layout, + actions = actions + ) +} + +# ---- text formatter ---------------------------------------------------------- + +#' @keywords internal +#' @noRd +.brief_to_text <- function(brief) { + lines <- character() + + .hdr <- function(title) c(paste0("## ", title), "") + + .tbl <- function(tbl) { + if (is.null(tbl) || (is.data.frame(tbl) && nrow(tbl) == 0L)) + return(c(" (none)", "")) + c(utils::capture.output(print(as.data.frame(tbl), row.names = FALSE)), + "") + } + + .kv <- function(label, value) { + sprintf(" %-25s %s", paste0(label, ":"), value) + } + + # Header + lines <- c(lines, "# TWBPARSER REPLICATION BRIEF", "") + + # Meta + m <- brief$meta + lines <- c( + lines, + .hdr("WORKBOOK"), + .kv("File", m$workbook_file), + .kv("Generated at", m$generated_at), + .kv("Datasources", m$n_datasources), + .kv("Worksheets", m$n_worksheets), + .kv("Dashboards", m$n_dashboards), + .kv("Calculated fields", m$n_calculated_fields), + .kv("Parameters", m$n_parameters), + "" + ) + + .section <- function(title, tbl) c(.hdr(title), .tbl(tbl)) + + lines <- c(lines, + .section("DATASOURCES", brief$datasources), + .section("PARAMETERS", brief$parameters) + ) + if (!is.null(brief$custom_sql)) + lines <- c(lines, .section("CUSTOM SQL", brief$custom_sql)) + + lines <- c(lines, + .section("CALCULATED FIELDS (complexity)", brief$calculated_fields), + .section("FIELD USAGE", brief$field_usage), + .section("FILTERS", brief$filters), + .section("SORTS", brief$sorts), + .section("CHART TYPES", brief$chart_types), + .section("DASHBOARD LAYOUT", brief$dashboard_layout), + .section("ACTIONS", brief$actions) + ) + + paste(lines, collapse = "\n") +} diff --git a/R/dependency_graph.R b/R/dependency_graph.R index eb6645b..365ef81 100644 --- a/R/dependency_graph.R +++ b/R/dependency_graph.R @@ -8,7 +8,7 @@ tok <- sub("^([^:]+:)+", "", tok) # remove prefixes like "none:" or "clct:" # Split table-qualified references: [Table].[Field] -> take 
Field - parts <- strsplit(tok, "\\.?", fixed = FALSE)[[1]] + parts <- strsplit(tok, "\\.", fixed = FALSE)[[1]] parts <- parts[nzchar(parts)] parts <- gsub("^\\s+|\\s+$", "", parts) if (length(parts) == 0) { diff --git a/R/globals.R b/R/globals.R index 3913aa0..47f3afc 100644 --- a/R/globals.R +++ b/R/globals.R @@ -27,5 +27,12 @@ utils::globalVariables(c( # dashboard_details "action_name", "action_type", "source_sheets", "target_sheet", "run_on", "url", "layout_type", "parent_zone_id", "component_type", "zone_id", - "sheet" + "sheet", + # analytics (Phase 4) + "is_table_calc", + "calc_type", "lod_type", "dep_depth", "n_deps", + "n_appearances", "context", + "workbook_file", "n_datasources", "n_worksheets", "n_dashboards", + "n_calculated_fields", "n_parameters", "generated_at", + "formula_pretty" )) diff --git a/R/published.R b/R/published.R index 8b62d26..355e571 100644 --- a/R/published.R +++ b/R/published.R @@ -1,28 +1,51 @@ -#' Detect likely references to published data sources (vs embedded) +#' Detect references to published data sources +#' +#' Inspects datasource nodes and heuristically flags those that reference a +#' published (server-side) source rather than an embedded one. +#' +#' @param x A `TwbParser` object **or** an `xml2` document. 
+#' +#' @return A tibble with columns: +#' \describe{ +#' \item{name}{Internal datasource name.} +#' \item{caption}{User-visible caption.} +#' \item{hasconn}{Value of the `hasconnection` attribute.} +#' \item{likely_published}{`TRUE` when `hasconnection = false` or when the +#' node text contains published-source markers.} +#' \item{hints}{Short explanation of the classification.} +#' } +#' +#' @examples +#' twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser") +#' stopifnot(nzchar(twb), file.exists(twb)) +#' xml <- xml2::read_xml(twb) +#' twb_published_refs(xml) +#' #' @importFrom rlang .data -#' @param xml_doc An xml2 document for a .twb -#' @return tibble with datasource name, caption, likely_published, hints #' @export -twb_published_refs <- function(xml_doc) { - stopifnot(inherits(xml_doc, "xml_document")) +twb_published_refs <- function(x) { + xml_doc <- .twb_resolve_xml(x) dsn <- xml2::xml_find_all(xml_doc, "//datasource") - if (length(dsn) == 0) { + if (length(dsn) == 0L) { return(tibble::tibble( - name = character(), caption = character(), - likely_published = logical(), hints = character() + name = character(), + caption = character(), + likely_published = logical(), + hints = character() )) } tibble::tibble( name = xml2::xml_attr(dsn, "name"), caption = xml2::xml_attr(dsn, "caption"), hasconn = xml2::xml_attr(dsn, "hasconnection"), - raw = vapply(dsn, xml2::xml_text, character(1)) + raw = vapply(dsn, xml2::xml_text, character(1L)) ) |> dplyr::mutate( likely_published = .data$hasconn %in% c("false", "0") | - stringr::str_detect(.data$raw, - "(?i)published|tableau server|tableau cloud|catalog-id|content-url" + stringr::str_detect( + .data$raw, + "(?i)published|tableau server|tableau cloud|catalog-id|content-url" ), hints = dplyr::if_else( .data$likely_published, @@ -30,5 +53,5 @@ twb_published_refs <- function(xml_doc) { "embedded or no published markers" ) ) |> - dplyr::select(-raw) + dplyr::select(-"raw") } diff --git a/R/sql.R 
b/R/sql.R index c874ccf..28a85e9 100644 --- a/R/sql.R +++ b/R/sql.R @@ -1,10 +1,29 @@ -#' Extract Custom SQL relations from a TWB XML +#' Extract Custom SQL relations from a Tableau workbook +#' +#' Finds every `` node that looks like a SQL statement +#' and returns its name, type, raw SQL text, and a flag for whether it starts +#' with `SELECT` or `WITH`. +#' +#' @param x A `TwbParser` object **or** an `xml2` document. +#' +#' @return A tibble with columns: +#' \describe{ +#' \item{relation_name}{Name attribute of the relation node.} +#' \item{relation_type}{Type attribute (e.g. `"text"`, `"table"`).} +#' \item{custom_sql}{Full SQL text.} +#' \item{is_custom_sql}{`TRUE` when the text begins with `SELECT` or `WITH`.} +#' } +#' +#' @examples +#' twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser") +#' stopifnot(nzchar(twb), file.exists(twb)) +#' xml <- xml2::read_xml(twb) +#' twb_custom_sql(xml) +#' #' @importFrom rlang .data -#' @param xml_doc An xml2 document for a .twb -#' @return tibble with relation_name, relation_type, custom_sql #' @export -twb_custom_sql <- function(xml_doc) { - stopifnot(inherits(xml_doc, "xml_document")) +twb_custom_sql <- function(x) { + xml_doc <- .twb_resolve_xml(x) rels <- xml2::xml_find_all(xml_doc, "//relation[@formula]") tibble::tibble( relation_name = xml2::xml_attr(rels, "name"), @@ -14,22 +33,42 @@ twb_custom_sql <- function(xml_doc) { dplyr::filter(!is.na(.data$custom_sql)) |> dplyr::mutate( is_custom_sql = dplyr::coalesce( - stringr::str_detect(.data$custom_sql, - stringr::regex("^\\s*(select|with)\\b", ignore_case = TRUE) + stringr::str_detect( + .data$custom_sql, + stringr::regex("^\\s*(select|with)\\b", ignore_case = TRUE) ), FALSE ) ) } -#' Extract Initial SQL statements from connections (if present) -#' @param xml_doc An xml2 document for a .twb -#' @return tibble with connection_id, initial_sql +#' Extract Initial SQL statements from Tableau connections +#' +#' Returns any `` nodes found inside 
connection or +#' named-connection elements. +#' +#' @param x A `TwbParser` object **or** an `xml2` document. +#' +#' @return A tibble with columns: +#' \describe{ +#' \item{connection_id}{Name or caption of the parent connection element.} +#' \item{initial_sql}{SQL text of the initial statement.} +#' } +#' +#' @examples +#' twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser") +#' stopifnot(nzchar(twb), file.exists(twb)) +#' xml <- xml2::read_xml(twb) +#' twb_initial_sql(xml) +#' #' @export -twb_initial_sql <- function(xml_doc) { - stopifnot(inherits(xml_doc, "xml_document")) - nodes <- xml2::xml_find_all(xml_doc, "//connection/initial-sql | //named-connection/initial-sql") - if (length(nodes) == 0) { +twb_initial_sql <- function(x) { + xml_doc <- .twb_resolve_xml(x) + nodes <- xml2::xml_find_all( + xml_doc, + "//connection/initial-sql | //named-connection/initial-sql" + ) + if (length(nodes) == 0L) { return(tibble::tibble(connection_id = character(), initial_sql = character())) } tibble::tibble( diff --git a/R/twb_parser.R b/R/twb_parser.R index ae55152..ef86634 100644 --- a/R/twb_parser.R +++ b/R/twb_parser.R @@ -251,7 +251,52 @@ TwbParser <- R6::R6Class( safe_call(.ins_dashboard_actions(self$xml_doc, dashboard), .empty_actions()) }, + # --- Phase 4: analytics --- + #' @description Calculated field complexity classifications. + #' @param include_parameters Logical; include parameter fields. Default `FALSE`. + get_calc_complexity = function(include_parameters = FALSE) { + safe_call( + twb_calc_complexity(self$xml_doc, include_parameters = include_parameters), + .empty_calc_complexity() + ) + }, + + #' @description Field usage matrix across worksheets. + #' @param include_filters Include filter appearances. Default `TRUE`. + #' @param include_shelves Include shelf appearances. Default `TRUE`. + #' @param wide Return wide format (one col per sheet). Default `FALSE`. 
+ get_field_usage = function(include_filters = TRUE, + include_shelves = TRUE, + wide = FALSE) { + safe_call( + twb_field_usage(self$xml_doc, + include_filters = include_filters, + include_shelves = include_shelves, + wide = wide), + .empty_field_usage() + ) + }, + + #' @description Full replication brief for the workbook or a single dashboard. + #' @param dashboard Optional dashboard name to scope the brief. + #' @param include_sql Include custom SQL blocks. Default `TRUE`. + #' @param include_formulas Add `formula_pretty` to calculated fields. + #' Default `TRUE`. + #' @param format `"list"` (default) or `"text"`. + get_replication_brief = function(dashboard = NULL, + include_sql = TRUE, + include_formulas = TRUE, + format = c("list", "text")) { + safe_call( + twb_replication_brief(self, + dashboard = dashboard, + include_sql = include_sql, + include_formulas = include_formulas, + format = match.arg(format)), + list() + ) + }, # --- validator bridge --- #' @description Validate relationships; optionally stop on failure. 
diff --git a/inst/WORDLIST b/inst/WORDLIST index 6a7c5e8..a888798 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -21,6 +21,8 @@ DAGs datasource deduplication dplyr +LOD +visualisation Datasource datasources Datasources diff --git a/inst/cheatsheet/twbparser-cheatsheet.tex b/inst/cheatsheet/twbparser-cheatsheet.tex new file mode 100644 index 0000000..1ad0703 --- /dev/null +++ b/inst/cheatsheet/twbparser-cheatsheet.tex @@ -0,0 +1,576 @@ +% twbparser R Package Cheatsheet +% Compile with: pdflatex twbparser-cheatsheet.tex (twice) +% Requires: geometry, multicol, tcolorbox, listings, xcolor, hyperref, fontenc, inputenc + +\documentclass[10pt,landscape]{article} + +% ---- Page geometry ----------------------------------------------------------- +\usepackage[ + landscape, + top=6mm, bottom=6mm, + left=6mm, right=6mm, + includehead=false +]{geometry} + +% ---- Encoding & fonts -------------------------------------------------------- +\usepackage[T1]{fontenc} +\usepackage[utf8]{inputenc} +\usepackage{lmodern} +\usepackage{microtype} + +% ---- Colours ----------------------------------------------------------------- +\usepackage{xcolor} +\definecolor{twbBlue} {HTML}{1F4E79} % deep navy +\definecolor{twbMid} {HTML}{2E75B6} % mid blue +\definecolor{twbLight} {HTML}{DDEEFF} % pale blue fill +\definecolor{twbGreen} {HTML}{1E6B3C} % dark green +\definecolor{twbGreenL} {HTML}{D6F0E0} % pale green fill +\definecolor{twbOrange} {HTML}{BF4A00} % orange / warning +\definecolor{twbOrangeL}{HTML}{FFE8D6} % pale orange +\definecolor{twbPurple} {HTML}{5B2D8E} % purple +\definecolor{twbPurpleL}{HTML}{EDE0F5} % pale purple +\definecolor{twbGray} {HTML}{4A4A4A} % body text +\definecolor{codeBack} {HTML}{F5F5F5} % code background +\definecolor{codeFore} {HTML}{1A1A1A} % code text + +% ---- Code listings ----------------------------------------------------------- +\usepackage{listings} +\lstset{ + language=R, + basicstyle=\ttfamily\scriptsize\color{codeFore}, + 
backgroundcolor=\color{codeBack}, + frame=none, + breaklines=true, + breakatwhitespace=true, + tabsize=2, + keepspaces=true, + showstringspaces=false, + keywordstyle=\color{twbMid}\bfseries, + commentstyle=\color{twbGray}\itshape, + stringstyle=\color{twbGreen}, + literate={<-}{{{\color{twbOrange}<-}}}2 + {|}{{{\color{twbOrange}|}}}1, +} + +% ---- Boxes ------------------------------------------------------------------- +\usepackage[most]{tcolorbox} + +% Section header box +\newtcolorbox{secbox}[2][twbBlue]{ + colback=#1, colframe=#1, + coltext=white, + fonttitle=\bfseries\small, + title=#2, + top=1pt, bottom=1pt, left=3pt, right=3pt, + boxsep=0pt, arc=2pt, + before skip=3pt, after skip=2pt +} + +% Sub-section box (lighter fill, dark border) +\newtcolorbox{subbox}[2][twbLight]{ + colback=#1, colframe=twbMid, + coltext=twbGray, + fonttitle=\bfseries\scriptsize\color{twbBlue}, + title=#2, + top=1pt, bottom=1pt, left=3pt, right=3pt, + boxsep=0pt, arc=2pt, + before skip=2pt, after skip=1pt, + leftrule=2pt, rightrule=0pt, toprule=0pt, bottomrule=0pt +} + +% Tip / note box +\newtcolorbox{tipbox}{ + colback=twbOrangeL, colframe=twbOrange, + coltext=twbGray, + fonttitle=\bfseries\scriptsize\color{twbOrange}, + title=TIP, + top=1pt, bottom=1pt, left=3pt, right=3pt, + boxsep=0pt, arc=2pt, + before skip=2pt, after skip=1pt, + leftrule=2pt, rightrule=0pt, toprule=0pt, bottomrule=0pt +} + +% ---- Layout helpers ---------------------------------------------------------- +\usepackage{multicol} +\setlength{\columnsep}{4mm} +\setlength{\columnseprule}{0.3pt} +\def\columnseprulecolor{\color{twbMid!40}} + +\usepackage{parskip} +\setlength{\parskip}{1pt} +\setlength{\parindent}{0pt} + +\usepackage{enumitem} +\setlist[itemize]{ + leftmargin=8pt, itemsep=0pt, parsep=0pt, topsep=0pt, + label=\textcolor{twbMid}{\textbullet} +} + +% ---- Hyperref ---------------------------------------------------------------- +\usepackage{hyperref} +\hypersetup{colorlinks, urlcolor=twbMid, 
linkcolor=twbMid} + +% ---- Helpers ----------------------------------------------------------------- +\newcommand{\fn}[1]{\texttt{\textbf{\color{twbBlue}#1}}} +\newcommand{\pkg}[1]{\texttt{\color{twbGreen}#1}} +\newcommand{\ret}[1]{\textit{\scriptsize\color{twbGray}→ #1}} +\newcommand{\opt}[1]{\textcolor{twbOrange}{\texttt{#1}}} +\newcommand{\sep}{\vspace{1pt}\hrule\vspace{2pt}} + +\pagestyle{empty} + +% ============================================================================== +\begin{document} +% ============================================================================== + +% ---- Header ------------------------------------------------------------------ +\begin{tcolorbox}[ + colback=twbBlue, colframe=twbBlue, + coltext=white, arc=3pt, + top=4pt, bottom=4pt, left=6pt, right=6pt, + before skip=0pt, after skip=4pt +] + \begin{minipage}{0.72\linewidth} + {\Large\bfseries twbparser}\quad + {\normalsize\color{twbLight} v0.4.0 \quad|\quad Parse Tableau .twb/.twbx files into tidy data}\\[1pt] + {\scriptsize\color{twbLight}% + \texttt{pak::pak("PrigasG/twbparser")} \quad + \texttt{devtools::install\_github("PrigasG/twbparser")} \quad + \href{https://prigasg.github.io/twbparser/}{prigasg.github.io/twbparser}} + \end{minipage}% + \hfill + \begin{minipage}{0.26\linewidth}\raggedleft + {\scriptsize\color{twbLight} + All \texttt{twb\_*()} functions accept\\ + a \pkg{TwbParser} object \textbf{or} an \texttt{xml2} document.\\ + Active bindings available as \texttt{parser\$property}.} + \end{minipage} +\end{tcolorbox} + +% ---- Three-column body ------------------------------------------------------- +\begin{multicols}{3} + +%% ============================================================ +%% COLUMN 1 +%% ============================================================ + +% ---- Getting Started --------------------------------------------------------- +\begin{secbox}[twbBlue]{Getting Started} +\end{secbox} + +\begin{lstlisting} +library(twbparser) + +# Parse a .twb or .twbx 
file +parser <- TwbParser$new("dashboard.twbx") + +# Quick console summary (no parens needed) +parser$summary + +# 1-row overview tibble +parser$overview +\end{lstlisting} + +\begin{subbox}[twbLight]{Key active bindings (no parens)} +\begin{lstlisting} +parser$overview # 1-row summary tibble +parser$pages # all pages +parser$pages_summary # per-page counts +parser$dashboard_summary # per-dashboard stats +parser$datasources # datasource details +parser$parameters_tbl # parameter fields +parser$fields_tbl # raw fields +\end{lstlisting} +\end{subbox} + +\sep + +% ---- Data Model -------------------------------------------------------------- +\begin{secbox}[twbMid]{Data Model} +\end{secbox} + +\textbf{Datasources \& parameters} + +\begin{lstlisting} +# Named connections (server, file, db ...) +parser$get_datasources() # -> tibble +parser$get_parameters() # -> tibble +parser$get_datasources_all() # combined + +# Detect published vs. embedded sources +twb_published_refs(parser) +\end{lstlisting} + +\textbf{Fields} + +\begin{lstlisting} +# All raw (non-calc) fields +parser$get_fields() + +# Calculated fields + formulas +parser$get_calculated_fields( + pretty = TRUE, # add formula_pretty + strip_brackets = FALSE, + wrap = 100L, + include_parameters = FALSE +) + +# Low-level helpers (accept xml_doc) +extract_raw_fields(xml_doc) +extract_calculated_fields(xml_doc) +extract_parameters(xml_doc) +extract_columns_with_table_source(xml_doc) +\end{lstlisting} + +\textbf{SQL} + +\begin{lstlisting} +twb_custom_sql(parser) # Custom SQL blocks +twb_initial_sql(parser) # Initial SQL per conn +\end{lstlisting} + +\sep + +% ---- Relationships & Joins --------------------------------------------------- +\begin{secbox}[twbGreen]{Relationships \& Joins} +\end{secbox} + +\begin{lstlisting} +# Modern logical-model relationships (2020.2+) +parser$get_relationships() # -> tibble + +# Legacy SQL joins +parser$get_joins() # -> tibble +parser$get_relations() # raw nodes + +# Infer implicit pairs 
by field-name / role +infer_implicit_relationships( + fields_df, # from get_fields() + max_pairs = 50000L +) + +# Validate endpoints + predicate fields +validate_relationships(parser, strict = FALSE) +\end{lstlisting} + +\begin{subbox}[twbGreenL]{Returned columns} +\begin{lstlisting} +# get_relationships() -> +# left_table | left_field | operator +# right_table | right_field | datasource_left + +# get_joins() -> +# left_table | left_field | right_table +# right_field | join_type | operator +\end{lstlisting} +\end{subbox} + +%% ============================================================ +%% COLUMN 2 +%% ============================================================ +\columnbreak + +% ---- Worksheet Intelligence -------------------------------------------------- +\begin{secbox}[twbPurple]{Worksheet Intelligence} +\end{secbox} + +All four accept \opt{sheet = "Sheet1"} to restrict to one worksheet. + +\textbf{Shelf assignments} + +\begin{lstlisting} +twb_sheet_shelves(parser) +# sheet | shelf | field_clean | datasource +# shelf: "rows","cols","color","size", +# "label","detail","tooltip","shape" + +# also: parser$get_sheet_shelves() +# parser$sheet_shelves +\end{lstlisting} + +\textbf{Worksheet filters} + +\begin{lstlisting} +twb_sheet_filters(parser, sheet = "Sales") +# sheet | field_clean | datasource +# filter_class | include_mode +# members | range_min | range_max +\end{lstlisting} + +\textbf{Axis configuration} + +\begin{lstlisting} +twb_sheet_axes(parser) +# sheet | axis | field_clean +# scale_type | reversed | include_zero +\end{lstlisting} + +\textbf{Sort directives} + +\begin{lstlisting} +twb_sheet_sorts(parser) +# sheet | field_clean | datasource +# sort_order | sort_by +# sort_by: "field","alphabetic","manual" +\end{lstlisting} + +\sep + +% ---- Dashboard Intelligence -------------------------------------------------- +\begin{secbox}[twbPurple]{Dashboard Intelligence} +\end{secbox} + +All three accept \opt{dashboard = "Overview"} to scope to one dashboard. 
+ +\textbf{Sheet positions} + +\begin{lstlisting} +twb_dashboard_sheets(parser) +# dashboard | sheet | zone_id +# x | y | w | h (pixel coords) + +twb_dashboards(parser) # high-level overview +twb_dashboard_summary(parser) # filter + chart counts +\end{lstlisting} + +\textbf{Full zone layout tree} + +\begin{lstlisting} +twb_dashboard_layout(parser, dashboard = "Exec") +# dashboard | zone_id | parent_zone_id +# component_type | layout_type | x | y | w | h +# component_type: "worksheet","filter", +# "container","legend","parameter_control", +# "text","image","blank" +# layout_type: "tiled" or "floating" +\end{lstlisting} + +\textbf{Actions} + +\begin{lstlisting} +twb_dashboard_actions(parser) +# action_name | action_type | source_sheets +# target_sheet | run_on | url +# action_type: "filter","url","highlight","parameter" +# run_on: "select","menu","hover" +\end{lstlisting} + +\textbf{Filters on dashboards} + +\begin{lstlisting} +twb_dashboard_filters(parser) # filter positions +twb_page_composition(parser, "Executive Dashboard") +twb_pages(parser) # all pages list +twb_pages_summary(parser) # per-page stats +\end{lstlisting} + +\sep + +% ---- Chart types & Colors ---------------------------------------------------- +\begin{secbox}[twbMid]{Mark Types \& Colors} +\end{secbox} + +\begin{lstlisting} +twb_charts(parser) # worksheet | mark_types +twb_colors(parser) # palette_name | kind | colors +\end{lstlisting} + +%% ============================================================ +%% COLUMN 3 +%% ============================================================ +\columnbreak + +% ---- Analytics (Phase 4) ---------------------------------------------------- +\begin{secbox}[twbOrange]{Analytics — Port to Another Tool} +\end{secbox} + +\textbf{Calculated field complexity} + +\begin{lstlisting} +twb_calc_complexity(parser) +# datasource | name | datatype | role +# calc_type | lod_type | is_table_calc +# dep_depth | n_deps | formula +# +# calc_type values (precedence order): +# "lod" 
FIXED/INCLUDE/EXCLUDE +# "table_calc" RUNNING_SUM, WINDOW_*, RANK, ... +# "aggregate" SUM, COUNT, MEDIAN, ... +# "raw" string ops, IF/THEN, literals +# +# lod_type: "fixed" | "include" | "exclude" | NA +# dep_depth: calc-on-calc chain length (0 = raw deps) + +# also: parser$get_calc_complexity() +# parser$calc_complexity +\end{lstlisting} + +\textbf{Field usage matrix} + +\begin{lstlisting} +# Long form (default) +twb_field_usage(parser) +# field_clean | datasource | sheet +# context | n_appearances +# context: "shelf:rows","shelf:color","filter",... + +# Wide form -- one column per sheet +twb_field_usage(parser, wide = TRUE) + +# Limit to shelves or filters only +twb_field_usage(parser, include_filters = FALSE) +twb_field_usage(parser, include_shelves = FALSE) + +# also: parser$get_field_usage(wide = TRUE) +# parser$field_usage +\end{lstlisting} + +\textbf{Full replication brief} + +\begin{lstlisting} +# Named list -- all 11 sections assembled +brief <- twb_replication_brief(parser) +names(brief) +# meta | datasources | parameters | custom_sql +# calculated_fields | field_usage | filters +# sorts | chart_types | dashboard_layout | actions + +# Scope to one dashboard +brief <- twb_replication_brief( + parser, + dashboard = "Overview", + include_sql = TRUE, + include_formulas = TRUE # adds formula_pretty +) + +# Human-readable text output +cat(twb_replication_brief(parser, format = "text")) + +# also: parser$get_replication_brief(dashboard = "X") +\end{lstlisting} + +\sep + +% ---- Dependency Graphs ------------------------------------------------------- +\begin{secbox}[twbGreen]{Dependency Graphs} +\end{secbox} + +\begin{lstlisting} +calcs <- parser$get_calculated_fields() + +# Build igraph DAG (input -> output field) +g <- build_dependency_graph(calcs) + +# Plot with base graphics +plot_dependency_graph(g, calcs, seed = 42) + +# Plot field-level relationship DAG +plot_relationship_graph( + parser$get_relationships(), seed = 42) + +# Plot source join structure 
+plot_source_join_graph( + parser$get_joins(), + relationships_df = parser$get_relationships()) +\end{lstlisting} + +\sep + +% ---- TWBX Handling ----------------------------------------------------------- +\begin{secbox}[twbBlue]{TWBX Handling} +\end{secbox} + +\begin{lstlisting} +# List all files in the archive +twbx_list("dashboard.twbx") +# Name | Length | Date | type +# type: "workbook","extract","image","text",... + +# Extract the embedded .twb +extract_twb_from_twbx( + "dashboard.twbx", + extract_dir = tempdir()) + +# Extract specific files by type / pattern +twbx_extract_files( + "dashboard.twbx", + types = "image", # or "extract" + pattern = "^Data/.*\\.hyper$", + exdir = "out/") + +# TwbParser handles .twbx automatically +parser <- TwbParser$new("dashboard.twbx") +parser$twbx_manifest # contents tibble +parser$get_twbx_extracts() +parser$get_twbx_images() +\end{lstlisting} + +\sep + +% ---- Formula Utilities ------------------------------------------------------- +\begin{secbox}[twbMid]{Formula Utilities} +\end{secbox} + +\begin{lstlisting} +# Pretty-print a single formula string +tableau_formula_pretty( + "IF [Sales] > 0 THEN [Sales] ELSE 0 END", + strip_brackets = FALSE, + wrap = 80L) + +# Add formula_pretty column to a tibble +prettify_calculated_fields( + calcs, # tibble with 'formula' col + strip_brackets = FALSE, + wrap = 100L) +\end{lstlisting} + +\sep + +% ---- Tableau Server (optional) ----------------------------------------------- +\begin{secbox}[twbGray]{Tableau Server / Cloud (optional)} +\end{secbox} + +\begin{lstlisting} +# Publish info via REST API +tbs_publish_info( + content_id = "abc123", + base_url = "https://server", + site = "MySite", + token = Sys.getenv("TABLEAU_TOKEN")) + +# Custom SQL via Metadata GraphQL API +tbs_custom_sql_graphql( + content_id = "abc123", + base_url = "https://server", + site = "MySite", + token = Sys.getenv("TABLEAU_TOKEN")) +\end{lstlisting} + +\begin{tipbox} +\textbf{All \texttt{twb\_*()} functions} also 
work with a raw +\texttt{xml2} document: \texttt{xml <- xml2::read\_xml("file.twb")}. +Use the \texttt{TwbParser} class for caching, \texttt{.twbx} support, +and the no-parens active bindings. +\end{tipbox} + +\end{multicols} + +% ---- Footer ------------------------------------------------------------------ +\begin{tcolorbox}[ + colback=twbBlue!10, colframe=twbBlue!30, + coltext=twbGray, arc=2pt, + top=2pt, bottom=2pt, left=4pt, right=4pt, + before skip=2pt, after skip=0pt +] + \scriptsize + \textbf{twbparser} \textcolor{twbMid}{0.4.0} \quad + MIT License \quad + \href{https://github.com/PrigasG/twbparser}{github.com/PrigasG/twbparser} \quad + \href{https://prigasg.github.io/twbparser/}{prigasg.github.io/twbparser} \quad + Bugs: \href{https://github.com/PrigasG/twbparser/issues}{github.com/PrigasG/twbparser/issues} + \hfill + \textit{Cheatsheet compiled with \LaTeX{} — see \texttt{inst/cheatsheet/}} +\end{tcolorbox} + +\end{document} diff --git a/man/TwbParser.Rd b/man/TwbParser.Rd index a0b7052..fd494bf 100644 --- a/man/TwbParser.Rd +++ b/man/TwbParser.Rd @@ -20,7 +20,22 @@ An R6 class generator. \item{sheet}{Optional worksheet name.} -\item{dashboard}{Optional dashboard name to filter by.} +\item{include_parameters}{Logical; include parameter fields. Default \code{FALSE}.} + +\item{include_filters}{Include filter appearances. Default \code{TRUE}.} + +\item{include_shelves}{Include shelf appearances. Default \code{TRUE}.} + +\item{wide}{Return wide format (one col per sheet). Default \code{FALSE}.} + +\item{dashboard}{Optional dashboard name to scope the brief.} + +\item{include_sql}{Include custom SQL blocks. Default \code{TRUE}.} + +\item{include_formulas}{Add \code{formula_pretty} to calculated fields. +Default \code{TRUE}.} + +\item{format}{\code{"list"} (default) or \code{"text"}.} \item{error}{If \code{TRUE}, \code{stop()} when validation fails.} } @@ -49,6 +64,12 @@ Full zone layout with container hierarchy. Dashboard and workbook actions. 
+Calculated field complexity classifications. + +Field usage matrix across worksheets. + +Full replication brief for the workbook or a single dashboard. + Validate relationships; optionally stop on failure. Print a concise summary of parsed content. diff --git a/man/twb_calc_complexity.Rd b/man/twb_calc_complexity.Rd new file mode 100644 index 0000000..6af6a61 --- /dev/null +++ b/man/twb_calc_complexity.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/analytics.R +\name{twb_calc_complexity} +\alias{twb_calc_complexity} +\title{Classify calculated fields by complexity} +\usage{ +twb_calc_complexity(x, include_parameters = FALSE) +} +\arguments{ +\item{x}{A \code{TwbParser} object or an \code{xml2} document.} + +\item{include_parameters}{Logical; if \code{TRUE}, include parameter fields +(they always land in \code{calc_type = "raw"} and \code{dep_depth = 0}). +Default \code{FALSE}.} +} +\value{ +A tibble with columns: +\describe{ +\item{datasource}{Datasource the field belongs to.} +\item{name}{Human-readable field name.} +\item{tableau_internal_name}{Bracketed internal Tableau name.} +\item{datatype}{Field data type.} +\item{role}{\code{"measure"} or \code{"dimension"}.} +\item{calc_type}{One of \code{"lod"}, \code{"table_calc"}, \code{"aggregate"}, \code{"raw"}. +Tested in that precedence order.} +\item{lod_type}{\code{"fixed"}, \code{"include"}, or \code{"exclude"}; \code{NA} if not LOD.} +\item{is_table_calc}{Logical; existing heuristic flag preserved for +backward compatibility.} +\item{dep_depth}{Integer; longest chain of calc-on-calc dependencies. 
+\code{0} means the field only references raw fields (or has no references).} +\item{n_deps}{Integer; count of distinct bracketed tokens in the formula.} +\item{formula}{Raw formula string.} +} +} +\description{ +Returns every calculated field in the workbook enriched with a computation +category (\code{calc_type}), LOD sub-type, dependency count, and dependency depth +— the maximum number of calc-on-calc hops in the field's dependency chain. +} +\examples{ +twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser") +stopifnot(nzchar(twb), file.exists(twb)) +xml <- xml2::read_xml(twb) +twb_calc_complexity(xml) + +} diff --git a/man/twb_custom_sql.Rd b/man/twb_custom_sql.Rd index 71dac97..ac3ef38 100644 --- a/man/twb_custom_sql.Rd +++ b/man/twb_custom_sql.Rd @@ -2,16 +2,31 @@ % Please edit documentation in R/sql.R \name{twb_custom_sql} \alias{twb_custom_sql} -\title{Extract Custom SQL relations from a TWB XML} +\title{Extract Custom SQL relations from a Tableau workbook} \usage{ -twb_custom_sql(xml_doc) +twb_custom_sql(x) } \arguments{ -\item{xml_doc}{An xml2 document for a .twb} +\item{x}{A \code{TwbParser} object \strong{or} an \code{xml2} document.} } \value{ -tibble with relation_name, relation_type, custom_sql +A tibble with columns: +\describe{ +\item{relation_name}{Name attribute of the relation node.} +\item{relation_type}{Type attribute (e.g. \code{"text"}, \code{"table"}).} +\item{custom_sql}{Full SQL text.} +\item{is_custom_sql}{\code{TRUE} when the text begins with \code{SELECT} or \code{WITH}.} +} } \description{ -Extract Custom SQL relations from a TWB XML +Finds every \verb{<relation>} node that looks like a SQL statement +and returns its name, type, raw SQL text, and a flag for whether it starts +with \code{SELECT} or \code{WITH}. 
+} +\examples{ +twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser") +stopifnot(nzchar(twb), file.exists(twb)) +xml <- xml2::read_xml(twb) +twb_custom_sql(xml) + } diff --git a/man/twb_field_usage.Rd b/man/twb_field_usage.Rd new file mode 100644 index 0000000..29b6551 --- /dev/null +++ b/man/twb_field_usage.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/analytics.R +\name{twb_field_usage} +\alias{twb_field_usage} +\title{Field usage matrix across worksheets} +\usage{ +twb_field_usage( + x, + include_filters = TRUE, + include_shelves = TRUE, + wide = FALSE +) +} +\arguments{ +\item{x}{A \code{TwbParser} object or an \code{xml2} document.} + +\item{include_filters}{Logical; include filter appearances. Default \code{TRUE}.} + +\item{include_shelves}{Logical; include shelf appearances (rows, cols, color, +size, etc.). Default \code{TRUE}.} + +\item{wide}{Logical; if \code{TRUE}, pivot to one row per field with one column +per sheet containing a comma-separated list of contexts, or \code{NA} if the +field does not appear on that sheet. Default \code{FALSE}.} +} +\value{ +\strong{Long form} (\code{wide = FALSE}): a tibble with columns: +\describe{ +\item{field_clean}{Human-readable field name.} +\item{datasource}{Datasource the field belongs to.} +\item{sheet}{Worksheet name.} +\item{context}{Usage context, e.g. \code{"shelf:rows"}, \code{"shelf:color"}, +\code{"filter"}.} +\item{n_appearances}{Number of times the field appears in this context on +this sheet (handles multi-pill rows/cols).} +} + +\strong{Wide form} (\code{wide = TRUE}): one row per \verb{(field_clean, datasource)}, +one column per sheet, cell value is a comma-separated context string or +\code{NA}. +} +\description{ +Combines shelf placement and filter usage into a tidy long tibble showing +where each field appears and in what capacity across all (or selected) +worksheets. 
+} +\examples{ +twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser") +stopifnot(nzchar(twb), file.exists(twb)) +xml <- xml2::read_xml(twb) +twb_field_usage(xml) +twb_field_usage(xml, wide = TRUE) + +} diff --git a/man/twb_initial_sql.Rd b/man/twb_initial_sql.Rd index 63a1cd2..7075da3 100644 --- a/man/twb_initial_sql.Rd +++ b/man/twb_initial_sql.Rd @@ -2,16 +2,28 @@ % Please edit documentation in R/sql.R \name{twb_initial_sql} \alias{twb_initial_sql} -\title{Extract Initial SQL statements from connections (if present)} +\title{Extract Initial SQL statements from Tableau connections} \usage{ -twb_initial_sql(xml_doc) +twb_initial_sql(x) } \arguments{ -\item{xml_doc}{An xml2 document for a .twb} +\item{x}{A \code{TwbParser} object \strong{or} an \code{xml2} document.} } \value{ -tibble with connection_id, initial_sql +A tibble with columns: +\describe{ +\item{connection_id}{Name or caption of the parent connection element.} +\item{initial_sql}{SQL text of the initial statement.} +} } \description{ -Extract Initial SQL statements from connections (if present) +Returns any \verb{<initial-sql>} nodes found inside connection or +named-connection elements. 
+} +\examples{ +twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser") +stopifnot(nzchar(twb), file.exists(twb)) +xml <- xml2::read_xml(twb) +twb_initial_sql(xml) + } diff --git a/man/twb_published_refs.Rd b/man/twb_published_refs.Rd index 1d24670..9e0272b 100644 --- a/man/twb_published_refs.Rd +++ b/man/twb_published_refs.Rd @@ -2,16 +2,32 @@ % Please edit documentation in R/published.R \name{twb_published_refs} \alias{twb_published_refs} -\title{Detect likely references to published data sources (vs embedded)} +\title{Detect references to published data sources} \usage{ -twb_published_refs(xml_doc) +twb_published_refs(x) } \arguments{ -\item{xml_doc}{An xml2 document for a .twb} +\item{x}{A \code{TwbParser} object \strong{or} an \code{xml2} document.} } \value{ -tibble with datasource name, caption, likely_published, hints +A tibble with columns: +\describe{ +\item{name}{Internal datasource name.} +\item{caption}{User-visible caption.} +\item{hasconn}{Value of the \code{hasconnection} attribute.} +\item{likely_published}{\code{TRUE} when \code{hasconnection = false} or when the +node text contains published-source markers.} +\item{hints}{Short explanation of the classification.} +} } \description{ -Detect likely references to published data sources (vs embedded) +Inspects datasource nodes and heuristically flags those that reference a +published (server-side) source rather than an embedded one. 
+} +\examples{ +twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser") +stopifnot(nzchar(twb), file.exists(twb)) +xml <- xml2::read_xml(twb) +twb_published_refs(xml) + } diff --git a/man/twb_replication_brief.Rd b/man/twb_replication_brief.Rd new file mode 100644 index 0000000..e1e1f04 --- /dev/null +++ b/man/twb_replication_brief.Rd @@ -0,0 +1,66 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/analytics.R +\name{twb_replication_brief} +\alias{twb_replication_brief} +\title{Replication brief for a Tableau workbook or dashboard} +\usage{ +twb_replication_brief( + x, + dashboard = NULL, + include_sql = TRUE, + include_formulas = TRUE, + format = c("list", "text") +) +} +\arguments{ +\item{x}{A \code{TwbParser} object or an \code{xml2} document.} + +\item{dashboard}{Optional character scalar. When supplied, sheet-level +sections (filters, sorts, chart types, field usage, layout) are scoped to +the sheets that belong to this dashboard.} + +\item{include_sql}{Logical; include custom SQL blocks in \verb{$custom_sql}. +Default \code{TRUE}.} + +\item{include_formulas}{Logical; when \code{TRUE}, a \code{formula_pretty} column is +added to \verb{$calculated_fields}. 
Default \code{TRUE}.} + +\item{format}{Either \code{"list"} (default) to return a named R list, or +\code{"text"} to return a single formatted character string suitable for +printing or writing to a file.} +} +\value{ +\strong{\code{format = "list"}}: a named list with elements: +\describe{ +\item{meta}{1-row tibble: file name, counts, generation timestamp.} +\item{datasources}{Datasource connection details.} +\item{parameters}{Parameter fields with current values.} +\item{custom_sql}{Custom SQL blocks, or \code{NULL} if \code{include_sql = FALSE}.} +\item{calculated_fields}{Tibble from \code{\link[=twb_calc_complexity]{twb_calc_complexity()}}, optionally +with a \code{formula_pretty} column.} +\item{field_usage}{Tibble from \code{\link[=twb_field_usage]{twb_field_usage()}}.} +\item{filters}{Worksheet filters (scoped to \code{dashboard} if given).} +\item{sorts}{Worksheet sorts (scoped to \code{dashboard} if given).} +\item{chart_types}{Mark types per worksheet.} +\item{dashboard_layout}{Zone positions from \code{\link[=twb_dashboard_sheets]{twb_dashboard_sheets()}}.} +\item{actions}{Dashboard actions from \code{\link[=twb_dashboard_actions]{twb_dashboard_actions()}}.} +} + +\strong{\code{format = "text"}}: a single \code{character(1)} with section headers and +tabular output. +} +\description{ +Assembles all extracted intelligence — datasources, parameters, calculated +fields with complexity classifications, field usage, filters, sorts, chart +types, dashboard layout, and actions — into a single named list (or +formatted text) ready for use when porting to another visualisation tool. 
+} +\examples{ +twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser") +stopifnot(nzchar(twb), file.exists(twb)) +xml <- xml2::read_xml(twb) +brief <- twb_replication_brief(xml) +names(brief) +brief$meta + +} diff --git a/tests/testthat/test-analytics.R b/tests/testthat/test-analytics.R new file mode 100644 index 0000000..8c1feee --- /dev/null +++ b/tests/testthat/test-analytics.R @@ -0,0 +1,355 @@ +## Tests for Phase 4: twb_calc_complexity, twb_field_usage, twb_replication_brief + +# ---- helpers ----------------------------------------------------------------- + +.make_calc_xml <- function(...) { + calcs <- list(...) + col_nodes <- vapply(calcs, function(c) { + caption_attr <- if (!is.null(c$caption)) sprintf(' caption="%s"', c$caption) else "" + sprintf( + ' + + ', + c$name, caption_attr, + c$datatype %||% "string", c$role %||% "measure", + gsub('"', """, c$formula %||% "") + ) + }, character(1L)) + xml2::read_xml(sprintf( + ' + + %s + + + ', + paste(col_nodes, collapse = "\n") + )) +} + +`%||%` <- function(a, b) if (!is.null(a)) a else b + +# ---- twb_calc_complexity: columns ------------------------------------------- + +test_that("twb_calc_complexity returns expected columns", { + xml <- .make_calc_xml( + list(name = "[Sales]", formula = "SUM([Price])"), + list(name = "[Count]", formula = "COUNT([Orders])") + ) + out <- twb_calc_complexity(xml) + expected_cols <- c("datasource", "name", "tableau_internal_name", + "datatype", "role", "calc_type", "lod_type", + "is_table_calc", "dep_depth", "n_deps", "formula") + expect_true(all(expected_cols %in% names(out))) +}) + +# ---- twb_calc_complexity: LOD detection ------------------------------------- + +test_that("twb_calc_complexity detects FIXED LOD", { + xml <- .make_calc_xml( + list(name = "[FixedSales]", formula = "{ FIXED [Category] : SUM([Sales]) }") + ) + out <- twb_calc_complexity(xml) + expect_equal(out$calc_type, "lod") + expect_equal(out$lod_type, "fixed") +}) + 
+test_that("twb_calc_complexity detects INCLUDE LOD", { + xml <- .make_calc_xml( + list(name = "[IncSales]", formula = "{ INCLUDE [Category] : SUM([Sales]) }") + ) + out <- twb_calc_complexity(xml) + expect_equal(out$calc_type, "lod") + expect_equal(out$lod_type, "include") +}) + +test_that("twb_calc_complexity detects EXCLUDE LOD", { + xml <- .make_calc_xml( + list(name = "[ExclSales]", formula = "{ EXCLUDE [Category] : SUM([Sales]) }") + ) + out <- twb_calc_complexity(xml) + expect_equal(out$calc_type, "lod") + expect_equal(out$lod_type, "exclude") +}) + +# ---- twb_calc_complexity: table_calc detection ------------------------------ + +test_that("twb_calc_complexity detects table calcs", { + xml <- .make_calc_xml( + list(name = "[Running]", formula = "RUNNING_SUM(SUM([Sales]))") + ) + out <- twb_calc_complexity(xml) + expect_equal(out$calc_type, "table_calc") + expect_true(out$is_table_calc) +}) + +# ---- twb_calc_complexity: aggregate detection ------------------------------- + +test_that("twb_calc_complexity classifies SUM as aggregate", { + xml <- .make_calc_xml( + list(name = "[TotalSales]", formula = "SUM([Sales])") + ) + out <- twb_calc_complexity(xml) + expect_equal(out$calc_type, "aggregate") + expect_true(is.na(out$lod_type)) +}) + +test_that("twb_calc_complexity classifies COUNTD as aggregate", { + xml <- .make_calc_xml( + list(name = "[UniqueCustomers]", formula = "COUNTD([Customer ID])") + ) + out <- twb_calc_complexity(xml) + expect_equal(out$calc_type, "aggregate") +}) + +# ---- twb_calc_complexity: raw classification -------------------------------- + +test_that("twb_calc_complexity classifies string concat as raw", { + xml <- .make_calc_xml( + list(name = "[FullName]", formula = "[First] + " " + [Last]", + datatype = "string") + ) + out <- twb_calc_complexity(xml) + expect_equal(out$calc_type, "raw") + expect_true(is.na(out$lod_type)) +}) + +# ---- twb_calc_complexity: LOD wins over table_calc precedence --------------- + +test_that("LOD wins 
over table_calc when both patterns match", { + # Contrived formula with both patterns + xml <- .make_calc_xml( + list(name = "[Weird]", + formula = "{ FIXED [Cat] : RUNNING_SUM(SUM([Sales])) }") + ) + out <- twb_calc_complexity(xml) + expect_equal(out$calc_type, "lod") +}) + +# ---- twb_calc_complexity: dep_depth ----------------------------------------- + +test_that("dep_depth is 0 for direct formula with no calc deps", { + xml <- .make_calc_xml( + list(name = "[Revenue]", formula = "SUM([Sales])") + ) + out <- twb_calc_complexity(xml) + expect_equal(out$dep_depth, 0L) +}) + +test_that("dep_depth is 1 for calc depending on another calc", { + xml <- .make_calc_xml( + list(name = "[Revenue]", formula = "SUM([Sales])"), + list(name = "[Revenue2]", formula = "[Revenue] * 2") + ) + out <- twb_calc_complexity(xml) + revenue2 <- out[out$name == "Revenue2", ] + expect_equal(revenue2$dep_depth, 1L) +}) + +test_that("dep_depth is 2 for three-level calc chain", { + xml <- .make_calc_xml( + list(name = "[A]", formula = "SUM([x])"), + list(name = "[B]", formula = "[A] * 2"), + list(name = "[C]", formula = "[B] + 1") + ) + out <- twb_calc_complexity(xml) + cc <- out[out$name == "C", ] + expect_equal(cc$dep_depth, 2L) +}) + +# ---- twb_calc_complexity: empty workbook ------------------------------------ + +test_that("twb_calc_complexity returns typed empty tibble for no calcs", { + xml <- xml2::read_xml( + '' + ) + out <- twb_calc_complexity(xml) + expect_equal(nrow(out), 0L) + expect_true("calc_type" %in% names(out)) + expect_true("dep_depth" %in% names(out)) +}) + +# ---- twb_field_usage: basic columns ----------------------------------------- + +test_that("twb_field_usage returns expected columns (long form)", { + xml <- xml2::read_xml( + ' + + + + + + + + + + + + + + + + [ds1].[none:Sales:qk] + +
+
+
+
' + ) + out <- twb_field_usage(xml) + expect_true(all(c("field_clean", "datasource", "sheet", + "context", "n_appearances") %in% names(out))) +}) + +test_that("twb_field_usage wide=TRUE returns one row per field", { + xml <- xml2::read_xml( + ' + + + + [ds1].[none:Revenue:qk] + +
+
+ + + [ds1].[none:Revenue:qk] + +
+
+
+
' + ) + out_long <- twb_field_usage(xml, wide = FALSE) + out_wide <- twb_field_usage(xml, wide = TRUE) + # Wide has no sheet / context / n_appearances columns + expect_false("context" %in% names(out_wide)) + expect_false("n_appearances" %in% names(out_wide)) + # Each unique (field_clean, datasource) becomes one row + expect_equal(nrow(out_wide), nrow(dplyr::distinct(out_long, field_clean, datasource))) +}) + +test_that("twb_field_usage include_shelves=FALSE shows only filters", { + xml <- xml2::read_xml( + ' + + + + [ds1].[none:Revenue:qk] + + + +
+
+
+
' + ) + out <- twb_field_usage(xml, include_shelves = FALSE) + if (nrow(out) > 0L) + expect_true(all(out$context == "filter")) +}) + +test_that("twb_field_usage returns empty tibble when both FALSE", { + xml <- xml2::read_xml('') + expect_message( + out <- twb_field_usage(xml, include_filters = FALSE, include_shelves = FALSE), + "both FALSE" + ) + expect_equal(nrow(out), 0L) +}) + +test_that("twb_field_usage empty workbook returns empty tibble", { + xml <- xml2::read_xml('') + out <- twb_field_usage(xml) + expect_equal(nrow(out), 0L) + expect_true(all(c("field_clean", "datasource", "sheet", "context", + "n_appearances") %in% names(out))) +}) + +# ---- twb_replication_brief: list structure ---------------------------------- + +test_that("twb_replication_brief returns a list with expected elements", { + twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser") + skip_if_not(nzchar(twb) && file.exists(twb), "example .twb not found") + xml <- xml2::read_xml(twb) + brief <- twb_replication_brief(xml) + expect_type(brief, "list") + expected_keys <- c("meta", "datasources", "parameters", "custom_sql", + "calculated_fields", "field_usage", "filters", "sorts", + "chart_types", "dashboard_layout", "actions") + expect_true(all(expected_keys %in% names(brief))) +}) + +test_that("twb_replication_brief meta has correct columns and types", { + twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser") + skip_if_not(nzchar(twb) && file.exists(twb), "example .twb not found") + xml <- xml2::read_xml(twb) + meta <- twb_replication_brief(xml)$meta + expect_equal(nrow(meta), 1L) + expect_true(is.character(meta$workbook_file)) + expect_true(is.integer(meta$n_datasources)) + expect_true(is.integer(meta$n_worksheets)) + expect_true(is.character(meta$generated_at)) +}) + +test_that("twb_replication_brief format=text returns character(1)", { + twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser") + skip_if_not(nzchar(twb) && 
file.exists(twb), "example .twb not found") + xml <- xml2::read_xml(twb) + txt <- twb_replication_brief(xml, format = "text") + expect_type(txt, "character") + expect_length(txt, 1L) + expect_match(txt, "REPLICATION BRIEF") + expect_match(txt, "CALCULATED FIELDS") +}) + +test_that("twb_replication_brief include_sql=FALSE sets custom_sql to NULL", { + twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser") + skip_if_not(nzchar(twb) && file.exists(twb), "example .twb not found") + xml <- xml2::read_xml(twb) + brief <- twb_replication_brief(xml, include_sql = FALSE) + expect_null(brief$custom_sql) +}) + +test_that("twb_replication_brief include_formulas=TRUE adds formula_pretty", { + twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser") + skip_if_not(nzchar(twb) && file.exists(twb), "example .twb not found") + xml <- xml2::read_xml(twb) + brief <- twb_replication_brief(xml, include_formulas = TRUE) + cf <- brief$calculated_fields + if (nrow(cf) > 0L) + expect_true("formula_pretty" %in% names(cf)) +}) + +# ---- TwbParser integration -------------------------------------------------- + +test_that("TwbParser exposes get_calc_complexity and active binding", { + twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser") + skip_if_not(nzchar(twb) && file.exists(twb), "example .twb not found") + parser <- TwbParser$new(twb) + method_out <- parser$get_calc_complexity() + binding_out <- parser$calc_complexity + expect_s3_class(method_out, "tbl_df") + expect_identical(binding_out, method_out) +}) + +test_that("TwbParser exposes get_field_usage and active binding", { + twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser") + skip_if_not(nzchar(twb) && file.exists(twb), "example .twb not found") + parser <- TwbParser$new(twb) + method_out <- parser$get_field_usage() + binding_out <- parser$field_usage + expect_s3_class(method_out, "tbl_df") + expect_identical(binding_out, method_out) +}) + 
+test_that("TwbParser get_replication_brief returns a list", { + twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser") + skip_if_not(nzchar(twb) && file.exists(twb), "example .twb not found") + parser <- TwbParser$new(twb) + brief <- parser$get_replication_brief() + expect_type(brief, "list") + expect_true("meta" %in% names(brief)) + # workbook_file should be the basename of the .twb path + expect_equal(brief$meta$workbook_file, basename(twb)) +})