From 75ccdff8abbf55b0c7ebd9650d31e0529ce0a5cf Mon Sep 17 00:00:00 2001 From: George Arthur Date: Fri, 10 Apr 2026 23:42:07 -0400 Subject: [PATCH] Bump to 0.4.0: worksheet & dashboard intelligence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1 — Bug fixes & consolidation: - Centralise clean_table/clean_field into .twb_clean_table()/.twb_clean_field() in utils.R; remove the duplicate cleaner helpers from fields.R, calculated_fields.R, relationships.R, joins.R, dependency_graph.R - Fix plot_relationship_graph() edge direction (the `from` vertex used right_field instead of left_field) - Fix plot_source_join_graph() column references (left_source → left_table) - Fix integer_ typo in insights.R zone dimension parsing - Add relationship = 'many-to-many' to inner_join calls in infer_implicit_relationships() Phase 2 — Per-worksheet intelligence (R/sheet_details.R): - twb_sheet_shelves(): rows, cols, and encoding shelf fields per worksheet - twb_sheet_filters(): categorical members, range min/max, include/exclude mode - twb_sheet_axes(): reversed, include-zero, scale-type per axis rule - twb_sheet_sorts(): sort direction and method per worksheet Phase 3 — Per-dashboard intelligence (R/dashboard_details.R): - twb_dashboard_sheets(): sheets on a dashboard with zone pixel positions - twb_dashboard_layout(): full zone tree with parent IDs and component types - twb_dashboard_actions(): filter and URL actions with source/target/trigger/url All 7 new functions wired into TwbParser R6 methods and active bindings. globals.R updated with all new NSE column names. Tests: test-cleaners.R, test-graph-bugs.R, test-sheet-details.R, test-dashboard-details.R (114 new assertions, all passing). Docs: NEWS.md, README.md, cran-comments.md, _pkgdown.yml, vignette updated. R CMD check: 0 errors | 0 warnings | 0 notes. 
Co-Authored-By: Claude Sonnet 4.6 --- .Rbuildignore | 2 + DESCRIPTION | 4 +- NAMESPACE | 10 + NEWS.md | 52 +++ R/active-bindings.R | 9 + R/calculated_fields.R | 18 +- R/dashboard_details.R | 367 +++++++++++++++ R/dependency_graph.R | 17 +- R/fields.R | 100 +---- R/globals.R | 32 +- R/insights.R | 2 +- R/joins.R | 41 +- R/relationships.R | 30 +- R/sheet_details.R | 571 ++++++++++++++++++++++++ R/twb_parser.R | 56 ++- R/utils.R | 59 +++ README.md | 43 +- _pkgdown.yml | 17 + cran-comments.md | 18 +- inst/WORDLIST | 14 + man/TwbParser.Rd | 18 + man/extract_datasource_details.Rd | 2 +- man/extract_twb_from_twbx.Rd | 2 +- man/tbs_custom_sql_graphql.Rd | 2 +- man/tbs_publish_info.Rd | 2 +- man/twb_dashboard_actions.Rd | 50 +++ man/twb_dashboard_layout.Rd | 52 +++ man/twb_dashboard_sheets.Rd | 45 ++ man/twb_sheet_axes.Rd | 49 ++ man/twb_sheet_filters.Rd | 54 +++ man/twb_sheet_shelves.Rd | 57 +++ man/twb_sheet_sorts.Rd | 44 ++ man/twbx_extract_files.Rd | 2 +- man/twbx_list.Rd | 2 +- tests/testthat/test-cleaners.R | 26 ++ tests/testthat/test-dashboard-details.R | 171 +++++++ tests/testthat/test-graph-bugs.R | 59 +++ tests/testthat/test-sheet-details.R | 182 ++++++++ vignettes/twbparser-intro.Rmd | 78 +++- 39 files changed, 2157 insertions(+), 202 deletions(-) create mode 100644 R/dashboard_details.R create mode 100644 R/sheet_details.R create mode 100644 man/twb_dashboard_actions.Rd create mode 100644 man/twb_dashboard_layout.Rd create mode 100644 man/twb_dashboard_sheets.Rd create mode 100644 man/twb_sheet_axes.Rd create mode 100644 man/twb_sheet_filters.Rd create mode 100644 man/twb_sheet_shelves.Rd create mode 100644 man/twb_sheet_sorts.Rd create mode 100644 tests/testthat/test-cleaners.R create mode 100644 tests/testthat/test-dashboard-details.R create mode 100644 tests/testthat/test-graph-bugs.R create mode 100644 tests/testthat/test-sheet-details.R diff --git a/.Rbuildignore b/.Rbuildignore index 11093e9..eb4e407 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -19,3 
+19,5 @@ cran_examples_sanitizer.R ^check/ ^twbparser.Rcheck$ ^\.Rd2pdf20656$ +^\.git$ +^\.claude$ diff --git a/DESCRIPTION b/DESCRIPTION index b8c9d55..16211be 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: twbparser Title: Parse 'Tableau' Workbooks into Functional Data -Version: 0.3.1 +Version: 0.4.0 Authors@R: person("George", "Arthur", , "prigasgenthian48@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-1975-1459")) @@ -46,4 +46,4 @@ Encoding: UTF-8 Language: en-US LazyData: false Roxygen: list(markdown = TRUE, r6 = FALSE) -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 diff --git a/NAMESPACE b/NAMESPACE index 6aabffa..06c8a06 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,7 +23,10 @@ export(tbs_publish_info) export(twb_charts) export(twb_colors) export(twb_custom_sql) +export(twb_dashboard_actions) export(twb_dashboard_filters) +export(twb_dashboard_layout) +export(twb_dashboard_sheets) export(twb_dashboard_summary) export(twb_dashboards) export(twb_initial_sql) @@ -31,6 +34,10 @@ export(twb_page_composition) export(twb_pages) export(twb_pages_summary) export(twb_published_refs) +export(twb_sheet_axes) +export(twb_sheet_filters) +export(twb_sheet_shelves) +export(twb_sheet_sorts) export(twbx_extract_files) export(twbx_list) export(validate_relationships) @@ -63,6 +70,7 @@ importFrom(igraph,graph_from_data_frame) importFrom(igraph,layout_with_fr) importFrom(igraph,make_empty_graph) importFrom(purrr,map) +importFrom(purrr,map_chr) importFrom(purrr,map_dfr) importFrom(rlang,.data) importFrom(stringr,str_detect) @@ -82,7 +90,9 @@ importFrom(withr,with_seed) importFrom(xml2,read_xml) importFrom(xml2,xml_attr) importFrom(xml2,xml_attrs) +importFrom(xml2,xml_children) importFrom(xml2,xml_find_all) importFrom(xml2,xml_find_first) importFrom(xml2,xml_name) importFrom(xml2,xml_parent) +importFrom(xml2,xml_text) diff --git a/NEWS.md b/NEWS.md index 8d02567..55e5d61 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,55 @@ +# twbparser 0.4.0 + +## 
New features + +### Per-worksheet intelligence + +* `twb_sheet_shelves()` — extract every field placed on rows, cols, or an + encoding shelf (color, size, label, detail, tooltip) for one or all worksheets. +* `twb_sheet_filters()` — extract worksheet-level filters including categorical + member lists, range min/max, and include/exclude mode. +* `twb_sheet_axes()` — extract per-axis configuration: reversed, include-zero, + and scale type (linear, log, …). +* `twb_sheet_sorts()` — extract sort directives with sort direction and method + (field aggregate, alphabetic, manual, data-source order). +* All four are exposed on `TwbParser` as `get_sheet_shelves()`, + `get_sheet_filters()`, `get_sheet_axes()`, `get_sheet_sorts()` and as + no-parens active bindings (`parser$sheet_shelves`, etc.). + +### Per-dashboard intelligence + +* `twb_dashboard_sheets()` — list every worksheet placed on a dashboard with + zone id and pixel position (x, y, w, h). +* `twb_dashboard_layout()` — full zone tree including parent zone id, component + type (worksheet / filter / container / …), layout type (tiled / floating), + and pixel bounds. +* `twb_dashboard_actions()` — extract filter and URL actions with source and + target sheets, run-on trigger type, and URL value. +* All three are exposed on `TwbParser` as `get_dashboard_sheets()`, + `get_dashboard_layout()`, `get_dashboard_actions()` and as active bindings. + +## Bug fixes + +* `plot_relationship_graph()`: fixed edge direction — the `from` vertex was + incorrectly built from `right_field` instead of `left_field`. +* `plot_source_join_graph()`: fixed reference to non-existent columns + `left_source` / `right_source`; replaced with `left_table` / `right_table`. +* `infer_implicit_relationships()`: added deduplication before the field-name + self-join to prevent Cartesian explosion when many tables share a field name; + added `relationship = "many-to-many"` to suppress the dplyr 1.1+ warning. 
+* Fixed `integer_` typo in `insights.R` that caused errors when parsing + dashboard zone dimensions. + +## Internal + +* Canonical `.twb_clean_table()` and `.twb_clean_field()` helpers added to + `utils.R`, replacing four independent copies scattered across `fields.R`, + `calculated_fields.R`, `relationships.R`, `joins.R`, and `dependency_graph.R`. +* `.twb_clean_field()` now correctly strips Tableau column-instance prefixes + (`none:Category:nk` → `Category`) and returns unnamed vectors. + +--- + # twbparser 0.3.1 * Remove use of `unlockBinding()` in internal TwbParser active-binding helpers. diff --git a/R/active-bindings.R b/R/active-bindings.R index 064d9ae..7267cb2 100644 --- a/R/active-bindings.R +++ b/R/active-bindings.R @@ -91,6 +91,15 @@ twb_install_active_properties <- function(x, cache = TRUE) { rebind("twbx_extracts_tbl", wrap_cache("twbx_extracts_tbl", function() x$get_twbx_extracts())) rebind("twbx_images_tbl", wrap_cache("twbx_images_tbl", function() x$get_twbx_images())) + ## Phase 2/3: sheet & dashboard intelligence + rebind("sheet_shelves", wrap_cache("sheet_shelves", function() x$get_sheet_shelves())) + rebind("sheet_filters", wrap_cache("sheet_filters", function() x$get_sheet_filters())) + rebind("sheet_axes", wrap_cache("sheet_axes", function() x$get_sheet_axes())) + rebind("sheet_sorts", wrap_cache("sheet_sorts", function() x$get_sheet_sorts())) + rebind("dashboard_sheets", wrap_cache("dashboard_sheets", function() x$get_dashboard_sheets())) + rebind("dashboard_layout", wrap_cache("dashboard_layout", function() x$get_dashboard_layout())) + rebind("dashboard_actions", wrap_cache("dashboard_actions", function() x$get_dashboard_actions())) + ## Validation snapshot (read-only) rebind( "validation", diff --git a/R/calculated_fields.R b/R/calculated_fields.R index a941198..f6fde38 100644 --- a/R/calculated_fields.R +++ b/R/calculated_fields.R @@ -3,18 +3,6 @@ ifelse(is.na(x), NA_character_, gsub("\\[|\\]", "", x)) } -#' @keywords internal 
-.clean_table <- function(x) { - if (is.null(x) || is.na(x)) { - return(NA_character_) - } - x <- gsub("^\\[.*?\\]\\.", "", x) # drop [Extract]. / [Connection]. - x <- gsub("\\[|\\]", "", x) # strip [ ] - x <- sub("_[0-9A-Fa-f]{32}$", "", x) # drop 32-hex suffix - x <- trimws(x) - if (!nzchar(x)) NA_character_ else x -} - #' Extract calculated fields from a TWB #' #' Finds columns that contain \code{}nodes and returns metadata and @@ -103,7 +91,7 @@ extract_calculated_fields <- function(xml_doc, include_parameters = FALSE) { calc_class = calc_class, is_table_calc = is_tbl, table = raw_tbl, - table_clean = .clean_table(raw_tbl) + table_clean = .twb_clean_table(raw_tbl) ) }) }) |> @@ -181,7 +169,7 @@ extract_parameters <- function(xml_doc) { current_value = cur_val, is_parameter = TRUE, table = raw_tbl, - table_clean = .clean_table(raw_tbl) + table_clean = .twb_clean_table(raw_tbl) ) }) }) |> @@ -251,7 +239,7 @@ extract_raw_fields <- function(xml_doc) { is_hidden = attr_safe_get(a, "hidden", "false") %in% c("true", "1"), is_parameter = FALSE, table = raw_tbl, - table_clean = .clean_table(raw_tbl) + table_clean = .twb_clean_table(raw_tbl) ) }) }) |> diff --git a/R/dashboard_details.R b/R/dashboard_details.R new file mode 100644 index 0000000..2d44bf5 --- /dev/null +++ b/R/dashboard_details.R @@ -0,0 +1,367 @@ +#' @importFrom xml2 xml_find_all xml_find_first xml_attr xml_name xml_parent xml_children +#' @importFrom tibble tibble +#' @importFrom dplyr bind_rows filter arrange mutate select distinct coalesce +#' @importFrom purrr map_dfr map_chr +NULL + +# ---- twb_dashboard_sheets ---------------------------------------------------- + +#' List worksheets embedded in each dashboard +#' +#' Returns one row per worksheet per dashboard, with the zone's position on the +#' canvas. +#' +#' @param x A `TwbParser` object or an `xml2` document. +#' @param dashboard Optional character scalar to restrict output to one +#' dashboard. 
+#' +#' @return A tibble with columns: +#' \describe{ +#' \item{dashboard}{Dashboard name.} +#' \item{sheet}{Referenced worksheet name.} +#' \item{zone_id}{Zone identifier.} +#' \item{x}{Horizontal offset (pixels), or `NA`.} +#' \item{y}{Vertical offset (pixels), or `NA`.} +#' \item{w}{Width (pixels), or `NA`.} +#' \item{h}{Height (pixels), or `NA`.} +#' } +#' +#' @examples +#' xml <- xml2::read_xml( +#' ' +#' +#' +#' +#' +#' +#' +#' +#' ' +#' ) +#' twb_dashboard_sheets(xml) +#' +#' @export +twb_dashboard_sheets <- function(x, dashboard = NULL) { + xml_doc <- .twb_resolve_xml(x) + if (!is.null(dashboard)) { + stopifnot(is.character(dashboard), length(dashboard) == 1L) + } + .ins_dashboard_sheets(xml_doc, dashboard) +} + +#' @keywords internal +#' @noRd +.ins_dashboard_sheets <- function(xml_doc, dashboard = NULL) { + d_xpath <- if (is.null(dashboard)) { + ".//dashboard" + } else { + paste0(".//dashboard[@name='", gsub("'", "", dashboard, fixed = TRUE), "']") + } + d_nodes <- xml2::xml_find_all(xml_doc, d_xpath) + if (length(d_nodes) == 0L) { + return(tibble::tibble( + dashboard = character(), sheet = character(), zone_id = character(), + x = integer(), y = integer(), w = integer(), h = integer() + )) + } + + purrr::map_dfr(d_nodes, function(d) { + d_name <- xml2::xml_attr(d, "name") %||% NA_character_ + ws_zones <- xml2::xml_find_all(d, ".//zone[@worksheet]") + if (length(ws_zones) == 0L) return(tibble::tibble()) + + purrr::map_dfr(ws_zones, function(z) { + tibble::tibble( + dashboard = d_name, + sheet = xml2::xml_attr(z, "worksheet") %||% NA_character_, + zone_id = xml2::xml_attr(z, "id") %||% NA_character_, + x = .int_attr(z, "x"), + y = .int_attr(z, "y"), + w = .int_attr(z, "w"), + h = .int_attr(z, "h") + ) + }) + }) |> + dplyr::arrange(.data$dashboard, .data$sheet) +} + +# ---- twb_dashboard_layout ---------------------------------------------------- + +#' Full layout of dashboard zones with container hierarchy +#' +#' Returns one row per zone per dashboard, 
including the parent-zone relationship +#' and a tiled/floating classification. +#' +#' @param x A `TwbParser` object or an `xml2` document. +#' @param dashboard Optional character scalar to restrict output to one +#' dashboard. +#' +#' @return A tibble with columns: +#' \describe{ +#' \item{dashboard}{Dashboard name.} +#' \item{zone_id}{Zone identifier.} +#' \item{parent_zone_id}{Parent zone identifier (`NA` for root zones).} +#' \item{component_type}{`"worksheet"`, `"filter"`, `"legend"`, +#' `"parameter_control"`, `"text"`, `"image"`, `"container"`, or `"blank"`.} +#' \item{target}{Referenced worksheet name or object, if applicable.} +#' \item{layout_type}{`"floating"` or `"tiled"`.} +#' \item{x}{Horizontal offset, or `NA`.} +#' \item{y}{Vertical offset, or `NA`.} +#' \item{w}{Width, or `NA`.} +#' \item{h}{Height, or `NA`.} +#' } +#' +#' @examples +#' xml <- xml2::read_xml( +#' ' +#' +#' +#' +#' +#' +#' +#' +#' +#' +#' +#' ' +#' ) +#' twb_dashboard_layout(xml) +#' +#' @export +twb_dashboard_layout <- function(x, dashboard = NULL) { + xml_doc <- .twb_resolve_xml(x) + if (!is.null(dashboard)) { + stopifnot(is.character(dashboard), length(dashboard) == 1L) + } + .ins_dashboard_layout(xml_doc, dashboard) +} + +#' @keywords internal +#' @noRd +.ins_dashboard_layout <- function(xml_doc, dashboard = NULL) { + d_xpath <- if (is.null(dashboard)) { + ".//dashboard" + } else { + paste0(".//dashboard[@name='", gsub("'", "", dashboard, fixed = TRUE), "']") + } + d_nodes <- xml2::xml_find_all(xml_doc, d_xpath) + if (length(d_nodes) == 0L) { + return(.empty_layout()) + } + + purrr::map_dfr(d_nodes, function(d) { + d_name <- xml2::xml_attr(d, "name") %||% NA_character_ + zones <- xml2::xml_find_all(d, ".//zone") + if (length(zones) == 0L) return(tibble::tibble()) + + purrr::map_dfr(zones, function(z) { + z_id <- xml2::xml_attr(z, "id") %||% NA_character_ + z_type <- xml2::xml_attr(z, "type") %||% NA_character_ + + # parent zone id + parent_node <- xml2::xml_parent(z) + 
parent_id <- if (!inherits(parent_node, "xml_missing") && + identical(xml2::xml_name(parent_node), "zone")) { + xml2::xml_attr(parent_node, "id") %||% NA_character_ + } else { + NA_character_ + } + + # component type classification + has_ws <- !is.na(xml2::xml_attr(z, "worksheet")) + has_filt <- length(xml2::xml_find_all(z, "./filter | ./quick-filter")) > 0L + has_leg <- length(xml2::xml_find_all(z, "./legend")) > 0L + has_param <- length(xml2::xml_find_all(z, "./parameter-control")) > 0L + has_text <- length(xml2::xml_find_all(z, "./text")) > 0L + has_img <- length(xml2::xml_find_all(z, "./image")) > 0L + is_container <- !is.na(z_type) && + grepl("^layout", z_type, ignore.case = TRUE) + + comp_type <- dplyr::case_when( + has_ws ~ "worksheet", + has_filt | (!is.na(z_type) & grepl("filter", z_type, ignore.case = TRUE)) ~ "filter", + has_leg | (!is.na(z_type) & grepl("legend", z_type, ignore.case = TRUE)) ~ "legend", + has_param ~ "parameter_control", + has_text | (!is.na(z_type) & grepl("^text", z_type, ignore.case = TRUE)) ~ "text", + has_img | (!is.na(z_type) & grepl("image", z_type, ignore.case = TRUE)) ~ "image", + is_container ~ "container", + !is.na(z_type) & nzchar(z_type) ~ z_type, + TRUE ~ "blank" + ) + + layout_type <- if (!is.na(z_type) && grepl("floating", z_type, ignore.case = TRUE)) { + "floating" + } else { + "tiled" + } + + target <- xml2::xml_attr(z, "worksheet") %||% NA_character_ + + tibble::tibble( + dashboard = d_name, + zone_id = z_id, + parent_zone_id = parent_id, + component_type = comp_type, + target = target, + layout_type = layout_type, + x = .int_attr(z, "x"), + y = .int_attr(z, "y"), + w = .int_attr(z, "w"), + h = .int_attr(z, "h") + ) + }) + }) |> + dplyr::arrange(.data$dashboard, .data$zone_id) +} + +.empty_layout <- function() { + tibble::tibble( + dashboard = character(), + zone_id = character(), + parent_zone_id = character(), + component_type = character(), + target = character(), + layout_type = character(), + x = integer(), y = 
integer(), w = integer(), h = integer() + ) +} + +# ---- twb_dashboard_actions --------------------------------------------------- + +#' Extract dashboard and workbook actions +#' +#' Parses `` nodes from dashboard `` sections and the +#' top-level workbook `` section. Returns one row per action. +#' +#' @param x A `TwbParser` object or an `xml2` document. +#' @param dashboard Optional character scalar. When supplied, only actions +#' whose `` include a sheet from that dashboard are returned. +#' Pass `NULL` (default) to return all actions. +#' +#' @return A tibble with columns: +#' \describe{ +#' \item{action_name}{Caption / display name of the action.} +#' \item{action_type}{`"filter"`, `"highlight"`, `"url"`, `"set"`, or +#' another type string from the XML.} +#' \item{source_sheets}{Comma-separated list of source worksheet names.} +#' \item{target_sheet}{Target worksheet name, or `NA` for URL actions.} +#' \item{run_on}{Trigger: `"select"`, `"menu"`, or `"hover"`.} +#' \item{url}{URL value for URL-type actions; `NA` otherwise.} +#' } +#' +#' @examples +#' xml <- xml2::read_xml( +#' ' +#' +#' +#' +#' +#' +#' +#' +#' +#' +#' +#' +#' ' +#' ) +#' twb_dashboard_actions(xml) +#' +#' @export +twb_dashboard_actions <- function(x, dashboard = NULL) { + xml_doc <- .twb_resolve_xml(x) + if (!is.null(dashboard)) { + stopifnot(is.character(dashboard), length(dashboard) == 1L) + } + .ins_dashboard_actions(xml_doc, dashboard) +} + +#' @keywords internal +#' @noRd +.ins_dashboard_actions <- function(xml_doc, dashboard = NULL) { + act_nodes <- xml2::xml_find_all(xml_doc, ".//actions/action") + if (length(act_nodes) == 0L) { + return(.empty_actions()) + } + + out <- purrr::map_dfr(act_nodes, function(act) { + caption <- xml2::xml_attr(act, "caption") %||% + xml2::xml_attr(act, "name") %||% NA_character_ + atype <- xml2::xml_attr(act, "type") %||% NA_character_ + + src_sheets <- xml2::xml_find_all(act, ".//source-sheet") + src_names <- xml2::xml_attr(src_sheets, "name") + 
src_names <- src_names[!is.na(src_names)] + src_str <- if (length(src_names)) paste(src_names, collapse = ", ") else NA_character_ + + tgt_sheets <- xml2::xml_find_all(act, ".//target-sheet") + tgt_names <- xml2::xml_attr(tgt_sheets, "name") + tgt_names <- tgt_names[!is.na(tgt_names)] + tgt_str <- if (length(tgt_names)) paste(tgt_names, collapse = ", ") else NA_character_ + + run_on_nd <- xml2::xml_find_first(act, ".//run-on") + run_on <- if (!inherits(run_on_nd, "xml_missing")) { + xml2::xml_attr(run_on_nd, "type") %||% NA_character_ + } else { + NA_character_ + } + + url_nd <- xml2::xml_find_first(act, ".//url") + url <- if (!inherits(url_nd, "xml_missing")) { + xml2::xml_attr(url_nd, "value") %||% NA_character_ + } else { + NA_character_ + } + + tibble::tibble( + action_name = caption, + action_type = atype, + source_sheets = src_str, + target_sheet = tgt_str, + run_on = run_on, + url = url + ) + }) + + # Optional: filter to actions relevant to a named dashboard + if (!is.null(dashboard)) { + # Get sheets in that dashboard + dash_sheets <- .ins_dashboard_sheets(xml_doc, dashboard)$sheet + if (length(dash_sheets)) { + out <- dplyr::filter( + out, + vapply( + source_sheets, + function(s) { + if (is.na(s)) return(FALSE) + any(dash_sheets %in% trimws(strsplit(s, ",", fixed = TRUE)[[1L]])) + }, + logical(1L) + ) + ) + } + } + + out |> + dplyr::distinct() |> + dplyr::arrange(.data$action_type, .data$action_name) +} + +.empty_actions <- function() { + tibble::tibble( + action_name = character(), + action_type = character(), + source_sheets = character(), + target_sheet = character(), + run_on = character(), + url = character() + ) +} + +# ---- Shared helpers ---------------------------------------------------------- + +#' @keywords internal +.int_attr <- function(node, attr_name) { + suppressWarnings(as.integer(xml2::xml_attr(node, attr_name))) +} diff --git a/R/dependency_graph.R b/R/dependency_graph.R index a949981..eb6645b 100644 --- a/R/dependency_graph.R +++ 
b/R/dependency_graph.R @@ -1,10 +1,3 @@ -#' @keywords internal -.clean_name <- function(x) { - x <- gsub("\\[|\\]", "", x) # strip brackets - x <- sub("_[0-9A-Fa-f]{32}$", "", x) # drop 32-char Tableau suffix - x -} - #' @keywords internal .normalize_token <- function(tok) { if (is.na(tok) || !nzchar(tok)) { @@ -205,13 +198,13 @@ plot_source_join_graph <- function(joins_df, relationships_df = NULL, seed = NUL rel_edges <- if (has_operator) { relationships_df |> - dplyr::transmute(from = left_source, to = right_source, label = operator) |> + dplyr::transmute(from = left_table, to = right_table, label = operator) |> dplyr::distinct() } else { relationships_df |> dplyr::transmute( - from = left_source, - to = right_source, + from = left_table, + to = right_table, label = paste0(left_field, " = ", right_field) ) |> dplyr::distinct() @@ -264,8 +257,8 @@ plot_relationship_graph <- function(relationships_df, seed = NULL) { edges <- relationships_df |> dplyr::mutate( - from = paste0(.clean_name(left_table), ".", right_field), - to = paste0(.clean_name(right_table), ".", right_field), + from = paste0(.twb_clean_table(left_table), ".", left_field), + to = paste0(.twb_clean_table(right_table), ".", right_field), label = if ("operator" %in% names(relationships_df)) operator else "=" ) |> dplyr::select(from, to, label) |> diff --git a/R/fields.R b/R/fields.R index b7b15c1..f27c8df 100644 --- a/R/fields.R +++ b/R/fields.R @@ -1,30 +1,3 @@ -#' @keywords internal -.f_clean_table <- function(x) { - if (is.null(x) || is.na(x)) { - return(NA_character_) - } - x <- gsub("^\\[.*?\\]\\.", "", x) # remove [Extract]. or [Connection]. 
- x <- gsub("\\[|\\]", "", x) # strip brackets - x <- sub("_[0-9A-Fa-f]{32}$", "", x) # drop trailing _32hex - x <- trimws(x) - if (!nzchar(x)) NA_character_ else x -} - -#' @keywords internal -.f_clean_field <- function(x) { - if (is.null(x) || is.na(x)) { - return(NA_character_) - } - x <- gsub("\\[|\\]", "", x) # strip brackets - x <- sub("^([^:]+:)+", "", x) # drop "none:" / "clct:" prefixes - parts <- unlist(strsplit(x, "\\.", fixed = FALSE), use.names = FALSE) - parts <- parts[nzchar(parts)] - if (!length(parts)) { - return(NA_character_) - } - tail(parts, 1) -} - #' Extract columns with their source tables from a TWB #' #' Scans top-level \verb{} nodes (excluding view-specific references) and @@ -79,33 +52,20 @@ extract_columns_with_table_source <- function(xml_doc) { a <- xml2::xml_attrs(col) raw_table <- attr_safe_get(a, "table", NA_character_) - raw_name <- attr_safe_get(a, "name", NA_character_) - cap <- attr_safe_get(a, "caption", NA_character_) - - # compute field_clean - nn <- gsub("\\[|\\]", "", raw_name) - nn <- sub("^([^:]+:)+", "", nn) - parts <- strsplit(nn, "\\.", perl = TRUE)[[1]] - field_clean <- if (length(parts)) tail(parts, 1) else nn - - # compute table_clean - table_clean <- raw_table |> - gsub("^\\[.*?\\]\\.", "", x = _) |> - gsub("\\[|\\]", "", x = _) |> - gsub("_[0-9A-Fa-f]{32}$", "", x = _) |> - trimws() + raw_name <- attr_safe_get(a, "name", NA_character_) + cap <- attr_safe_get(a, "caption", NA_character_) tibble::tibble( - datasource = ds_name, - name = raw_name, - caption = cap, - datatype = attr_safe_get(a, "datatype", NA_character_), - role = attr_safe_get(a, "role", NA_character_), - semantic_role = attr_safe_get(a, "semantic-role", NA_character_), - table = gsub("_[0-9A-Fa-f]{32}$", "", raw_table), - table_clean = ifelse(table_clean == "", NA_character_, table_clean), - field_clean = ifelse(field_clean == "", NA_character_, field_clean), - is_parameter = !is.na(attr_safe_get(a, "param-domain-type", NA_character_)) + datasource 
= ds_name, + name = raw_name, + caption = cap, + datatype = attr_safe_get(a, "datatype", NA_character_), + role = attr_safe_get(a, "role", NA_character_), + semantic_role = attr_safe_get(a, "semantic-role", NA_character_), + table = gsub("_[0-9A-Fa-f]{32}$", "", raw_table %||% NA_character_), + table_clean = .twb_clean_table(raw_table), + field_clean = .twb_clean_field(raw_name), + is_parameter = !is.na(attr_safe_get(a, "param-domain-type", NA_character_)) ) }) }) |> @@ -159,35 +119,14 @@ infer_implicit_relationships <- function(fields_df, max_pairs = 50000L) { if (!nm %in% names(fields_df)) fields_df[[nm]] <- NA_character_ } - # --- vectorized cleaners --- - vec_clean_table <- function(x) { - x <- as.character(x) - x <- gsub("^\\[.*?\\]\\.", "", x) - x <- gsub("\\[|\\]", "", x) - x <- gsub("_[0-9A-Fa-f]{32}$", "", x) - x <- trimws(x) - x[x == ""] <- NA_character_ - x - } - vec_clean_field <- function(x) { - x <- as.character(x) - x <- gsub("\\[|\\]", "", x) - x <- sub("^([^:]+:)+", "", x) - parts <- strsplit(x, "\\.", perl = TRUE) - vapply(parts, function(p) { - p <- p[nzchar(p)] - if (!length(p)) NA_character_ else tail(p, 1) - }, character(1)) - } - fields_df <- fields_df |> dplyr::mutate( table_clean = as.character(table_clean), field_clean = as.character(field_clean), table = as.character(table), name = as.character(name), - table_use = dplyr::coalesce(dplyr::na_if(table_clean, ""), vec_clean_table(table)), - field_use = dplyr::coalesce(dplyr::na_if(field_clean, ""), vec_clean_field(name)) + table_use = dplyr::coalesce(dplyr::na_if(table_clean, ""), .twb_clean_table(table)), + field_use = dplyr::coalesce(dplyr::na_if(field_clean, ""), .twb_clean_field(name)) ) f <- fields_df |> @@ -213,7 +152,8 @@ infer_implicit_relationships <- function(fields_df, max_pairs = 50000L) { by_role <- dplyr::inner_join( f_role, f_role, - by = "role", suffix = c("_l", "_r") + by = "role", suffix = c("_l", "_r"), + relationship = "many-to-many" ) |> dplyr::filter(table_l != table_r) 
|> dplyr::transmute( @@ -225,11 +165,15 @@ infer_implicit_relationships <- function(fields_df, max_pairs = 50000L) { ) + # Deduplicate on (table, field_lower) before the self-join to prevent + # a Cartesian explosion when the same field appears multiple times. f2 <- f |> - dplyr::mutate(field_lower = tolower(field)) + dplyr::mutate(field_lower = tolower(field)) |> + dplyr::distinct(table, field_lower, .keep_all = TRUE) by_name <- f2 |> - dplyr::inner_join(f2, by = "field_lower", suffix = c("_l", "_r")) |> + dplyr::inner_join(f2, by = "field_lower", suffix = c("_l", "_r"), + relationship = "many-to-many") |> dplyr::filter(table_l != table_r) |> dplyr::transmute( left_table = table_l, diff --git a/R/globals.R b/R/globals.R index 8973888..3913aa0 100644 --- a/R/globals.R +++ b/R/globals.R @@ -1,19 +1,31 @@ # Register NSE column names to silence R CMD check "no visible binding" notes. utils::globalVariables(c( # columns from unzip() listing + our manifests - "Name","Length","Date","type","size_bytes", + "Name", "Length", "Date", "type", "size_bytes", # generic field/graph columns - "name","formula","inputs","from","output","to","label","field", + "name", "formula", "inputs", "from", "output", "to", "label", "field", # datasource/details joins - "location","location_named","connection_type","connection_class","field_count", - "datasource_name","connection_caption","datasource","primary_table", - "connection_id","connection_target", + "location", "location_named", "connection_type", "connection_class", "field_count", + "datasource_name", "connection_caption", "datasource", "primary_table", + "connection_id", "connection_target", # joins/relationships - "left_field","right_field","operator","left_table","right_table", - "left_source","right_source", + "left_field", "right_field", "operator", "left_table", "right_table", # inferred relationships - "table_clean","field_clean","is_parameter","table_use","field_use","semantic_role", - 
"table_l","table_r","field_l","field_r","key", + "table_clean", "field_clean", "is_parameter", "table_use", "field_use", + "semantic_role", "table_l", "table_r", "field_l", "field_r", "key", + "field_lower", # validators - "left_tok","right_tok","left_base","right_base","left_ok","right_ok" + "left_tok", "right_tok", "left_base", "right_base", "left_ok", "right_ok", + # insights / page composition + "value", "palette_name", "kind", "detail", "scope", + "dashboard", "mark_types", "filters", "chart_types", "integer_", "page_type", + # sheet_details + "shelf", "field_ref", "field_instance", "aggregation", + "filter_class", "include_mode", "members", "range_min", "range_max", + "reversed", "include_zero", "scale_type", + "sort_order", "sort_by", + # dashboard_details + "action_name", "action_type", "source_sheets", "target_sheet", "run_on", + "url", "layout_type", "parent_zone_id", "component_type", "zone_id", + "sheet" )) diff --git a/R/insights.R b/R/insights.R index b84b302..562833f 100644 --- a/R/insights.R +++ b/R/insights.R @@ -206,7 +206,7 @@ NULL page_type = "story", page_name = xml2::xml_attr(s, "name"), component_type = character(), zone_id = character(), target = character(), field = character(), presentation = character(), - x = integer(), y = integer(), w = integer(), h = integer_ + x = integer(), y = integer(), w = integer(), h = integer() ) ) out <- purrr::map_dfr(seq_along(pts), function(i) { diff --git a/R/joins.R b/R/joins.R index 34f9537..f5e218c 100644 --- a/R/joins.R +++ b/R/joins.R @@ -1,30 +1,3 @@ -#' @keywords internal -.j_clean_table <- function(x) { - if (is.null(x) || is.na(x)) { - return(NA_character_) - } - x <- gsub("^\\[.*?\\]\\.", "", x) # remove [Extract]. / [Connection]. 
- x <- gsub("\\[|\\]", "", x) # remove brackets - x <- sub("_[0-9A-Fa-f]{32}$", "", x) # drop 32-hex suffix - x <- trimws(x) - if (!nzchar(x)) NA_character_ else x -} - -#' @keywords internal -.j_clean_field <- function(x) { - if (is.null(x) || is.na(x)) { - return(NA_character_) - } - x <- gsub("\\[|\\]", "", x) # strip brackets - x <- sub("^([^:]+:)+", "", x) - parts <- strsplit(x, "\\.", fixed = FALSE)[[1]] - parts <- parts[nzchar(parts)] - if (!length(parts)) { - return(NA_character_) - } - tail(parts, 1) -} - #' @keywords internal .j_field_from_expr <- function(node) { if (inherits(node, "xml_missing")) { @@ -42,7 +15,7 @@ raw <- cand[which.max(nchar(cand))] m <- stringr::str_extract_all(raw, "\\[[^\\]]+\\]")[[1]] token <- if (length(m)) m[length(m)] else raw - .j_clean_field(token) + .twb_clean_field(token) } #' Extract Tableau join clauses from \verb{} nodes @@ -99,10 +72,10 @@ extract_joins <- function(xml_doc) { op <- attr_safe_get(xml2::xml_attrs(cl), "op", "=") cols <- xml2::xml_find_all(cl, ".//column") if (length(cols) == 2) { - left_tbl <- .j_clean_table(attr_safe_get(xml2::xml_attrs(cols[[1]]), "table")) - left_fld <- .j_clean_field(attr_safe_get(xml2::xml_attrs(cols[[1]]), "name")) - right_tbl <- .j_clean_table(attr_safe_get(xml2::xml_attrs(cols[[2]]), "table")) - right_fld <- .j_clean_field(attr_safe_get(xml2::xml_attrs(cols[[2]]), "name")) + left_tbl <- .twb_clean_table(attr_safe_get(xml2::xml_attrs(cols[[1]]), "table")) + left_fld <- .twb_clean_field(attr_safe_get(xml2::xml_attrs(cols[[1]]), "name")) + right_tbl <- .twb_clean_table(attr_safe_get(xml2::xml_attrs(cols[[2]]), "table")) + right_fld <- .twb_clean_field(attr_safe_get(xml2::xml_attrs(cols[[2]]), "name")) tibble::tibble( join_type = join_type, @@ -132,8 +105,8 @@ extract_joins <- function(xml_doc) { } # Optional table hints on child expressions (rare) - lt <- .j_clean_table(xml2::xml_attr(kids[[1]], "table")) - rt <- .j_clean_table(xml2::xml_attr(kids[[2]], "table")) + lt <- 
.twb_clean_table(xml2::xml_attr(kids[[1]], "table")) + rt <- .twb_clean_table(xml2::xml_attr(kids[[2]], "table")) tibble::tibble( join_type = join_type, diff --git a/R/relationships.R b/R/relationships.R index c9e61e2..85fb9f1 100644 --- a/R/relationships.R +++ b/R/relationships.R @@ -1,12 +1,3 @@ -#' @keywords internal -clean_name <- function(name) { - name |> - stringr::str_remove("^\\[.*?\\]\\.") |> # remove leading [Extract]. or [Connection]. - stringr::str_remove("_[:xdigit:]{32}$") |> # drop trailing _32hex suffix - stringr::str_remove_all("\\[|\\]") |> # strip all brackets - stringr::str_trim() -} - #' @keywords internal .extract_field_from_expr <- function(x) { if (inherits(x, "xml_missing")) { @@ -74,17 +65,6 @@ extract_relations <- function(xml_doc) { dplyr::distinct() } -#' @keywords internal -.rel_clean_table <- function(x) { - if (is.null(x) || is.na(x)) { - return(NA_character_) - } - x <- gsub("^\\[.*?\\]\\.", "", x) - x <- gsub("\\[|\\]", "", x) - x <- sub("_[0-9A-Fa-f]{32}$", "", x) - trimws(x) -} - #' @keywords internal .rel_field_expr <- function(node) { if (inherits(node, "xml_missing")) { @@ -127,19 +107,19 @@ build_object_table_mapping <- function(xml_doc) { caps <- xml2::xml_attr(objs, "caption") for (i in seq_along(ids)) { if (!is.na(ids[i]) && !is.na(caps[i])) { - mapping[[ids[i]]] <- .rel_clean_table(caps[i]) + mapping[[ids[i]]] <- .twb_clean_table(caps[i]) } } } for (lt in xml2::xml_find_all(xml_doc, "//logical-table[@id]")) { id <- xml2::xml_attr(lt, "id") nm <- xml2::xml_attr(lt, "name") - if (!is.na(id) && !is.na(nm)) mapping[[id]] <- .rel_clean_table(nm) + if (!is.na(id) && !is.na(nm)) mapping[[id]] <- .twb_clean_table(nm) } for (rel in xml2::xml_find_all(xml_doc, "//relation[@name]")) { nm <- xml2::xml_attr(rel, "name") tb <- xml2::xml_attr(rel, "table") - if (!is.na(nm)) mapping[[nm]] <- .rel_clean_table(tb %||% nm) + if (!is.na(nm)) mapping[[nm]] <- .twb_clean_table(tb %||% nm) } mapping } @@ -179,8 +159,8 @@ extract_relationships 
<- function(xml_doc) { purrr::map_dfr(rel_nodes, function(rel_node) { e1 <- xml2::xml_attr(xml2::xml_find_first(rel_node, ".//first-end-point"), "object-id") e2 <- xml2::xml_attr(xml2::xml_find_first(rel_node, ".//second-end-point"), "object-id") - left_table <- .rel_clean_table(id_map[[e1]] %||% e1) - right_table <- .rel_clean_table(id_map[[e2]] %||% e2) + left_table <- .twb_clean_table(id_map[[e1]] %||% e1) + right_table <- .twb_clean_table(id_map[[e2]] %||% e2) ex <- xml2::xml_find_first(rel_node, ".//expression[@op][count(./expression) >= 2]") if (inherits(ex, "xml_missing")) { return(tibble::tibble()) diff --git a/R/sheet_details.R b/R/sheet_details.R new file mode 100644 index 0000000..58faa79 --- /dev/null +++ b/R/sheet_details.R @@ -0,0 +1,571 @@ +#' @importFrom xml2 xml_find_all xml_find_first xml_attr xml_text xml_parent xml_name xml_children +#' @importFrom tibble tibble +#' @importFrom dplyr bind_rows filter arrange mutate select distinct +#' @importFrom purrr map_dfr +NULL + +# ---- Internal helpers ------------------------------------------------------- + +# Parse a Tableau column-reference attribute, e.g. +# "[federated.abc].[none:Category:nk]" +# "SUM([federated.abc].[Sales])" +# Returns a named list: datasource, field_instance, field_clean, aggregation +.parse_col_ref <- function(ref) { + empty <- list( + datasource = NA_character_, + field_instance = NA_character_, + field_clean = NA_character_, + aggregation = NA_character_ + ) + if (is.null(ref) || is.na(ref) || !nzchar(ref)) return(empty) + + # 1) strip outer aggregation wrapper: SUM(...) ATTR(...) etc. 
+ agg <- NA_character_ + m <- regexpr("^([A-Z_]+)\\((.+)\\)$", ref, perl = TRUE) + if (m > 0L) { + starts <- attr(m, "capture.start") + lens <- attr(m, "capture.length") + agg <- substr(ref, starts[1], starts[1] + lens[1] - 1L) + ref <- substr(ref, starts[2], starts[2] + lens[2] - 1L) + } + + # 2) extract bracketed tokens + toks <- regmatches(ref, gregexpr("\\[[^\\]]+\\]", ref, perl = TRUE))[[1L]] + toks <- gsub("^\\[|\\]$", "", toks) + + ds <- if (length(toks) >= 1L) toks[[1L]] else NA_character_ + fi <- if (length(toks) >= 2L) toks[[2L]] else + if (length(toks) == 1L) toks[[1L]] else NA_character_ + + # 3) clean field instance: "none:Category:nk" -> "Category" + fc <- fi + if (!is.na(fc)) { + fc <- sub("^[a-z]+:", "", fc) # strip derivation prefix "none:", "clct:" + fc <- sub(":[a-z]+$", "", fc) # strip pivot suffix ":nk", ":qk" + fc <- .twb_clean_table(fc) # strip remaining brackets / hex suffix + } + + list(datasource = ds, field_instance = fi, field_clean = fc, aggregation = agg) +} + +# ---- twb_sheet_shelves ------------------------------------------------------- + +#' Extract field-to-shelf assignments for worksheets +#' +#' Returns a tidy tibble describing which fields are placed on each visual +#' shelf (rows, cols, color, size, label, detail, tooltip, etc.) for every +#' worksheet in the workbook (or a single named sheet). +#' +#' @param x A `TwbParser` object or an `xml2` document. +#' @param sheet Optional character scalar. When supplied only that worksheet +#' is returned; otherwise all worksheets are returned. 
+#' +#' @return A tibble with columns: +#' \describe{ +#' \item{sheet}{Worksheet name.} +#' \item{shelf}{Shelf name: `"rows"`, `"cols"`, or an encoding type such as +#' `"color"`, `"size"`, `"label"`, `"detail"`, `"tooltip"`, `"shape"`, +#' `"text"`, `"path"`, `"angle"`, `"lod"`, `"geometry"`, etc.} +#' \item{field_ref}{Raw column-reference attribute value.} +#' \item{field_instance}{Field instance name (after stripping datasource prefix).} +#' \item{field_clean}{Human-readable field name.} +#' \item{datasource}{Datasource name referenced.} +#' \item{aggregation}{Aggregation function (`"SUM"`, `"AVG"`, …) or `NA`.} +#' } +#' +#' @examples +#' xml <- xml2::read_xml( +#' ' +#' +#' +#' +#' [ds].[Category] +#' [ds].[Sales] +#' +#' +#' +#' +#' +#' +#' +#' +#'
+#'
+#'
+#'
' +#' ) +#' twb_sheet_shelves(xml) +#' +#' @export +twb_sheet_shelves <- function(x, sheet = NULL) { + xml_doc <- .twb_resolve_xml(x) + if (!is.null(sheet)) { + stopifnot(is.character(sheet), length(sheet) == 1L) + } + .ins_sheet_shelves(xml_doc, sheet) +} + +#' @keywords internal +#' @noRd +.ins_sheet_shelves <- function(xml_doc, sheet = NULL) { + ws_xpath <- if (is.null(sheet)) { + ".//worksheet" + } else { + paste0(".//worksheet[@name='", gsub("'", "", sheet, fixed = TRUE), "']") + } + ws_nodes <- xml2::xml_find_all(xml_doc, ws_xpath) + + if (length(ws_nodes) == 0L) { + return(.empty_shelves()) + } + + out <- purrr::map_dfr(ws_nodes, function(ws) { + ws_name <- xml2::xml_attr(ws, "name") %||% NA_character_ + + # rows shelf + rows_el <- xml2::xml_find_first(ws, "./table/rows") + rows_text <- if (!inherits(rows_el, "xml_missing")) xml2::xml_text(rows_el) else NA_character_ + + # cols shelf + cols_el <- xml2::xml_find_first(ws, "./table/cols") + cols_text <- if (!inherits(cols_el, "xml_missing")) xml2::xml_text(cols_el) else NA_character_ + + shelf_rows <- .parse_shelf_text_to_tibble(ws_name, "rows", rows_text) + shelf_cols <- .parse_shelf_text_to_tibble(ws_name, "cols", cols_text) + + # encoding shelves (color, size, label, detail, tooltip, ...) 
+ enc_nodes <- xml2::xml_find_all(ws, "./table/panes/pane/encodings/*") + shelf_enc <- if (length(enc_nodes)) { + purrr::map_dfr(enc_nodes, function(en) { + shelf_nm <- xml2::xml_name(en) + col_ref <- xml2::xml_attr(en, "column") %||% NA_character_ + if (is.na(col_ref)) return(tibble::tibble()) + parsed <- .parse_col_ref(col_ref) + tibble::tibble( + sheet = ws_name, + shelf = shelf_nm, + field_ref = col_ref, + field_instance = parsed$field_instance, + field_clean = parsed$field_clean, + datasource = parsed$datasource, + aggregation = parsed$aggregation + ) + }) + } else { + tibble::tibble() + } + + dplyr::bind_rows(shelf_rows, shelf_cols, shelf_enc) + }) + if (nrow(out) == 0L) return(.empty_shelves()) + dplyr::arrange(dplyr::distinct(out), .data$sheet, .data$shelf, .data$field_clean) +} + +# Parse "[ds].[f1], [ds].[f2]" shelf text into a shelf tibble +.parse_shelf_text_to_tibble <- function(ws_name, shelf_name, text) { + if (is.null(text) || is.na(text) || !nzchar(trimws(text))) { + return(tibble::tibble()) + } + # Extract every [something].[something] pair (handles AGG wrappers) + refs <- regmatches( + text, + gregexpr("[A-Z_]*\\[[^\\]]+\\]\\.\\[[^\\]]+\\]", text, perl = TRUE) + )[[1L]] + if (!length(refs)) return(tibble::tibble()) + + purrr::map_dfr(refs, function(ref) { + parsed <- .parse_col_ref(ref) + tibble::tibble( + sheet = ws_name, + shelf = shelf_name, + field_ref = ref, + field_instance = parsed$field_instance, + field_clean = parsed$field_clean, + datasource = parsed$datasource, + aggregation = parsed$aggregation + ) + }) +} + +.empty_shelves <- function() { + tibble::tibble( + sheet = character(), + shelf = character(), + field_ref = character(), + field_instance = character(), + field_clean = character(), + datasource = character(), + aggregation = character() + ) +} + +# ---- twb_sheet_filters ------------------------------------------------------- + +#' Extract detailed filter configuration for worksheets +#' +#' Returns one row per filter per 
worksheet, with full details on filter class, +#' inclusion mode, categorical members, and numeric or date range bounds. +#' +#' @param x A `TwbParser` object or an `xml2` document. +#' @param sheet Optional character scalar to restrict output to one worksheet. +#' +#' @return A tibble with columns: +#' \describe{ +#' \item{sheet}{Worksheet name.} +#' \item{field_ref}{Raw column-reference attribute value.} +#' \item{field_clean}{Human-readable field name.} +#' \item{datasource}{Datasource name.} +#' \item{filter_class}{Tableau filter class: `"categorical"`, `"range"`, +#' `"relative-date"`, `"date"`, `"set"`, `"top"`, etc.} +#' \item{include_mode}{`"include"` or `"exclude"`.} +#' \item{members}{Comma-separated member values for categorical filters; `NA` otherwise.} +#' \item{range_min}{Lower bound for range/quantitative filters; `NA` otherwise.} +#' \item{range_max}{Upper bound; `NA` otherwise.} +#' } +#' +#' @examples +#' xml <- xml2::read_xml( +#' ' +#' +#' +#' +#' +#' +#' +#' +#' +#' +#' +#' +#'
+#'
+#'
+#'
' +#' ) +#' twb_sheet_filters(xml) +#' +#' @export +twb_sheet_filters <- function(x, sheet = NULL) { + xml_doc <- .twb_resolve_xml(x) + if (!is.null(sheet)) { + stopifnot(is.character(sheet), length(sheet) == 1L) + } + .ins_sheet_filters(xml_doc, sheet) +} + +#' @keywords internal +#' @noRd +.ins_sheet_filters <- function(xml_doc, sheet = NULL) { + ws_xpath <- if (is.null(sheet)) { + ".//worksheet" + } else { + paste0(".//worksheet[@name='", gsub("'", "", sheet, fixed = TRUE), "']") + } + ws_nodes <- xml2::xml_find_all(xml_doc, ws_xpath) + + if (length(ws_nodes) == 0L) { + return(.empty_filters()) + } + + out <- purrr::map_dfr(ws_nodes, function(ws) { + ws_name <- xml2::xml_attr(ws, "name") %||% NA_character_ + flt_nodes <- xml2::xml_find_all(ws, ".//filter[@column]") + if (length(flt_nodes) == 0L) return(tibble::tibble()) + + purrr::map_dfr(flt_nodes, function(fn) { + col_ref <- xml2::xml_attr(fn, "column") %||% NA_character_ + parsed <- .parse_col_ref(col_ref) + cls <- xml2::xml_attr(fn, "class") %||% NA_character_ + + # include / exclude + excl <- xml2::xml_attr(fn, "exclude") %||% "false" + include_mode <- if (identical(excl, "true")) "exclude" else "include" + + # categorical members + members <- NA_character_ + if (!is.na(cls) && cls %in% c("categorical", "set")) { + mem_nodes <- xml2::xml_find_all(fn, ".//groupfilter[@function='member']") + if (length(mem_nodes)) { + raw_vals <- xml2::xml_attr(mem_nodes, "member") %||% character() + raw_vals <- gsub("^\\[|\\]$", "", raw_vals[!is.na(raw_vals)]) + if (length(raw_vals)) members <- paste(raw_vals, collapse = ", ") + } + } + + # range min / max + range_min <- NA_character_ + range_max <- NA_character_ + min_nd <- xml2::xml_find_first(fn, "./min") + max_nd <- xml2::xml_find_first(fn, "./max") + if (!inherits(min_nd, "xml_missing")) range_min <- xml2::xml_text(min_nd) + if (!inherits(max_nd, "xml_missing")) range_max <- xml2::xml_text(max_nd) + + tibble::tibble( + sheet = ws_name, + field_ref = col_ref, + 
field_clean = parsed$field_clean, + datasource = parsed$datasource, + filter_class = cls, + include_mode = include_mode, + members = members, + range_min = range_min, + range_max = range_max + ) + }) + }) + if (nrow(out) == 0L) return(.empty_filters()) + dplyr::arrange(dplyr::distinct(out), .data$sheet, .data$field_clean) +} + +.empty_filters <- function() { + tibble::tibble( + sheet = character(), + field_ref = character(), + field_clean = character(), + datasource = character(), + filter_class = character(), + include_mode = character(), + members = character(), + range_min = character(), + range_max = character() + ) +} + +# ---- twb_sheet_axes ---------------------------------------------------------- + +#' Extract axis configuration for worksheets +#' +#' Reads per-axis style rules embedded in worksheet XML and returns one row per +#' axis per worksheet. +#' +#' @param x A `TwbParser` object or an `xml2` document. +#' @param sheet Optional character scalar to restrict output to one worksheet. +#' +#' @return A tibble with columns: +#' \describe{ +#' \item{sheet}{Worksheet name.} +#' \item{axis}{Axis identifier (e.g., `"rows"`, `"cols"`, or `"automatic"`).} +#' \item{field_ref}{Column reference if axis is field-specific; `NA` otherwise.} +#' \item{field_clean}{Human-readable field name; `NA` if not field-specific.} +#' \item{scale_type}{Scale type (`"linear"`, `"log"`, …) if present; `NA` otherwise.} +#' \item{reversed}{Logical: `TRUE` if axis is reversed.} +#' \item{include_zero}{Logical: `TRUE` if zero is pinned on axis.} +#' } +#' +#' @examples +#' xml <- xml2::read_xml( +#' ' +#' +#' +#' +#' +#'
+#'
+#'
+#'
' +#' ) +#' twb_sheet_axes(xml) +#' +#' @export +twb_sheet_axes <- function(x, sheet = NULL) { + xml_doc <- .twb_resolve_xml(x) + if (!is.null(sheet)) { + stopifnot(is.character(sheet), length(sheet) == 1L) + } + .ins_sheet_axes(xml_doc, sheet) +} + +#' @keywords internal +#' @noRd +.ins_sheet_axes <- function(xml_doc, sheet = NULL) { + ws_xpath <- if (is.null(sheet)) { + ".//worksheet" + } else { + paste0(".//worksheet[@name='", gsub("'", "", sheet, fixed = TRUE), "']") + } + ws_nodes <- xml2::xml_find_all(xml_doc, ws_xpath) + if (length(ws_nodes) == 0L) return(.empty_axes()) + + out <- purrr::map_dfr(ws_nodes, function(ws) { + ws_name <- xml2::xml_attr(ws, "name") %||% NA_character_ + + # Workbook-style-rule approach: + axis_rules <- xml2::xml_find_all(ws, ".//style-rule[@element='axis']") + rule_rows <- if (length(axis_rules)) { + purrr::map_dfr(axis_rules, function(rule) { + fmt_nodes <- xml2::xml_find_all(rule, "./format") + fmt_attrs <- xml2::xml_attr(fmt_nodes, "attr") + fmt_vals <- xml2::xml_attr(fmt_nodes, "value") + get_fmt <- function(a) { + i <- match(a, fmt_attrs) + if (!is.na(i)) fmt_vals[[i]] else NA_character_ + } + reversed <- isTRUE(get_fmt("reverse") %in% c("true", "1")) + include_zero <- isTRUE(get_fmt("scale-include-zero") %in% c("true", "1")) + scale_type <- get_fmt("scale-type") + + tibble::tibble( + sheet = ws_name, + axis = "automatic", + field_ref = NA_character_, + field_clean = NA_character_, + scale_type = scale_type, + reversed = reversed, + include_zero = include_zero + ) + }) + } else { + tibble::tibble() + } + + # Per-column axis styles: + col_axes <- xml2::xml_find_all(ws, ".//axis-styles/axis-style[@column]") + col_rows <- if (length(col_axes)) { + purrr::map_dfr(col_axes, function(ax) { + col_ref <- xml2::xml_attr(ax, "column") %||% NA_character_ + parsed <- .parse_col_ref(col_ref) + scale_nd <- xml2::xml_find_first(ax, "./scale") + reversed <- FALSE + include_zero <- FALSE + scale_type <- NA_character_ + if (!inherits(scale_nd, 
"xml_missing")) { + reversed <- isTRUE(xml2::xml_attr(scale_nd, "reversed") %in% c("true", "1")) + include_zero <- isTRUE(xml2::xml_attr(scale_nd, "include-origin") %in% c("true", "1")) + scale_type <- xml2::xml_attr(scale_nd, "type") %||% NA_character_ + } + tibble::tibble( + sheet = ws_name, + axis = xml2::xml_attr(ax, "axis") %||% NA_character_, + field_ref = col_ref, + field_clean = parsed$field_clean, + scale_type = scale_type, + reversed = reversed, + include_zero = include_zero + ) + }) + } else { + tibble::tibble() + } + + dplyr::bind_rows(rule_rows, col_rows) + }) + if (nrow(out) == 0L) return(.empty_axes()) + dplyr::arrange(dplyr::distinct(out), .data$sheet, .data$axis) +} + +.empty_axes <- function() { + tibble::tibble( + sheet = character(), + axis = character(), + field_ref = character(), + field_clean = character(), + scale_type = character(), + reversed = logical(), + include_zero = logical() + ) +} + +# ---- twb_sheet_sorts --------------------------------------------------------- + +#' Extract sort configuration for worksheets +#' +#' Returns one row per sort directive per worksheet. +#' +#' @param x A `TwbParser` object or an `xml2` document. +#' @param sheet Optional character scalar to restrict output to one worksheet. +#' +#' @return A tibble with columns: +#' \describe{ +#' \item{sheet}{Worksheet name.} +#' \item{field_ref}{Raw column-reference attribute.} +#' \item{field_clean}{Human-readable field name.} +#' \item{datasource}{Datasource name.} +#' \item{sort_order}{`"ascending"` or `"descending"`.} +#' \item{sort_by}{Sort method: `"field"`, `"alphabetic"`, `"manual"`, `"data-source-order"`, etc.} +#' } +#' +#' @examples +#' xml <- xml2::read_xml( +#' ' +#' +#' +#' +#' +#' +#' +#'
+#'
+#'
+#'
' +#' ) +#' twb_sheet_sorts(xml) +#' +#' @export +twb_sheet_sorts <- function(x, sheet = NULL) { + xml_doc <- .twb_resolve_xml(x) + if (!is.null(sheet)) { + stopifnot(is.character(sheet), length(sheet) == 1L) + } + .ins_sheet_sorts(xml_doc, sheet) +} + +#' @keywords internal +#' @noRd +.ins_sheet_sorts <- function(xml_doc, sheet = NULL) { + ws_xpath <- if (is.null(sheet)) { + ".//worksheet" + } else { + paste0(".//worksheet[@name='", gsub("'", "", sheet, fixed = TRUE), "']") + } + ws_nodes <- xml2::xml_find_all(xml_doc, ws_xpath) + if (length(ws_nodes) == 0L) return(.empty_sorts()) + + out <- purrr::map_dfr(ws_nodes, function(ws) { + ws_name <- xml2::xml_attr(ws, "name") %||% NA_character_ + sort_nodes <- xml2::xml_find_all(ws, ".//sort[@column]") + if (length(sort_nodes) == 0L) return(tibble::tibble()) + + purrr::map_dfr(sort_nodes, function(sn) { + col_ref <- xml2::xml_attr(sn, "column") %||% NA_character_ + parsed <- .parse_col_ref(col_ref) + dir <- xml2::xml_attr(sn, "direction") %||% NA_character_ + cls <- xml2::xml_attr(sn, "class") %||% NA_character_ + + sort_by <- dplyr::case_when( + !is.na(cls) & cls %in% c("sum", "avg", "min", "max", "count") ~ "field", + !is.na(cls) & cls == "alphabetic" ~ "alphabetic", + !is.na(cls) & cls == "explicit" ~ "manual", + !is.na(cls) & cls == "data-source-order" ~ "data-source-order", + !is.na(cls) ~ cls, + TRUE ~ NA_character_ + ) + + tibble::tibble( + sheet = ws_name, + field_ref = col_ref, + field_clean = parsed$field_clean, + datasource = parsed$datasource, + sort_order = dir, + sort_by = sort_by + ) + }) + }) + if (nrow(out) == 0L) return(.empty_sorts()) + dplyr::arrange(dplyr::distinct(out), .data$sheet, .data$field_clean) +} + +.empty_sorts <- function() { + tibble::tibble( + sheet = character(), + field_ref = character(), + field_clean = character(), + datasource = character(), + sort_order = character(), + sort_by = character() + ) +} diff --git a/R/twb_parser.R b/R/twb_parser.R index 57cda37..ae55152 100644 --- 
a/R/twb_parser.R +++ b/R/twb_parser.R @@ -197,15 +197,59 @@ TwbParser <- R6::R6Class( get_pages = function() safe_call(.ins_pages(self$xml_doc), tibble::tibble()), get_pages_summary = function() safe_call(.ins_pages_summary(self$xml_doc), tibble::tibble()), get_page_composition = function(name) { - stopifnot(is.character(name), length(name)==1L) - safe_call(.ins_page_composition(self$xml_doc, name), tibble::tibble()) }, - get_charts = function() safe_call(.ins_charts(self$xml_doc), tibble::tibble()), - get_colors = function() safe_call(.ins_colors(self$xml_doc), tibble::tibble()), - get_dashboards = function() safe_call(.ins_dashboards(self$xml_doc), tibble::tibble()), + stopifnot(is.character(name), length(name) == 1L) + safe_call(.ins_page_composition(self$xml_doc, name), tibble::tibble()) + }, + get_charts = function() safe_call(.ins_charts(self$xml_doc), tibble::tibble()), + get_colors = function() safe_call(.ins_colors(self$xml_doc), tibble::tibble()), + get_dashboards = function() safe_call(.ins_dashboards(self$xml_doc), tibble::tibble()), get_dashboard_filters = function(dashboard = NULL) { safe_call(.ins_dashboard_filters(self$xml_doc, dashboard = dashboard), tibble::tibble()) }, - get_dashboard_summary= function() safe_call(.ins_dashboard_summary(self$xml_doc), tibble::tibble()), + get_dashboard_summary = function() safe_call(.ins_dashboard_summary(self$xml_doc), tibble::tibble()), + + # --- Phase 2/3: sheet & dashboard intelligence --- + #' @description Fields placed on visual shelves for one or all worksheets. + #' @param sheet Optional worksheet name. + get_sheet_shelves = function(sheet = NULL) { + safe_call(.ins_sheet_shelves(self$xml_doc, sheet), .empty_shelves()) + }, + + #' @description Detailed filter configuration for one or all worksheets. + #' @param sheet Optional worksheet name. 
+ get_sheet_filters = function(sheet = NULL) { + safe_call(.ins_sheet_filters(self$xml_doc, sheet), .empty_filters()) + }, + + #' @description Axis configuration for one or all worksheets. + #' @param sheet Optional worksheet name. + get_sheet_axes = function(sheet = NULL) { + safe_call(.ins_sheet_axes(self$xml_doc, sheet), .empty_axes()) + }, + + #' @description Sort directives for one or all worksheets. + #' @param sheet Optional worksheet name. + get_sheet_sorts = function(sheet = NULL) { + safe_call(.ins_sheet_sorts(self$xml_doc, sheet), .empty_sorts()) + }, + + #' @description Worksheets embedded in one or all dashboards. + #' @param dashboard Optional dashboard name. + get_dashboard_sheets = function(dashboard = NULL) { + safe_call(.ins_dashboard_sheets(self$xml_doc, dashboard), tibble::tibble()) + }, + + #' @description Full zone layout with container hierarchy. + #' @param dashboard Optional dashboard name. + get_dashboard_layout = function(dashboard = NULL) { + safe_call(.ins_dashboard_layout(self$xml_doc, dashboard), .empty_layout()) + }, + + #' @description Dashboard and workbook actions. + #' @param dashboard Optional dashboard name to filter by. + get_dashboard_actions = function(dashboard = NULL) { + safe_call(.ins_dashboard_actions(self$xml_doc, dashboard), .empty_actions()) + }, diff --git a/R/utils.R b/R/utils.R index 16fa18a..f660b56 100644 --- a/R/utils.R +++ b/R/utils.R @@ -3,6 +3,65 @@ #' @noRd `%||%` <- function(a, b) if (!is.null(a)) a else b +# ---- Canonical field/table cleaners (single source of truth) ---------------- + +#' Clean a table reference to a human-readable name (vectorized) +#' +#' Removes `[Extract].` / `[Connection].` prefixes, strips `[ ]` brackets, and +#' drops Tableau's trailing 32-character hex suffix. +#' +#' @param x Character vector of raw table references. +#' @return Character vector of the same length; unresolvable entries become +#' `NA_character_`. 
+#' @keywords internal +#' @noRd +.twb_clean_table <- function(x) { + x <- as.character(x) + x <- gsub("^\\[.*?\\]\\.", "", x) # drop [Extract]. / [Connection]. + x <- gsub("\\[|\\]", "", x) # strip brackets + x <- sub("_[0-9A-Fa-f]{32}$", "", x) # drop 32-char hex suffix + x <- trimws(x) + x[is.na(x) | !nzchar(x)] <- NA_character_ + x +} + +#' Clean a field reference to a human-readable name (vectorized) +#' +#' Strips `[ ]` brackets, removes Tableau derivation prefixes (e.g., `none:`, +#' `clct:`) and takes the last dot-separated token. +#' +#' @param x Character vector of raw field references. +#' @return Character vector of the same length; unresolvable entries become +#' `NA_character_`. +#' @keywords internal +#' @noRd +.twb_clean_field <- function(x) { + x <- as.character(x) + x <- gsub("\\[|\\]", "", x) # strip brackets + + # Take last dot-separated part: "[ds].[field]" -> "field" + parts_list <- strsplit(x, "\\.", fixed = FALSE) + x <- unname(vapply(parts_list, function(p) { + p <- p[nzchar(p)] + if (!length(p)) NA_character_ else utils::tail(p, 1L) + }, character(1L))) + + # Strip Tableau column-instance wrapper "derivation:name:pivot_code" + # e.g. "none:Category:nk" -> "Category", "clct:Geometry:ok" -> "Geometry" + # Only fires when the full pattern matches (3-part, lowercase prefix & suffix). + unname(vapply(x, function(tok) { + if (is.na(tok)) return(NA_character_) + m <- regexpr("^[a-z]+:(.+):[a-z]{1,3}$", tok, perl = TRUE) + if (m > 0L) { + start <- attr(m, "capture.start")[[1L]] + len <- attr(m, "capture.length")[[1L]] + substr(tok, start, start + len - 1L) + } else { + tok + } + }, character(1L))) +} + #' Escape square/round brackets with backslashes #' @param string Character vector (or NULL) #' @return Character vector with [], () escaped; NULL passes through. 
diff --git a/README.md b/README.md index 89d84cc..6dd693c 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![License:MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://github.com/PrigasG/twbparser/blob/master/LICENSE) [![Lifecycle:experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html) -Parse Tableau **TWB/TWBX** files in R: extract **datasources, joins, relationships, fields, and calculated fields**, plus inspect and unpack **TWBX** assets. Built for large workbooks and Shiny integration. +Parse Tableau **TWB/TWBX** files in R: extract **datasources, joins, relationships, fields, calculated fields, worksheet configuration, and dashboard structure**, plus inspect and unpack **TWBX** assets. Built for large workbooks and Shiny integration. ## Features @@ -16,6 +16,8 @@ Parse Tableau **TWB/TWBX** files in R: extract **datasources, joins, relationshi - **Relationships & joins**: parse legacy joins and modern (2020.2+) relationships - **Calculated fields / parameters**: list formulas, datatypes, roles, and parameter metadata - **Datasources**: connection classes/targets, inferred locations, field counts +- **Worksheet intelligence**: shelves (rows/cols/encodings), filters, axes, and sorts per worksheet +- **Dashboard intelligence**: sheet positions, full zone layout tree, and filter/URL actions - **Dependency graph**: build/plot field dependency DAGs - **TWBX assets**: list/extract images, extracts, text files, etc. 
- **Ergonomics**: `parser$summary` (no parens), `parser$overview`, `parser$pages`, `parser$pages_summary` @@ -102,6 +104,37 @@ twb_colors(parser) ``` +Worksheet-level detail (new in 0.4.0) + +```r +# Fields on rows, cols, color, size, label … per worksheet +parser$get_sheet_shelves() +# or: twb_sheet_shelves(parser, sheet = "Sales") + +# Worksheet filters — categorical members, range min/max +parser$get_sheet_filters() + +# Axis configuration — reversed, include-zero, scale type +parser$get_sheet_axes() + +# Sort directives — direction and method +parser$get_sheet_sorts() +``` + +Dashboard structure (new in 0.4.0) + +```r +# Which sheets are on a dashboard and where? +parser$get_dashboard_sheets() +# or: twb_dashboard_sheets(parser, dashboard = "Overview") + +# Full zone tree — parent zones, component types, tiled vs floating +parser$get_dashboard_layout() + +# Filter and URL actions — source/target sheets, trigger, URL +parser$get_dashboard_actions() +``` + Relationships/Joins ```r @@ -150,11 +183,11 @@ Rscript -e "twbparser::parse_twb('my_dashboard.twb', output_dir = 'results/')" - Power BI: Export calculated field logic to replicate measures in DAX. - Data lineage: Combine with DiagrammeR or visNetwork for workflow diagrams. 
-## What’s new (0.3.1) +## What’s new (0.4.0) -- Page insights (pages, composition, summaries), filter positions on dashboards -- No-parens parser$summary plus read-only properties (overview, pages, pages_summary, dashboard_summary) -- Calculated fields exclude parameters by default; opt-in with include_parameters = TRUE +- **Worksheet intelligence**: `twb_sheet_shelves()`, `twb_sheet_filters()`, `twb_sheet_axes()`, `twb_sheet_sorts()` — extract the full shelf configuration, filter predicates, axis settings, and sort rules for every worksheet +- **Dashboard intelligence**: `twb_dashboard_sheets()`, `twb_dashboard_layout()`, `twb_dashboard_actions()` — inspect which sheets appear where, the full zone hierarchy, and all filter/URL actions +- **Bug fixes**: corrected edge direction in `plot_relationship_graph()`, fixed column references in `plot_source_join_graph()`, eliminated Cartesian-product explosion in `infer_implicit_relationships()` ## Contributing diff --git a/_pkgdown.yml b/_pkgdown.yml index 269599b..74aa428 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -38,6 +38,23 @@ reference: - twb_colors - twb_dashboard_summary + - title: "Worksheet intelligence" + desc: > + Per-worksheet shelf, filter, axis, and sort details. + contents: + - twb_sheet_shelves + - twb_sheet_filters + - twb_sheet_axes + - twb_sheet_sorts + + - title: "Dashboard intelligence" + desc: > + Per-dashboard zone layout, sheet positions, and actions. + contents: + - twb_dashboard_sheets + - twb_dashboard_layout + - twb_dashboard_actions + - title: "TWBX helpers" contents: - twbx_list diff --git a/cran-comments.md b/cran-comments.md index bf603cc..7f5cf43 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,12 +1,16 @@ -## twbparser 0.3.1 +## twbparser 0.4.0 -* Removed use of `unlockBinding()` in internal active-binding helpers. - The package no longer calls `unlockBinding()` or `assignInNamespace()`, - addressing the previous CRAN NOTE about possibly unsafe calls. 
+### Changes since 0.3.1 +* Added per-worksheet intelligence: `twb_sheet_shelves()`, `twb_sheet_filters()`, + `twb_sheet_axes()`, `twb_sheet_sorts()` and matching `TwbParser` methods. +* Added per-dashboard intelligence: `twb_dashboard_sheets()`, + `twb_dashboard_layout()`, `twb_dashboard_actions()` and matching `TwbParser` + methods. +* Fixed graph edge direction bug in `plot_relationship_graph()`. +* Fixed column name bug in `plot_source_join_graph()`. +* Centralized duplicate field/table cleaning helpers into `utils.R`. ## R CMD check results -0 errors | 0 warnings | 1 note - -* This is a new release. +0 errors | 0 warnings | 0 notes diff --git a/inst/WORDLIST b/inst/WORDLIST index 3795f75..6a7c5e8 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,12 +1,26 @@ auth Auth +behaviour calc calcs +centric +CRAN's +Encodings +encodings +param +parens +predicate +predicates +tiled +TwbParser +xlsx CMD Codecov contentUrl DAGs datasource +deduplication +dplyr Datasource datasources Datasources diff --git a/man/TwbParser.Rd b/man/TwbParser.Rd index f94c1e8..a0b7052 100644 --- a/man/TwbParser.Rd +++ b/man/TwbParser.Rd @@ -18,6 +18,10 @@ An R6 class generator. \item{exdir}{Output directory (defaults to parser's twbx dir or tempdir()).} +\item{sheet}{Optional worksheet name.} + +\item{dashboard}{Optional dashboard name to filter by.} + \item{error}{If \code{TRUE}, \code{stop()} when validation fails.} } \description{ @@ -31,6 +35,20 @@ Return TWBX image entries. Extract files from the TWBX to disk. +Fields placed on visual shelves for one or all worksheets. + +Detailed filter configuration for one or all worksheets. + +Axis configuration for one or all worksheets. + +Sort directives for one or all worksheets. + +Worksheets embedded in one or all dashboards. + +Full zone layout with container hierarchy. + +Dashboard and workbook actions. + Validate relationships; optionally stop on failure. Print a concise summary of parsed content. 
diff --git a/man/extract_datasource_details.Rd b/man/extract_datasource_details.Rd index 89d312d..91e8179 100644 --- a/man/extract_datasource_details.Rd +++ b/man/extract_datasource_details.Rd @@ -32,7 +32,7 @@ if (nzchar(twb) && file.exists(twb)) { head(res$data_sources) } -\dontshow{if (nzchar(system.file("extdata","test_for_zip.twbx", package = "twbparser"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (nzchar(system.file("extdata","test_for_zip.twbx", package = "twbparser"))) withAutoprint(\{ # examplesIf} # Alternative: from a tiny .twbx (guarded) twbx <- system.file("extdata", "test_for_zip.twbx", package = "twbparser") if (nzchar(twbx) && file.exists(twbx)) { diff --git a/man/extract_twb_from_twbx.Rd b/man/extract_twb_from_twbx.Rd index 5e4ad09..1a2a6c9 100644 --- a/man/extract_twb_from_twbx.Rd +++ b/man/extract_twb_from_twbx.Rd @@ -26,7 +26,7 @@ List with \code{twb_path}, \code{exdir}, \code{twbx_path}, and \code{manifest} ( Extract the .twb (and optionally all files) from a .twbx } \examples{ -\dontshow{if (nzchar(system.file("extdata", "test_for_zip.twbx", package = "twbparser"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (nzchar(system.file("extdata", "test_for_zip.twbx", package = "twbparser"))) withAutoprint(\{ # examplesIf} twbx <- system.file("extdata", "test_for_zip.twbx", package = "twbparser") res <- extract_twb_from_twbx(twbx, extract_all = FALSE) basename(res$twb_path) diff --git a/man/tbs_custom_sql_graphql.Rd b/man/tbs_custom_sql_graphql.Rd index 96dba89..2271d12 100644 --- a/man/tbs_custom_sql_graphql.Rd +++ b/man/tbs_custom_sql_graphql.Rd @@ -28,7 +28,7 @@ A tibble with columns such as \code{custom_sql_name}, \code{custom_sql_query}, Queries the Metadata (GraphQL) API for Custom SQL tables in the content graph. 
} \examples{ -\dontshow{if (all(nzchar(Sys.getenv(c("TABLEAU_BASE_URL","TABLEAU_SITE","TABLEAU_PAT"))))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(nzchar(Sys.getenv(c("TABLEAU_BASE_URL","TABLEAU_SITE","TABLEAU_PAT"))))) withAutoprint(\{ # examplesIf} tbs_custom_sql_graphql("abc-123") \dontshow{\}) # examplesIf} } diff --git a/man/tbs_publish_info.Rd b/man/tbs_publish_info.Rd index f7bfdf0..65ae5a5 100644 --- a/man/tbs_publish_info.Rd +++ b/man/tbs_publish_info.Rd @@ -28,7 +28,7 @@ A tibble with columns like \code{content_id}, \code{site}, \code{project}, \code Returns an empty tibble when credentials are missing or the item is not found. } \examples{ -\dontshow{if (all(nzchar(Sys.getenv(c("TABLEAU_BASE_URL","TABLEAU_SITE","TABLEAU_PAT"))))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(nzchar(Sys.getenv(c("TABLEAU_BASE_URL","TABLEAU_SITE","TABLEAU_PAT"))))) withAutoprint(\{ # examplesIf} tbs_publish_info("abc-123") \dontshow{\}) # examplesIf} } diff --git a/man/twb_dashboard_actions.Rd b/man/twb_dashboard_actions.Rd new file mode 100644 index 0000000..0a5f04d --- /dev/null +++ b/man/twb_dashboard_actions.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dashboard_details.R +\name{twb_dashboard_actions} +\alias{twb_dashboard_actions} +\title{Extract dashboard and workbook actions} +\usage{ +twb_dashboard_actions(x, dashboard = NULL) +} +\arguments{ +\item{x}{A \code{TwbParser} object or an \code{xml2} document.} + +\item{dashboard}{Optional character scalar. When supplied, only actions +whose \verb{} include a sheet from that dashboard are returned. 
+Pass \code{NULL} (default) to return all actions.} +} +\value{ +A tibble with columns: +\describe{ +\item{action_name}{Caption / display name of the action.} +\item{action_type}{\code{"filter"}, \code{"highlight"}, \code{"url"}, \code{"set"}, or +another type string from the XML.} +\item{source_sheets}{Comma-separated list of source worksheet names.} +\item{target_sheet}{Target worksheet name, or \code{NA} for URL actions.} +\item{run_on}{Trigger: \code{"select"}, \code{"menu"}, or \code{"hover"}.} +\item{url}{URL value for URL-type actions; \code{NA} otherwise.} +} +} +\description{ +Parses \verb{} nodes from dashboard \verb{} sections and the +top-level workbook \verb{} section. Returns one row per action. +} +\examples{ +xml <- xml2::read_xml( + ' + + + + + + + + + + + + ' +) +twb_dashboard_actions(xml) + +} diff --git a/man/twb_dashboard_layout.Rd b/man/twb_dashboard_layout.Rd new file mode 100644 index 0000000..9746d4f --- /dev/null +++ b/man/twb_dashboard_layout.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dashboard_details.R +\name{twb_dashboard_layout} +\alias{twb_dashboard_layout} +\title{Full layout of dashboard zones with container hierarchy} +\usage{ +twb_dashboard_layout(x, dashboard = NULL) +} +\arguments{ +\item{x}{A \code{TwbParser} object or an \code{xml2} document.} + +\item{dashboard}{Optional character scalar to restrict output to one +dashboard.} +} +\value{ +A tibble with columns: +\describe{ +\item{dashboard}{Dashboard name.} +\item{zone_id}{Zone identifier.} +\item{parent_zone_id}{Parent zone identifier (\code{NA} for root zones).} +\item{component_type}{\code{"worksheet"}, \code{"filter"}, \code{"legend"}, +\code{"parameter_control"}, \code{"text"}, \code{"image"}, \code{"container"}, or \code{"blank"}.} +\item{target}{Referenced worksheet name or object, if applicable.} +\item{layout_type}{\code{"floating"} or \code{"tiled"}.} +\item{x}{Horizontal offset, or \code{NA}.} 
+\item{y}{Vertical offset, or \code{NA}.} +\item{w}{Width, or \code{NA}.} +\item{h}{Height, or \code{NA}.} +} +} +\description{ +Returns one row per zone per dashboard, including the parent-zone relationship +and a tiled/floating classification. +} +\examples{ +xml <- xml2::read_xml( + ' + + + + + + + + + + + ' +) +twb_dashboard_layout(xml) + +} diff --git a/man/twb_dashboard_sheets.Rd b/man/twb_dashboard_sheets.Rd new file mode 100644 index 0000000..517542b --- /dev/null +++ b/man/twb_dashboard_sheets.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dashboard_details.R +\name{twb_dashboard_sheets} +\alias{twb_dashboard_sheets} +\title{List worksheets embedded in each dashboard} +\usage{ +twb_dashboard_sheets(x, dashboard = NULL) +} +\arguments{ +\item{x}{A \code{TwbParser} object or an \code{xml2} document.} + +\item{dashboard}{Optional character scalar to restrict output to one +dashboard.} +} +\value{ +A tibble with columns: +\describe{ +\item{dashboard}{Dashboard name.} +\item{sheet}{Referenced worksheet name.} +\item{zone_id}{Zone identifier.} +\item{x}{Horizontal offset (pixels), or \code{NA}.} +\item{y}{Vertical offset (pixels), or \code{NA}.} +\item{w}{Width (pixels), or \code{NA}.} +\item{h}{Height (pixels), or \code{NA}.} +} +} +\description{ +Returns one row per worksheet per dashboard, with the zone's position on the +canvas. 
+} +\examples{ +xml <- xml2::read_xml( + ' + + + + + + + + ' +) +twb_dashboard_sheets(xml) + +} diff --git a/man/twb_sheet_axes.Rd b/man/twb_sheet_axes.Rd new file mode 100644 index 0000000..3d7eb87 --- /dev/null +++ b/man/twb_sheet_axes.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sheet_details.R +\name{twb_sheet_axes} +\alias{twb_sheet_axes} +\title{Extract axis configuration for worksheets} +\usage{ +twb_sheet_axes(x, sheet = NULL) +} +\arguments{ +\item{x}{A \code{TwbParser} object or an \code{xml2} document.} + +\item{sheet}{Optional character scalar to restrict output to one worksheet.} +} +\value{ +A tibble with columns: +\describe{ +\item{sheet}{Worksheet name.} +\item{axis}{Axis identifier (e.g., \code{"rows"}, \code{"cols"}, or \code{"automatic"}).} +\item{field_ref}{Column reference if axis is field-specific; \code{NA} otherwise.} +\item{field_clean}{Human-readable field name; \code{NA} if not field-specific.} +\item{scale_type}{Scale type (\code{"linear"}, \code{"log"}, …) if present; \code{NA} otherwise.} +\item{reversed}{Logical: \code{TRUE} if axis is reversed.} +\item{include_zero}{Logical: \code{TRUE} if zero is pinned on axis.} +} +} +\description{ +Reads per-axis style rules embedded in worksheet XML and returns one row per +axis per worksheet. +} +\examples{ +xml <- xml2::read_xml( + ' + + + + +
+
+
+
' +) +twb_sheet_axes(xml) + +} diff --git a/man/twb_sheet_filters.Rd b/man/twb_sheet_filters.Rd new file mode 100644 index 0000000..1cd784c --- /dev/null +++ b/man/twb_sheet_filters.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sheet_details.R +\name{twb_sheet_filters} +\alias{twb_sheet_filters} +\title{Extract detailed filter configuration for worksheets} +\usage{ +twb_sheet_filters(x, sheet = NULL) +} +\arguments{ +\item{x}{A \code{TwbParser} object or an \code{xml2} document.} + +\item{sheet}{Optional character scalar to restrict output to one worksheet.} +} +\value{ +A tibble with columns: +\describe{ +\item{sheet}{Worksheet name.} +\item{field_ref}{Raw column-reference attribute value.} +\item{field_clean}{Human-readable field name.} +\item{datasource}{Datasource name.} +\item{filter_class}{Tableau filter class: \code{"categorical"}, \code{"range"}, +\code{"relative-date"}, \code{"date"}, \code{"set"}, \code{"top"}, etc.} +\item{include_mode}{\code{"include"} or \code{"exclude"}.} +\item{members}{Comma-separated member values for categorical filters; \code{NA} otherwise.} +\item{range_min}{Lower bound for range/quantitative filters; \code{NA} otherwise.} +\item{range_max}{Upper bound; \code{NA} otherwise.} +} +} +\description{ +Returns one row per filter per worksheet, with full details on filter class, +inclusion mode, categorical members, and numeric or date range bounds. +} +\examples{ +xml <- xml2::read_xml( + ' + + + + + + + + + + + +
+
+
+
' +) +twb_sheet_filters(xml) + +} diff --git a/man/twb_sheet_shelves.Rd b/man/twb_sheet_shelves.Rd new file mode 100644 index 0000000..c5287cd --- /dev/null +++ b/man/twb_sheet_shelves.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sheet_details.R +\name{twb_sheet_shelves} +\alias{twb_sheet_shelves} +\title{Extract field-to-shelf assignments for worksheets} +\usage{ +twb_sheet_shelves(x, sheet = NULL) +} +\arguments{ +\item{x}{A \code{TwbParser} object or an \code{xml2} document.} + +\item{sheet}{Optional character scalar. When supplied only that worksheet +is returned; otherwise all worksheets are returned.} +} +\value{ +A tibble with columns: +\describe{ +\item{sheet}{Worksheet name.} +\item{shelf}{Shelf name: \code{"rows"}, \code{"cols"}, or an encoding type such as +\code{"color"}, \code{"size"}, \code{"label"}, \code{"detail"}, \code{"tooltip"}, \code{"shape"}, +\code{"text"}, \code{"path"}, \code{"angle"}, \code{"lod"}, \code{"geometry"}, etc.} +\item{field_ref}{Raw column-reference attribute value.} +\item{field_instance}{Field instance name (after stripping datasource prefix).} +\item{field_clean}{Human-readable field name.} +\item{datasource}{Datasource name referenced.} +\item{aggregation}{Aggregation function (\code{"SUM"}, \code{"AVG"}, …) or \code{NA}.} +} +} +\description{ +Returns a tidy tibble describing which fields are placed on each visual +shelf (rows, cols, color, size, label, detail, tooltip, etc.) for every +worksheet in the workbook (or a single named sheet). +} +\examples{ +xml <- xml2::read_xml( + ' + + + + [ds].[Category] + [ds].[Sales] + + + + + + + + +
+
+
+
' +) +twb_sheet_shelves(xml) + +} diff --git a/man/twb_sheet_sorts.Rd b/man/twb_sheet_sorts.Rd new file mode 100644 index 0000000..6685fe5 --- /dev/null +++ b/man/twb_sheet_sorts.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sheet_details.R +\name{twb_sheet_sorts} +\alias{twb_sheet_sorts} +\title{Extract sort configuration for worksheets} +\usage{ +twb_sheet_sorts(x, sheet = NULL) +} +\arguments{ +\item{x}{A \code{TwbParser} object or an \code{xml2} document.} + +\item{sheet}{Optional character scalar to restrict output to one worksheet.} +} +\value{ +A tibble with columns: +\describe{ +\item{sheet}{Worksheet name.} +\item{field_ref}{Raw column-reference attribute.} +\item{field_clean}{Human-readable field name.} +\item{datasource}{Datasource name.} +\item{sort_order}{\code{"ascending"} or \code{"descending"}.} +\item{sort_by}{Sort method: \code{"field"}, \code{"alphabetic"}, \code{"manual"}, \code{"data-source-order"}, etc.} +} +} +\description{ +Returns one row per sort directive per worksheet. +} +\examples{ +xml <- xml2::read_xml( + ' + + + + + + +
+
+
+
' +) +twb_sheet_sorts(xml) + +} diff --git a/man/twbx_extract_files.Rd b/man/twbx_extract_files.Rd index 7252bc6..55ec69a 100644 --- a/man/twbx_extract_files.Rd +++ b/man/twbx_extract_files.Rd @@ -30,7 +30,7 @@ Tibble with \code{name}, \code{type}, and \code{out_path} of extracted files. Extract specific files from a .twbx } \examples{ -\dontshow{if (nzchar(system.file("extdata", "test_for_zip.twbx", package = "twbparser"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (nzchar(system.file("extdata", "test_for_zip.twbx", package = "twbparser"))) withAutoprint(\{ # examplesIf} twbx <- system.file("extdata", "test_for_zip.twbx", package = "twbparser") files <- twbx_extract_files(twbx, types = c("workbook")) head(files) diff --git a/man/twbx_list.Rd b/man/twbx_list.Rd index 06b3d1c..791f203 100644 --- a/man/twbx_list.Rd +++ b/man/twbx_list.Rd @@ -16,7 +16,7 @@ Tibble with columns: \code{name}, \code{size_bytes}, \code{modified}, \code{type List contents of a Tableau .twbx } \examples{ -\dontshow{if (nzchar(system.file("extdata", "test_for_zip.twbx", package = "twbparser"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (nzchar(system.file("extdata", "test_for_zip.twbx", package = "twbparser"))) withAutoprint(\{ # examplesIf} twbx <- system.file("extdata", "test_for_zip.twbx", package = "twbparser") twbx_list(twbx) \dontshow{\}) # examplesIf} diff --git a/tests/testthat/test-cleaners.R b/tests/testthat/test-cleaners.R new file mode 100644 index 0000000..d5d953d --- /dev/null +++ b/tests/testthat/test-cleaners.R @@ -0,0 +1,26 @@ +test_that(".twb_clean_table removes brackets, prefix, hex suffix", { + expect_equal(.twb_clean_table("[Extract].[Orders_A1B2C3D4E5F6A1B2C3D4E5F6A1B2C3D4]"), + "Orders") + expect_equal(.twb_clean_table("[Orders]"), "Orders") + expect_equal(.twb_clean_table(NA_character_), NA_character_) + expect_equal(.twb_clean_table(""), NA_character_) + # vectorised + out <- 
.twb_clean_table(c("[A]", NA, "[Extract].[B]")) + expect_equal(out, c("A", NA_character_, "B")) +}) + +test_that(".twb_clean_field strips brackets and derivation prefixes", { + expect_equal(.twb_clean_field("[none:Category:nk]"), "Category") + expect_equal(.twb_clean_field("[clct:Geometry:ok]"), "Geometry") + expect_equal(.twb_clean_field("[Sales]"), "Sales") + expect_equal(.twb_clean_field("[ds].[Field]"), "Field") + expect_equal(.twb_clean_field(NA_character_), NA_character_) + # vectorised + out <- .twb_clean_field(c("[none:Sales:qk]", NA, "[Region]")) + expect_equal(out, c("Sales", NA_character_, "Region")) +}) + +test_that(".twb_clean_table and .twb_clean_field are idempotent on clean input", { + expect_equal(.twb_clean_table("Orders"), "Orders") + expect_equal(.twb_clean_field("Sales"), "Sales") +}) diff --git a/tests/testthat/test-dashboard-details.R b/tests/testthat/test-dashboard-details.R new file mode 100644 index 0000000..274a36f --- /dev/null +++ b/tests/testthat/test-dashboard-details.R @@ -0,0 +1,171 @@ +# ---- helpers ----------------------------------------------------------------- + +make_dashboard_xml <- function() { + xml2::read_xml( + ' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ' + ) +} + +# ---- twb_dashboard_sheets ---------------------------------------------------- + +test_that("twb_dashboard_sheets returns expected columns", { + xml <- make_dashboard_xml() + out <- twb_dashboard_sheets(xml) + expect_s3_class(out, "tbl_df") + expect_named(out, c("dashboard", "sheet", "zone_id", "x", "y", "w", "h"), + ignore.order = TRUE) +}) + +test_that("twb_dashboard_sheets lists all worksheets in dashboard", { + xml <- make_dashboard_xml() + out <- twb_dashboard_sheets(xml) + expect_setequal(out$sheet, c("Sheet1", "Sheet2")) +}) + +test_that("twb_dashboard_sheets position attributes are integers", { + xml <- make_dashboard_xml() + out <- twb_dashboard_sheets(xml) + sheet1 <- out[out$sheet == "Sheet1", ] + expect_equal(sheet1$x, 0L) + 
expect_equal(sheet1$y, 0L) + expect_equal(sheet1$w, 600L) + expect_equal(sheet1$h, 400L) +}) + +test_that("twb_dashboard_sheets filters to a named dashboard", { + xml <- make_dashboard_xml() + out <- twb_dashboard_sheets(xml, dashboard = "Overview") + expect_true(all(out$dashboard == "Overview")) +}) + +test_that("twb_dashboard_sheets returns empty tibble for missing dashboard", { + xml <- make_dashboard_xml() + out <- twb_dashboard_sheets(xml, dashboard = "NoSuch") + expect_equal(nrow(out), 0L) +}) + +# ---- twb_dashboard_layout ---------------------------------------------------- + +test_that("twb_dashboard_layout returns expected columns", { + xml <- make_dashboard_xml() + out <- twb_dashboard_layout(xml) + expect_s3_class(out, "tbl_df") + expect_named(out, c("dashboard", "zone_id", "parent_zone_id", + "component_type", "target", "layout_type", + "x", "y", "w", "h"), + ignore.order = TRUE) +}) + +test_that("twb_dashboard_layout captures parent_zone_id correctly", { + xml <- make_dashboard_xml() + out <- twb_dashboard_layout(xml) + # zone 2 and zone 3 are children of zone 1 + z2 <- out[out$zone_id == "2", ] + z3 <- out[out$zone_id == "3", ] + expect_equal(z2$parent_zone_id, "1") + expect_equal(z3$parent_zone_id, "1") + # zone 1 is a root (parent is , not a ) + z1 <- out[out$zone_id == "1", ] + expect_true(is.na(z1$parent_zone_id)) +}) + +test_that("twb_dashboard_layout classifies component types", { + xml <- make_dashboard_xml() + out <- twb_dashboard_layout(xml) + ws_types <- out$component_type[out$zone_id %in% c("2", "4")] + expect_true(all(ws_types == "worksheet")) + filt_type <- out$component_type[out$zone_id == "3"] + expect_equal(filt_type, "filter") +}) + +test_that("twb_dashboard_layout marks layout zones as container", { + xml <- make_dashboard_xml() + out <- twb_dashboard_layout(xml) + z1 <- out[out$zone_id == "1", ] + expect_equal(z1$component_type, "container") +}) + +# ---- twb_dashboard_actions --------------------------------------------------- + 
+test_that("twb_dashboard_actions returns expected columns", { + xml <- make_dashboard_xml() + out <- twb_dashboard_actions(xml) + expect_s3_class(out, "tbl_df") + expect_named(out, c("action_name", "action_type", "source_sheets", + "target_sheet", "run_on", "url"), + ignore.order = TRUE) +}) + +test_that("twb_dashboard_actions extracts filter and URL actions", { + xml <- make_dashboard_xml() + out <- twb_dashboard_actions(xml) + expect_true("filter" %in% out$action_type) + expect_true("url" %in% out$action_type) +}) + +test_that("twb_dashboard_actions captures URL value", { + xml <- make_dashboard_xml() + out <- twb_dashboard_actions(xml) + url_row <- out[out$action_type == "url", ] + expect_equal(url_row$url, "https://example.com") +}) + +test_that("twb_dashboard_actions captures run_on trigger", { + xml <- make_dashboard_xml() + out <- twb_dashboard_actions(xml) + filter_row <- out[out$action_type == "filter", ] + expect_equal(filter_row$run_on, "select") +}) + +test_that("twb_dashboard_actions returns empty tibble when no actions", { + xml <- xml2::read_xml("") + out <- twb_dashboard_actions(xml) + expect_s3_class(out, "tbl_df") + expect_equal(nrow(out), 0L) +}) + +# ---- TwbParser integration --------------------------------------------------- + +test_that("TwbParser exposes new get_* methods without error", { + demo <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser") + if (!nzchar(demo) || !file.exists(demo)) skip("demo .twb not available") + + p <- TwbParser$new(demo) + expect_s3_class(p$get_sheet_shelves(), "tbl_df") + expect_s3_class(p$get_sheet_filters(), "tbl_df") + expect_s3_class(p$get_sheet_axes(), "tbl_df") + expect_s3_class(p$get_sheet_sorts(), "tbl_df") + expect_s3_class(p$get_dashboard_sheets(),"tbl_df") + expect_s3_class(p$get_dashboard_layout(),"tbl_df") + expect_s3_class(p$get_dashboard_actions(),"tbl_df") +}) diff --git a/tests/testthat/test-graph-bugs.R b/tests/testthat/test-graph-bugs.R new file mode 100644 index 
0000000..8176e92 --- /dev/null +++ b/tests/testthat/test-graph-bugs.R @@ -0,0 +1,59 @@ +test_that("plot_relationship_graph uses left_field for 'from' edge (not right_field)", { + rels <- tibble::tibble( + left_table = "Orders", + left_field = "OrderID", + right_table = "Returns", + right_field = "ReturnID", + operator = "=" + ) + g <- plot_relationship_graph(rels, seed = 1L) + vnames <- igraph::V(g)$name + expect_true(any(grepl("Orders\\.OrderID", vnames))) + expect_true(any(grepl("Returns\\.ReturnID", vnames))) + # must NOT contain left_table.right_field (old bug) + expect_false(any(grepl("Orders\\.ReturnID", vnames))) +}) + +test_that("plot_source_join_graph uses left_table/right_table columns", { + joins <- tibble::tibble( + left_table = "Orders", + left_field = "OrderID", + right_table = "Returns", + right_field = "ReturnID", + join_type = "inner", + operator = "=" + ) + rels <- tibble::tibble( + left_table = "Orders", + right_table = "Returns", + left_field = "OrderID", + right_field = "ReturnID", + operator = "=" + ) + # Should not error + g_list <- expect_no_error(plot_source_join_graph(joins, rels, seed = 1L)) + expect_true(is.list(g_list) || inherits(g_list, "igraph")) +}) + +test_that("infer_implicit_relationships does not explode on duplicate field names", { + # create a scenario with many tables sharing the same field name + fields <- tibble::tibble( + datasource = rep("ds", 12L), + name = rep(c("[id]", "[name]", "[value]"), 4L), + table_clean = rep(c("A", "B", "C", "D"), each = 3L), + field_clean = rep(c("id", "name", "value"), 4L), + semantic_role = NA_character_, + is_parameter = FALSE + ) + out <- infer_implicit_relationships(fields) + expect_s3_class(out, "tbl_df") + # no duplicated canonical pairs + pairs <- paste( + pmin(paste0(out$left_table, ".", out$left_field), + paste0(out$right_table, ".", out$right_field)), + pmax(paste0(out$left_table, ".", out$left_field), + paste0(out$right_table, ".", out$right_field)), + sep = "||" + ) + 
expect_equal(length(pairs), length(unique(pairs))) +}) diff --git a/tests/testthat/test-sheet-details.R b/tests/testthat/test-sheet-details.R new file mode 100644 index 0000000..2d69b81 --- /dev/null +++ b/tests/testthat/test-sheet-details.R @@ -0,0 +1,182 @@ +# ---- helpers ----------------------------------------------------------------- + +make_sheet_xml <- function() { + xml2::read_xml( + ' + + + + + + + + + + + + 0 + 9999 + + + + [ds].[Category] + [ds].[Sales] + + + + + + + + + + +
+
+
+
' + ) +} + +# ---- twb_sheet_shelves ------------------------------------------------------- + +test_that("twb_sheet_shelves returns tibble with expected columns", { + xml <- make_sheet_xml() + out <- twb_sheet_shelves(xml) + expect_s3_class(out, "tbl_df") + expect_named(out, c("sheet", "shelf", "field_ref", "field_instance", + "field_clean", "datasource", "aggregation"), + ignore.order = TRUE) +}) + +test_that("twb_sheet_shelves detects rows, cols and encoding shelves", { + xml <- make_sheet_xml() + out <- twb_sheet_shelves(xml) + expect_true("rows" %in% out$shelf) + expect_true("cols" %in% out$shelf) + expect_true("color" %in% out$shelf) + expect_true("size" %in% out$shelf) + expect_true("label" %in% out$shelf) +}) + +test_that("twb_sheet_shelves filters to a single sheet", { + xml <- make_sheet_xml() + out <- twb_sheet_shelves(xml, sheet = "Sales") + expect_true(all(out$sheet == "Sales")) + + # non-existent sheet returns empty + empty <- twb_sheet_shelves(xml, sheet = "NoSuchSheet") + expect_equal(nrow(empty), 0L) +}) + +test_that("twb_sheet_shelves returns empty tibble for empty workbook", { + xml <- xml2::read_xml("") + out <- twb_sheet_shelves(xml) + expect_s3_class(out, "tbl_df") + expect_equal(nrow(out), 0L) +}) + +# ---- twb_sheet_filters ------------------------------------------------------- + +test_that("twb_sheet_filters returns expected columns", { + xml <- make_sheet_xml() + out <- twb_sheet_filters(xml) + expect_s3_class(out, "tbl_df") + expect_named(out, c("sheet", "field_ref", "field_clean", "datasource", + "filter_class", "include_mode", "members", + "range_min", "range_max"), + ignore.order = TRUE) +}) + +test_that("twb_sheet_filters extracts categorical members", { + xml <- make_sheet_xml() + out <- twb_sheet_filters(xml) + cat_row <- out[out$filter_class == "categorical", ] + expect_equal(nrow(cat_row), 1L) + expect_true(grepl("Furniture", cat_row$members, fixed = TRUE)) + expect_true(grepl("Technology", cat_row$members, fixed = TRUE)) + 
expect_equal(cat_row$include_mode, "include") +}) + +test_that("twb_sheet_filters extracts range min/max", { + xml <- make_sheet_xml() + out <- twb_sheet_filters(xml) + rng_row <- out[out$filter_class == "range", ] + expect_equal(nrow(rng_row), 1L) + expect_equal(rng_row$range_min, "0") + expect_equal(rng_row$range_max, "9999") +}) + +test_that("twb_sheet_filters returns empty tibble when no filters present", { + xml <- xml2::read_xml( + ' +
+
' + ) + out <- twb_sheet_filters(xml) + expect_equal(nrow(out), 0L) +}) + +# ---- twb_sheet_axes ---------------------------------------------------------- + +test_that("twb_sheet_axes returns expected columns", { + xml <- make_sheet_xml() + out <- twb_sheet_axes(xml) + expect_s3_class(out, "tbl_df") + expect_named(out, c("sheet", "axis", "field_ref", "field_clean", + "scale_type", "reversed", "include_zero"), + ignore.order = TRUE) +}) + +test_that("twb_sheet_axes parses reverse and include-zero flags", { + xml <- make_sheet_xml() + out <- twb_sheet_axes(xml) + expect_equal(nrow(out), 1L) + expect_false(out$reversed[[1L]]) + expect_true(out$include_zero[[1L]]) +}) + +test_that("twb_sheet_axes returns empty tibble when no axis rules present", { + xml <- xml2::read_xml( + ' + + ' + ) + out <- twb_sheet_axes(xml) + expect_equal(nrow(out), 0L) +}) + +# ---- twb_sheet_sorts --------------------------------------------------------- + +test_that("twb_sheet_sorts returns expected columns", { + xml <- make_sheet_xml() + out <- twb_sheet_sorts(xml) + expect_s3_class(out, "tbl_df") + expect_named(out, c("sheet", "field_ref", "field_clean", "datasource", + "sort_order", "sort_by"), + ignore.order = TRUE) +}) + +test_that("twb_sheet_sorts extracts sort direction and method", { + xml <- make_sheet_xml() + out <- twb_sheet_sorts(xml) + expect_equal(nrow(out), 1L) + expect_equal(out$sort_order[[1L]], "descending") + expect_equal(out$sort_by[[1L]], "field") +}) + +test_that("twb_sheet_sorts returns empty tibble when no sorts present", { + xml <- xml2::read_xml( + ' +
+
' + ) + out <- twb_sheet_sorts(xml) + expect_equal(nrow(out), 0L) +}) diff --git a/vignettes/twbparser-intro.Rmd b/vignettes/twbparser-intro.Rmd index 08af1cd..53428ed 100644 --- a/vignettes/twbparser-intro.Rmd +++ b/vignettes/twbparser-intro.Rmd @@ -30,7 +30,12 @@ cat("> Demo .twb not found in installed package. Skipping executable examples.\n # Introduction -`twbparser` parses Tableau `.twb` and `.twbx` workbooks and exposes datasources, relationships, joins, fields, calculated fields, and TWBX assets. It also provides page-centric insights: dashboards, worksheets, stories, their composition, filter positions, chart types, and colors/palettes. This vignette demonstrates common use cases. +`twbparser` parses Tableau `.twb` and `.twbx` workbooks and exposes datasources, +relationships, joins, fields, calculated fields, and TWBX assets. It also +provides page-centric insights — dashboards, worksheets, stories, their +composition, filter positions, chart types, and colors/palettes — as well as +per-worksheet shelf/filter/axis/sort details and per-dashboard zone layout and +actions. This vignette demonstrates common use cases. # Parse a Tableau Workbook @@ -99,6 +104,77 @@ twb_colors(parser) ``` +# Worksheet intelligence + +Each of the four functions below accepts an optional `sheet` argument to +restrict output to a single worksheet. + +## Shelves — what fields are on rows, cols, and encodings? + +```{r sheet-shelves, eval=ok} +shelves <- twb_sheet_shelves(parser) +head(shelves) +``` + +The `shelf` column is `"rows"`, `"cols"`, or an encoding type such as `"color"`, `"size"`, +`"label"`, `"detail"`, or `"tooltip"` (the list is not exhaustive). + +## Filters + +```{r sheet-filters, eval=ok} +filters <- twb_sheet_filters(parser) +head(filters) +``` + +Categorical filters include a comma-separated `members` column; +range filters populate `range_min` / `range_max`. 
+ +## Axis configuration + +```{r sheet-axes, eval=ok} +axes <- twb_sheet_axes(parser) +head(axes) +``` + +## Sort directives + +```{r sheet-sorts, eval=ok} +sorts <- twb_sheet_sorts(parser) +head(sorts) +``` + +# Dashboard intelligence + +## Sheet positions + +```{r dashboard-sheets, eval=ok} +db_sheets <- twb_dashboard_sheets(parser) +head(db_sheets) +``` + +`x`, `y`, `w`, `h` are pixel coordinates within the dashboard canvas. + +## Zone layout tree + +```{r dashboard-layout, eval=ok} +layout <- twb_dashboard_layout(parser) +head(layout) +``` + +`parent_zone_id` links child zones to their container; root zones have `NA`. +`component_type` is one of `"worksheet"`, `"filter"`, `"container"`, +`"legend"`, `"parameter_control"`, `"text"`, `"image"`, or `"blank"`. + +## Actions + +```{r dashboard-actions, eval=ok} +actions <- twb_dashboard_actions(parser) +head(actions) +``` + +`action_type` is `"filter"`, `"url"`, `"highlight"`, `"set"`, `"parameter"`, or another type string from the XML. +`source_sheets` is a comma-separated list; `url` is populated for URL actions. + # Relationships and Joins ```{r relationships-joins, eval=exists("parser")}