From c3e9d95754fc9d46fab7becbffd0af75e97e007a Mon Sep 17 00:00:00 2001
From: George Arthur <prigasgenthian48@gmail.com>
Date: Sat, 11 Apr 2026 06:59:06 -0400
Subject: [PATCH] Phase 4: analytics, cleanup, and cheatsheet

New functions (R/analytics.R):
- twb_calc_complexity(): classify calcs as lod/table_calc/aggregate/raw,
  extract lod_type (fixed/include/exclude), compute dep_depth (longest
  calc-on-calc chain via DP on dependency DAG), n_deps token count
- twb_field_usage(): cross-workbook field x sheet matrix; long or wide form;
  context = shelf:rows / shelf:color / filter / etc.
- twb_replication_brief(): assembles all 11 intelligence sections into a
  named list or formatted text; dashboard-scoped; include_sql / include_formulas

All three wired into TwbParser as get_*() methods and active bindings.

Consistency fixes:
- twb_custom_sql(), twb_initial_sql(), twb_published_refs() now accept a
  TwbParser OR an xml2 document (via .twb_resolve_xml); full roxygen docs added
- .normalize_token() in dependency_graph.R: fixed strsplit("\.?") -> "\."
  (was splitting field names into individual characters, breaking dep_depth)
- unname() applied to .dep_depths() vapply result to prevent named vector
  propagating into tibble columns

Testing:
- tests/testthat/test-analytics.R: 30 new assertions covering LOD/table_calc/
  aggregate/raw classification, dep_depth chain lengths (0/1/2), field_usage
  long/wide/scoped forms, replication_brief structure and format="text"

Cheatsheet:
- inst/cheatsheet/twbparser-cheatsheet.tex: 3-column landscape LaTeX cheatsheet
  covering all 44 exported functions with signatures and runnable examples

R CMD check: 0 errors | 0 warnings | 0 notes

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 NAMESPACE                                |   9 +
 R/active-bindings.R                      |   4 +
 R/analytics.R                            | 584 +++++++++++++++++++++++
 R/dependency_graph.R                     |   2 +-
 R/globals.R                              |   9 +-
 R/published.R                            |  47 +-
 R/sql.R                                  |  67 ++-
 R/twb_parser.R                           |  45 ++
 inst/WORDLIST                            |   2 +
 inst/cheatsheet/twbparser-cheatsheet.tex | 576 ++++++++++++++++++++++
 man/TwbParser.Rd                         |  23 +-
 man/twb_calc_complexity.Rd               |  46 ++
 man/twb_custom_sql.Rd                    |  25 +-
 man/twb_field_usage.Rd                   |  54 +++
 man/twb_initial_sql.Rd                   |  22 +-
 man/twb_published_refs.Rd                |  26 +-
 man/twb_replication_brief.Rd             |  66 +++
 tests/testthat/test-analytics.R          | 355 ++++++++++++++
 18 files changed, 1918 insertions(+), 44 deletions(-)
 create mode 100644 R/analytics.R
 create mode 100644 inst/cheatsheet/twbparser-cheatsheet.tex
 create mode 100644 man/twb_calc_complexity.Rd
 create mode 100644 man/twb_field_usage.Rd
 create mode 100644 man/twb_replication_brief.Rd
 create mode 100644 tests/testthat/test-analytics.R

diff --git a/NAMESPACE b/NAMESPACE
index 06c8a06..4d79d10 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -20,6 +20,7 @@ export(prettify_calculated_fields)
 export(tableau_formula_pretty)
 export(tbs_custom_sql_graphql)
 export(tbs_publish_info)
+export(twb_calc_complexity)
 export(twb_charts)
 export(twb_colors)
 export(twb_custom_sql)
@@ -29,11 +30,13 @@ export(twb_dashboard_layout)
 export(twb_dashboard_sheets)
 export(twb_dashboard_summary)
 export(twb_dashboards)
+export(twb_field_usage)
 export(twb_initial_sql)
 export(twb_page_composition)
 export(twb_pages)
 export(twb_pages_summary)
 export(twb_published_refs)
+export(twb_replication_brief)
 export(twb_sheet_axes)
 export(twb_sheet_filters)
 export(twb_sheet_shelves)
@@ -67,8 +70,12 @@ importFrom(igraph,E)
 importFrom(igraph,V)
 importFrom(igraph,gorder)
 importFrom(igraph,graph_from_data_frame)
+importFrom(igraph,induced_subgraph)
+importFrom(igraph,is_dag)
 importFrom(igraph,layout_with_fr)
 importFrom(igraph,make_empty_graph)
+importFrom(igraph,neighbors)
+importFrom(igraph,topo_sort)
 importFrom(purrr,map)
 importFrom(purrr,map_chr)
 importFrom(purrr,map_dfr)
@@ -78,10 +85,12 @@ importFrom(stringr,str_extract_all)
 importFrom(stringr,str_to_title)
 importFrom(tibble,as_tibble)
 importFrom(tibble,tibble)
+importFrom(tidyr,pivot_wider)
 importFrom(tidyr,replace_na)
 importFrom(tidyr,unnest_longer)
 importFrom(tools,file_ext)
 importFrom(tools,file_path_sans_ext)
+importFrom(utils,capture.output)
 importFrom(utils,globalVariables)
 importFrom(utils,tail)
 importFrom(utils,unzip)
diff --git a/R/active-bindings.R b/R/active-bindings.R
index 7267cb2..04d453e 100644
--- a/R/active-bindings.R
+++ b/R/active-bindings.R
@@ -100,6 +100,10 @@ twb_install_active_properties <- function(x, cache = TRUE) {
   rebind("dashboard_layout",   wrap_cache("dashboard_layout",   function() x$get_dashboard_layout()))
   rebind("dashboard_actions",  wrap_cache("dashboard_actions",  function() x$get_dashboard_actions()))
 
+  ## Phase 4: analytics
+  rebind("calc_complexity",    wrap_cache("calc_complexity",    function() x$get_calc_complexity()))
+  rebind("field_usage",        wrap_cache("field_usage",        function() x$get_field_usage()))
+
   ## Validation snapshot (read-only)
   rebind(
     "validation",
diff --git a/R/analytics.R b/R/analytics.R
new file mode 100644
index 0000000..3f11a95
--- /dev/null
+++ b/R/analytics.R
@@ -0,0 +1,584 @@
+#' @importFrom igraph gorder is_dag topo_sort induced_subgraph neighbors V
+#' @importFrom dplyr mutate select arrange distinct filter bind_rows count rename
+#' @importFrom tidyr pivot_wider
+#' @importFrom tibble tibble
+#' @importFrom utils capture.output
+NULL
+
+# ---- Internal helpers --------------------------------------------------------
+
+#' Assign each Tableau formula to exactly one computation category
+#' Checked in order: lod, then table_calc, then aggregate; otherwise raw
+#' @keywords internal
+#' @noRd
+.classify_calc_type <- function(formula, is_table_calc) {
+  # Patterns are built once and reused for every element
+  lod_rx <- "\\{\\s*(FIXED|INCLUDE|EXCLUDE)\\b"
+  agg_rx <- paste0(
+    "\\b(SUM|AVG|MIN|MAX|COUNT|COUNTD|MEDIAN|STDEV|STDEVP|",
+    "VAR|VARP|ATTR|AGG|RAWSQLAGG|PERCENTILE|CORR|COVAR|COVARP)\\s*\\("
+  )
+  has <- function(rx, txt) grepl(rx, txt, ignore.case = TRUE, perl = TRUE)
+  classify_one <- function(i) {
+    txt     <- formula[[i]]
+    flagged <- isTRUE(is_table_calc[[i]])
+    # A missing formula is "raw"; otherwise the first matching rule wins
+    if (is.na(txt)) return("raw")
+    if (has(lod_rx, txt)) return("lod")
+    if (flagged) return("table_calc")
+    if (has(agg_rx, txt)) "aggregate" else "raw"
+  }
+  vapply(seq_along(formula), classify_one, character(1L))
+}
+
+#' Extract the LOD sub-type (fixed / include / exclude) from a formula vector
+#' The keyword must directly follow `{` (as in `.classify_calc_type()`), so a
+#' field name such as `[Fixed Cost]` earlier in the formula is not picked up.
+#' @keywords internal
+#' @noRd
+.extract_lod_type <- function(formula, calc_type) {
+  lod_rx <- "\\{\\s*(FIXED|INCLUDE|EXCLUDE)\\b"
+  vapply(seq_along(formula), function(i) {
+    f <- formula[[i]]
+    if (!identical(calc_type[[i]], "lod") || is.na(f))
+      return(NA_character_)
+    m <- regmatches(f, regexpr(lod_rx, f, ignore.case = TRUE, perl = TRUE))
+    if (length(m) == 0L)
+      return(NA_character_)
+    tolower(sub("^\\{\\s*", "", m[[1L]]))  # strip brace, keep keyword
+  }, character(1L))
+}
+
+#' Count distinct bracketed field tokens in each formula (unnamed integers)
+#' @keywords internal
+#' @noRd
+.count_formula_deps <- function(formula) {
+  vapply(formula, function(f) {
+    if (is.na(f)) 0L else length(unique(.extract_tokens(f)))
+  }, integer(1L), USE.NAMES = FALSE)  # else the formula text becomes names
+}
+
+#' Longest-path depth through the calc-field subgraph (DP over topo order)
+#' @keywords internal
+#' @noRd
+.dep_depths <- function(g, calc_names) {
+  n_calcs <- length(calc_names)
+  if (igraph::gorder(g) == 0L || n_calcs == 0L)
+    return(rep(0L, n_calcs))  # empty graph or no calcs: all depths are 0
+
+  all_verts  <- igraph::V(g)$name
+  calc_verts <- intersect(all_verts, calc_names)
+  if (length(calc_verts) == 0L)
+    return(rep(0L, n_calcs))  # no calc name occurs as a vertex
+
+  # Sub-graph of calc fields only so raw-field hops are not counted
+  sub_g     <- igraph::induced_subgraph(
+    g, vids = igraph::V(g)[all_verts %in% calc_verts]
+  )
+  sub_names <- igraph::V(sub_g)$name
+
+  if (!igraph::is_dag(sub_g)) {
+    warning(
+      "Circular dependencies detected in calculated fields. ",
+      "dep_depth is set to NA for all fields.",
+      call. = FALSE
+    )
+    return(rep(NA_integer_, n_calcs))  # one NA per requested calc name
+  }
+
+  # DP: dp[i] = longest path from any source vertex to vertex i
+  dp    <- rep(0L, igraph::gorder(sub_g))  # NOTE(review): confirm edge direction
+  topo  <- igraph::topo_sort(sub_g, mode = "out")   # sources first
+
+  for (v in as.integer(topo)) {  # each vertex comes after its in-neighbours
+    preds <- as.integer(igraph::neighbors(sub_g, v, mode = "in"))
+    if (length(preds) > 0L)  # vertices without in-edges stay at 0
+      dp[[v]] <- max(dp[preds]) + 1L  # one more than the deepest in-neighbour
+  }
+
+  unname(vapply(calc_names, function(nm) {
+    idx <- match(nm, sub_names)
+    if (!is.na(idx)) dp[[idx]] else 0L  # names absent from sub_g get 0
+  }, integer(1L)))
+}
+
+# ---- twb_calc_complexity -----------------------------------------------------
+
+#' Classify calculated fields by complexity
+#'
+#' Returns every calculated field in the workbook enriched with a computation
+#' category (`calc_type`), LOD sub-type, dependency count, and dependency depth
+#' — the maximum number of calc-on-calc hops in the field's dependency chain.
+#'
+#' @param x A `TwbParser` object or an `xml2` document.
+#' @param include_parameters Logical; if `TRUE`, include parameter fields
+#'   (they always land in `calc_type = "raw"` and `dep_depth = 0`).
+#'   Default `FALSE`.
+#'
+#' @return A tibble with columns:
+#' \describe{
+#'   \item{datasource}{Datasource the field belongs to.}
+#'   \item{name}{Human-readable field name.}
+#'   \item{tableau_internal_name}{Bracketed internal Tableau name.}
+#'   \item{datatype}{Field data type.}
+#'   \item{role}{`"measure"` or `"dimension"`.}
+#'   \item{calc_type}{One of `"lod"`, `"table_calc"`, `"aggregate"`, `"raw"`.
+#'     Tested in that precedence order.}
+#'   \item{lod_type}{`"fixed"`, `"include"`, or `"exclude"`; `NA` if not LOD.}
+#'   \item{is_table_calc}{Logical; existing heuristic flag preserved for
+#'     backward compatibility.}
+#'   \item{dep_depth}{Integer; longest chain of calc-on-calc dependencies.
+#'     `0` means the field only references raw fields (or has no references).}
+#'   \item{n_deps}{Integer; count of distinct bracketed tokens in the formula.}
+#'   \item{formula}{Raw formula string.}
+#' }
+#'
+#' @examples
+#' twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
+#' stopifnot(nzchar(twb), file.exists(twb))
+#' xml <- xml2::read_xml(twb)
+#' twb_calc_complexity(xml)
+#'
+#' @export
+twb_calc_complexity <- function(x, include_parameters = FALSE) {
+  xml_doc <- .twb_resolve_xml(x)
+  stopifnot(
+    is.logical(include_parameters), length(include_parameters) == 1L,
+    !is.na(include_parameters)
+  )
+  .ins_calc_complexity(xml_doc, include_parameters)
+}
+
+#' @keywords internal
+#' @noRd
+.ins_calc_complexity <- function(xml_doc, include_parameters = FALSE) {
+  calcs <- extract_calculated_fields(xml_doc,
+                                     include_parameters = include_parameters)
+  if (nrow(calcs) == 0L) return(.empty_calc_complexity())
+
+  calcs <- calcs |>
+    dplyr::mutate(
+      calc_type = .classify_calc_type(formula, is_table_calc),
+      lod_type  = .extract_lod_type(formula, calc_type),
+      n_deps    = .count_formula_deps(formula)
+    )
+
+  dep_d <- .dep_depths(
+    build_dependency_graph(calcs),
+    calc_names = calcs$name
+  )
+
+  calcs |>
+    dplyr::mutate(dep_depth = dep_d) |>
+    dplyr::select(
+      "datasource", "name", "tableau_internal_name", "datatype", "role",
+      "calc_type", "lod_type", "is_table_calc", "dep_depth", "n_deps",
+      "formula"
+    ) |>
+    dplyr::arrange(.data$datasource, .data$calc_type, .data$name)
+}
+
+.empty_calc_complexity <- function() {
+  tibble::tibble(
+    datasource            = character(),
+    name                  = character(),
+    tableau_internal_name = character(),
+    datatype              = character(),
+    role                  = character(),
+    calc_type             = character(),
+    lod_type              = character(),
+    is_table_calc         = logical(),
+    dep_depth             = integer(),
+    n_deps                = integer(),
+    formula               = character()
+  )
+}
+
+# ---- twb_field_usage ---------------------------------------------------------
+
+#' Field usage matrix across worksheets
+#'
+#' Combines shelf placement and filter usage into a tidy long tibble showing
+#' where each field appears and in what capacity across all (or selected)
+#' worksheets.
+#'
+#' @param x A `TwbParser` object or an `xml2` document.
+#' @param include_filters Logical; include filter appearances. Default `TRUE`.
+#' @param include_shelves Logical; include shelf appearances (rows, cols, color,
+#'   size, etc.). Default `TRUE`.
+#' @param wide Logical; if `TRUE`, pivot to one row per field with one column
+#'   per sheet containing a comma-separated list of contexts, or `NA` if the
+#'   field does not appear on that sheet. Default `FALSE`.
+#'
+#' @return
+#' **Long form** (`wide = FALSE`): a tibble with columns:
+#' \describe{
+#'   \item{field_clean}{Human-readable field name.}
+#'   \item{datasource}{Datasource the field belongs to.}
+#'   \item{sheet}{Worksheet name.}
+#'   \item{context}{Usage context, e.g. `"shelf:rows"`, `"shelf:color"`,
+#'     `"filter"`.}
+#'   \item{n_appearances}{Number of times the field appears in this context on
+#'     this sheet (handles multi-pill rows/cols).}
+#' }
+#'
+#' **Wide form** (`wide = TRUE`): one row per `(field_clean, datasource)`,
+#' one column per sheet, cell value is a comma-separated context string or
+#' `NA`.
+#'
+#' @examples
+#' twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
+#' stopifnot(nzchar(twb), file.exists(twb))
+#' xml <- xml2::read_xml(twb)
+#' twb_field_usage(xml)
+#' twb_field_usage(xml, wide = TRUE)
+#'
+#' @export
+twb_field_usage <- function(x, include_filters = TRUE, include_shelves = TRUE,
+                             wide = FALSE) {
+  xml_doc <- .twb_resolve_xml(x)
+  # NA flags are rejected here rather than failing later inside an `if`
+  stopifnot(
+    is.logical(include_filters), length(include_filters) == 1L,
+    is.logical(include_shelves), length(include_shelves) == 1L,
+    is.logical(wide),            length(wide)            == 1L,
+    !anyNA(c(include_filters, include_shelves, wide))
+  )
+  if (!include_filters && !include_shelves) {
+    message("`include_filters` and `include_shelves` are both FALSE: ",
+            "returning empty tibble.")
+    return(.empty_field_usage())
+  }
+  .ins_field_usage(xml_doc, include_filters, include_shelves, wide)
+}
+
+#' @keywords internal
+#' @noRd
+.ins_field_usage <- function(xml_doc,
+                              include_filters = TRUE,
+                              include_shelves = TRUE,
+                              wide            = FALSE) {
+  parts <- list()
+
+  if (include_shelves) {
+    sh <- .ins_sheet_shelves(xml_doc)
+    if (nrow(sh) > 0L) {
+      parts[["shelves"]] <- sh |>
+        dplyr::filter(!is.na(.data$field_clean), nzchar(.data$field_clean)) |>
+        dplyr::mutate(context = paste0("shelf:", .data$shelf)) |>
+        dplyr::select("field_clean", "datasource", "sheet", "context")
+    }
+  }
+
+  if (include_filters) {
+    fl <- .ins_sheet_filters(xml_doc)
+    if (nrow(fl) > 0L) {
+      parts[["filters"]] <- fl |>
+        dplyr::filter(!is.na(.data$field_clean), nzchar(.data$field_clean)) |>
+        dplyr::mutate(context = "filter") |>
+        dplyr::select("field_clean", "datasource", "sheet", "context")
+    }
+  }
+
+  if (length(parts) == 0L) return(.empty_field_usage())
+
+  combined <- dplyr::bind_rows(parts) |>
+    dplyr::count(.data$field_clean, .data$datasource, .data$sheet,
+                 .data$context, name = "n_appearances") |>
+    dplyr::mutate(n_appearances = as.integer(.data$n_appearances)) |>
+    dplyr::arrange(.data$field_clean, .data$sheet, .data$context)
+
+  if (!wide) return(combined)
+
+  # Wide form: one row per (field_clean, datasource), one col per sheet
+  tidyr::pivot_wider(
+    combined,
+    id_cols     = c("field_clean", "datasource"),
+    names_from  = "sheet",
+    values_from = "context",
+    values_fn   = function(ctx) paste(sort(unique(ctx)), collapse = ", ")
+  )
+}
+
+.empty_field_usage <- function() {
+  tibble::tibble(
+    field_clean   = character(),
+    datasource    = character(),
+    sheet         = character(),
+    context       = character(),
+    n_appearances = integer()
+  )
+}
+
+# ---- twb_replication_brief ---------------------------------------------------
+
+#' Replication brief for a Tableau workbook or dashboard
+#'
+#' Assembles all extracted intelligence — datasources, parameters, calculated
+#' fields with complexity classifications, field usage, filters, sorts, chart
+#' types, dashboard layout, and actions — into a single named list (or
+#' formatted text) ready for use when porting to another visualisation tool.
+#'
+#' @param x A `TwbParser` object or an `xml2` document.
+#' @param dashboard Optional character scalar. When supplied, sheet-level
+#'   sections (filters, sorts, chart types, field usage, layout) are scoped to
+#'   the sheets that belong to this dashboard.
+#' @param include_sql Logical; include custom SQL blocks in `$custom_sql`.
+#'   Default `TRUE`.
+#' @param include_formulas Logical; when `TRUE`, a `formula_pretty` column is
+#'   added to `$calculated_fields`. Default `TRUE`.
+#' @param format Either `"list"` (default) to return a named R list, or
+#'   `"text"` to return a single formatted character string suitable for
+#'   printing or writing to a file.
+#'
+#' @return
+#' **`format = "list"`**: a named list with elements:
+#' \describe{
+#'   \item{meta}{1-row tibble: file name, counts, generation timestamp.}
+#'   \item{datasources}{Datasource connection details.}
+#'   \item{parameters}{Parameter fields with current values.}
+#'   \item{custom_sql}{Custom SQL blocks, or `NULL` if `include_sql = FALSE`.}
+#'   \item{calculated_fields}{Tibble from [twb_calc_complexity()], optionally
+#'     with a `formula_pretty` column.}
+#'   \item{field_usage}{Tibble from [twb_field_usage()].}
+#'   \item{filters}{Worksheet filters (scoped to `dashboard` if given).}
+#'   \item{sorts}{Worksheet sorts (scoped to `dashboard` if given).}
+#'   \item{chart_types}{Mark types per worksheet.}
+#'   \item{dashboard_layout}{Zone positions from [twb_dashboard_sheets()].}
+#'   \item{actions}{Dashboard actions from [twb_dashboard_actions()].}
+#' }
+#'
+#' **`format = "text"`**: a single `character(1)` with section headers and
+#' tabular output.
+#'
+#' @examples
+#' twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
+#' stopifnot(nzchar(twb), file.exists(twb))
+#' xml <- xml2::read_xml(twb)
+#' brief <- twb_replication_brief(xml)
+#' names(brief)
+#' brief$meta
+#'
+#' @export
+twb_replication_brief <- function(x, dashboard = NULL, include_sql = TRUE,
+                                   include_formulas = TRUE,
+                                   format = c("list", "text")) {
+  xml_doc <- .twb_resolve_xml(x)
+  format  <- match.arg(format)
+
+  if (!is.null(dashboard)) {
+    stopifnot(is.character(dashboard), length(dashboard) == 1L,
+              !is.na(dashboard))
+    dashboard <- gsub("'", "", dashboard, fixed = TRUE)
+  }
+  stopifnot(
+    is.logical(include_sql), length(include_sql) == 1L, !is.na(include_sql),
+    is.logical(include_formulas), length(include_formulas) == 1L,
+    !is.na(include_formulas)
+  )
+
+  brief <- .ins_replication_brief(xml_doc, x, dashboard,
+                                   include_sql, include_formulas)
+
+  if (identical(format, "text")) return(.brief_to_text(brief))
+  brief
+}
+
+#' Assemble the replication brief as a named list
+#'
+#' Collects meta counts, datasources, parameters, custom SQL, calculated
+#' fields, field usage, filters, sorts, chart types, dashboard layout and
+#' actions. When `dashboard` is non-NULL, sheet-level tables are filtered to
+#' the sheets returned by `.ins_dashboard_sheets()` for that dashboard.
+#' @keywords internal
+#' @noRd
+.ins_replication_brief <- function(xml_doc, x_orig, dashboard,
+                                    include_sql, include_formulas) {
+  # -- meta -------------------------------------------------------------------
+  workbook_file <- if (inherits(x_orig, "TwbParser")) {
+    basename(x_orig$path %||% "")
+  } else {
+    "<inline>"
+  }
+
+  pages        <- .ins_pages(xml_doc)
+  n_worksheets <- sum(pages$page_type == "worksheet", na.rm = TRUE)
+  n_dashboards <- sum(pages$page_type == "dashboard", na.rm = TRUE)
+
+  ds_details <- tryCatch(
+    extract_datasource_details(xml_doc),
+    error = function(e) list(data_sources = tibble::tibble(),
+                             parameters   = tibble::tibble())
+  )
+  calcs  <- .ins_calc_complexity(xml_doc)
+  params <- tryCatch(extract_parameters(xml_doc), error = function(e) tibble::tibble())
+
+  meta <- tibble::tibble(
+    workbook_file       = workbook_file,
+    n_datasources       = nrow(ds_details$data_sources),
+    n_worksheets        = as.integer(n_worksheets),
+    n_dashboards        = as.integer(n_dashboards),
+    n_calculated_fields = nrow(calcs),
+    n_parameters        = nrow(params),
+    generated_at        = format(Sys.time(), "%Y-%m-%d %H:%M:%S")
+  )
+
+  # -- datasources & parameters -----------------------------------------------
+  datasources <- ds_details$data_sources
+  parameters  <- params
+
+  # -- custom SQL -------------------------------------------------------------
+  custom_sql <- if (include_sql) {
+    tryCatch(twb_custom_sql(xml_doc), error = function(e) tibble::tibble())
+  } else {
+    NULL
+  }
+
+  # -- calculated fields ------------------------------------------------------
+  calc_fields <- calcs
+  if (include_formulas && nrow(calc_fields) > 0L) {
+    pretty <- tryCatch(
+      prettify_calculated_fields(
+        calc_fields |> dplyr::select("name", "formula"),
+        wrap = 100L
+      ),
+      error = function(e) NULL
+    )
+    if (!is.null(pretty) && "formula_pretty" %in% names(pretty)) {
+      # Add the column in place; never append rows here. Use positional
+      # alignment when the names line up exactly (handles duplicate names
+      # across datasources), otherwise fall back to a lookup by name.
+      idx <- if (identical(pretty$name, calc_fields$name)) {
+        seq_len(nrow(calc_fields))
+      } else {
+        match(calc_fields$name, pretty$name)
+      }
+      calc_fields$formula_pretty <- pretty$formula_pretty[idx]
+    }
+  }
+
+  # -- determine sheet scope --------------------------------------------------
+  scoped_sheets <- if (!is.null(dashboard)) {
+    db_sh <- tryCatch(
+      .ins_dashboard_sheets(xml_doc, dashboard),
+      error = function(e) tibble::tibble(sheet = character())
+    )
+    if (nrow(db_sh) > 0L) unique(db_sh$sheet) else character()
+  } else {
+    NULL   # NULL means all sheets
+  }
+
+  .scope <- function(tbl, col = "sheet") {
+    if (is.null(scoped_sheets) || !col %in% names(tbl)) return(tbl)
+    dplyr::filter(tbl, .data[[col]] %in% scoped_sheets)
+  }
+
+  # -- field usage ------------------------------------------------------------
+  field_usage <- tryCatch(
+    .scope(.ins_field_usage(xml_doc, include_filters = TRUE,
+                             include_shelves = TRUE, wide = FALSE)),
+    error = function(e) .empty_field_usage()
+  )
+
+  # -- filters & sorts --------------------------------------------------------
+  filters <- tryCatch(
+    .scope(.ins_sheet_filters(xml_doc)),
+    error = function(e) .empty_filters()
+  )
+  sorts <- tryCatch(
+    .scope(.ins_sheet_sorts(xml_doc)),
+    error = function(e) .empty_sorts()
+  )
+
+  # -- chart types ------------------------------------------------------------
+  chart_types <- tryCatch({
+    ct <- .ins_charts(xml_doc)
+    if (!is.null(scoped_sheets) && "worksheet" %in% names(ct))
+      ct <- dplyr::filter(ct, .data$worksheet %in% scoped_sheets)
+    ct
+  }, error = function(e) tibble::tibble())
+
+  # -- dashboard layout & actions ---------------------------------------------
+  db_layout <- tryCatch(
+    .ins_dashboard_sheets(xml_doc, dashboard),
+    error = function(e) tibble::tibble()
+  )
+  actions <- tryCatch(
+    .ins_dashboard_actions(xml_doc, dashboard),
+    error = function(e) tibble::tibble()
+  )
+
+  list(
+    meta               = meta,
+    datasources        = datasources,
+    parameters         = parameters,
+    custom_sql         = custom_sql,
+    calculated_fields  = calc_fields,
+    field_usage        = field_usage,
+    filters            = filters,
+    sorts              = sorts,
+    chart_types        = chart_types,
+    dashboard_layout   = db_layout,
+    actions            = actions
+  )
+}
+
+# ---- text formatter ----------------------------------------------------------
+
+#' @keywords internal
+#' @noRd
+.brief_to_text <- function(brief) {
+  lines <- character()
+
+  .hdr <- function(title) c(paste0("## ", title), "")
+
+  .tbl <- function(tbl) {
+    if (is.null(tbl) || (is.data.frame(tbl) && nrow(tbl) == 0L))
+      return(c("  (none)", ""))
+    c(utils::capture.output(print(as.data.frame(tbl), row.names = FALSE)),
+      "")
+  }
+
+  .kv <- function(label, value) {
+    sprintf("  %-25s %s", paste0(label, ":"), value)
+  }
+
+  # Header
+  lines <- c(lines, "# TWBPARSER REPLICATION BRIEF", "")
+
+  # Meta
+  m <- brief$meta
+  lines <- c(
+    lines,
+    .hdr("WORKBOOK"),
+    .kv("File",              m$workbook_file),
+    .kv("Generated at",      m$generated_at),
+    .kv("Datasources",       m$n_datasources),
+    .kv("Worksheets",        m$n_worksheets),
+    .kv("Dashboards",        m$n_dashboards),
+    .kv("Calculated fields", m$n_calculated_fields),
+    .kv("Parameters",        m$n_parameters),
+    ""
+  )
+
+  .section <- function(title, tbl) c(.hdr(title), .tbl(tbl))
+
+  lines <- c(lines,
+    .section("DATASOURCES",       brief$datasources),
+    .section("PARAMETERS",        brief$parameters)
+  )
+  if (!is.null(brief$custom_sql))
+    lines <- c(lines, .section("CUSTOM SQL", brief$custom_sql))
+
+  lines <- c(lines,
+    .section("CALCULATED FIELDS (complexity)", brief$calculated_fields),
+    .section("FIELD USAGE",                    brief$field_usage),
+    .section("FILTERS",                        brief$filters),
+    .section("SORTS",                          brief$sorts),
+    .section("CHART TYPES",                    brief$chart_types),
+    .section("DASHBOARD LAYOUT",               brief$dashboard_layout),
+    .section("ACTIONS",                        brief$actions)
+  )
+
+  paste(lines, collapse = "\n")
+}
diff --git a/R/dependency_graph.R b/R/dependency_graph.R
index eb6645b..365ef81 100644
--- a/R/dependency_graph.R
+++ b/R/dependency_graph.R
@@ -8,7 +8,7 @@
   tok <- sub("^([^:]+:)+", "", tok) # remove prefixes like "none:" or "clct:"
 
   # Split table-qualified references: [Table].[Field] -> take Field
-  parts <- strsplit(tok, "\\.?", fixed = FALSE)[[1]]
+  parts <- strsplit(tok, "\\.", fixed = FALSE)[[1]]
   parts <- parts[nzchar(parts)]
   parts <- gsub("^\\s+|\\s+$", "", parts)
   if (length(parts) == 0) {
diff --git a/R/globals.R b/R/globals.R
index 3913aa0..47f3afc 100644
--- a/R/globals.R
+++ b/R/globals.R
@@ -27,5 +27,12 @@ utils::globalVariables(c(
   # dashboard_details
   "action_name", "action_type", "source_sheets", "target_sheet", "run_on",
   "url", "layout_type", "parent_zone_id", "component_type", "zone_id",
-  "sheet"
+  "sheet",
+  # analytics (Phase 4)
+  "is_table_calc",
+  "calc_type", "lod_type", "dep_depth", "n_deps",
+  "n_appearances", "context",
+  "workbook_file", "n_datasources", "n_worksheets", "n_dashboards",
+  "n_calculated_fields", "n_parameters", "generated_at",
+  "formula_pretty"
 ))
diff --git a/R/published.R b/R/published.R
index 8b62d26..355e571 100644
--- a/R/published.R
+++ b/R/published.R
@@ -1,28 +1,51 @@
-#' Detect likely references to published data sources (vs embedded)
+#' Detect references to published data sources
+#'
+#' Inspects datasource nodes and heuristically flags those that reference a
+#' published (server-side) source rather than an embedded one.
+#'
+#' @param x A `TwbParser` object **or** an `xml2` document.
+#'
+#' @return A tibble with columns:
+#' \describe{
+#'   \item{name}{Internal datasource name.}
+#'   \item{caption}{User-visible caption.}
+#'   \item{hasconn}{Value of the `hasconnection` attribute.}
+#'   \item{likely_published}{`TRUE` when `hasconnection = false` or when the
+#'     node text contains published-source markers.}
+#'   \item{hints}{Short explanation of the classification.}
+#' }
+#'
+#' @examples
+#' twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
+#' stopifnot(nzchar(twb), file.exists(twb))
+#' xml <- xml2::read_xml(twb)
+#' twb_published_refs(xml)
+#'
 #' @importFrom rlang .data
-#' @param xml_doc An xml2 document for a .twb
-#' @return tibble with datasource name, caption, likely_published, hints
 #' @export
-twb_published_refs <- function(xml_doc) {
-  stopifnot(inherits(xml_doc, "xml_document"))
+twb_published_refs <- function(x) {
+  xml_doc <- .twb_resolve_xml(x)
   dsn <- xml2::xml_find_all(xml_doc, "//datasource")
-  if (length(dsn) == 0) {
+  if (length(dsn) == 0L) {
     return(tibble::tibble(
-      name = character(), caption = character(),
-      likely_published = logical(), hints = character()
+      name             = character(), caption = character(),
+      hasconn          = character(),  # keep schema equal to non-empty result
+      likely_published = logical(),
+      hints            = character()
     ))
   }
   tibble::tibble(
     name    = xml2::xml_attr(dsn, "name"),
     caption = xml2::xml_attr(dsn, "caption"),
     hasconn = xml2::xml_attr(dsn, "hasconnection"),
-    raw     = vapply(dsn, xml2::xml_text, character(1))
+    raw     = vapply(dsn, xml2::xml_text, character(1L))
   ) |>
     dplyr::mutate(
       likely_published =
         .data$hasconn %in% c("false", "0") |
-        stringr::str_detect(.data$raw,
-                            "(?i)published|tableau server|tableau cloud|catalog-id|content-url"
+        stringr::str_detect(
+          .data$raw,
+          "(?i)published|tableau server|tableau cloud|catalog-id|content-url"
         ),
       hints = dplyr::if_else(
         .data$likely_published,
@@ -30,5 +53,5 @@ twb_published_refs <- function(xml_doc) {
         "embedded or no published markers"
       )
     ) |>
-    dplyr::select(-raw)
+    dplyr::select(-"raw")
 }
diff --git a/R/sql.R b/R/sql.R
index c874ccf..28a85e9 100644
--- a/R/sql.R
+++ b/R/sql.R
@@ -1,10 +1,29 @@
-#' Extract Custom SQL relations from a TWB XML
+#' Extract Custom SQL relations from a Tableau workbook
+#'
+#' Finds every `<relation formula="...">` node that looks like a SQL statement
+#' and returns its name, type, raw SQL text, and a flag for whether it starts
+#' with `SELECT` or `WITH`.
+#'
+#' @param x A `TwbParser` object **or** an `xml2` document.
+#'
+#' @return A tibble with columns:
+#' \describe{
+#'   \item{relation_name}{Name attribute of the relation node.}
+#'   \item{relation_type}{Type attribute (e.g. `"text"`, `"table"`).}
+#'   \item{custom_sql}{Full SQL text.}
+#'   \item{is_custom_sql}{`TRUE` when the text begins with `SELECT` or `WITH`.}
+#' }
+#'
+#' @examples
+#' twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
+#' stopifnot(nzchar(twb), file.exists(twb))
+#' xml <- xml2::read_xml(twb)
+#' twb_custom_sql(xml)
+#'
 #' @importFrom rlang .data
-#' @param xml_doc An xml2 document for a .twb
-#' @return tibble with relation_name, relation_type, custom_sql
 #' @export
-twb_custom_sql <- function(xml_doc) {
-  stopifnot(inherits(xml_doc, "xml_document"))
+twb_custom_sql <- function(x) {
+  xml_doc <- .twb_resolve_xml(x)
   rels <- xml2::xml_find_all(xml_doc, "//relation[@formula]")
   tibble::tibble(
     relation_name = xml2::xml_attr(rels, "name"),
@@ -14,22 +33,42 @@ twb_custom_sql <- function(xml_doc) {
     dplyr::filter(!is.na(.data$custom_sql)) |>
     dplyr::mutate(
       is_custom_sql = dplyr::coalesce(
-        stringr::str_detect(.data$custom_sql,
-                            stringr::regex("^\\s*(select|with)\\b", ignore_case = TRUE)
+        stringr::str_detect(
+          .data$custom_sql,
+          stringr::regex("^\\s*(select|with)\\b", ignore_case = TRUE)
         ),
         FALSE
       )
     )
 }
 
-#' Extract Initial SQL statements from connections (if present)
-#' @param xml_doc An xml2 document for a .twb
-#' @return tibble with connection_id, initial_sql
+#' Extract Initial SQL statements from Tableau connections
+#'
+#' Returns any `<initial-sql>` nodes found inside connection or
+#' named-connection elements.
+#'
+#' @param x A `TwbParser` object **or** an `xml2` document.
+#'
+#' @return A tibble with columns:
+#' \describe{
+#'   \item{connection_id}{Name or caption of the parent connection element.}
+#'   \item{initial_sql}{SQL text of the initial statement.}
+#' }
+#'
+#' @examples
+#' twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
+#' stopifnot(nzchar(twb), file.exists(twb))
+#' xml <- xml2::read_xml(twb)
+#' twb_initial_sql(xml)
+#'
 #' @export
-twb_initial_sql <- function(xml_doc) {
-  stopifnot(inherits(xml_doc, "xml_document"))
-  nodes <- xml2::xml_find_all(xml_doc, "//connection/initial-sql | //named-connection/initial-sql")
-  if (length(nodes) == 0) {
+twb_initial_sql <- function(x) {
+  xml_doc <- .twb_resolve_xml(x)
+  nodes <- xml2::xml_find_all(
+    xml_doc,
+    "//connection/initial-sql | //named-connection/initial-sql"
+  )
+  if (length(nodes) == 0L) {
     return(tibble::tibble(connection_id = character(), initial_sql = character()))
   }
   tibble::tibble(
diff --git a/R/twb_parser.R b/R/twb_parser.R
index ae55152..ef86634 100644
--- a/R/twb_parser.R
+++ b/R/twb_parser.R
@@ -251,7 +251,52 @@ TwbParser <- R6::R6Class(
       safe_call(.ins_dashboard_actions(self$xml_doc, dashboard), .empty_actions())
     },
 
+    # --- Phase 4: analytics ---
 
+    #' @description Calculated field complexity classifications.
+    #' @param include_parameters Logical; include parameter fields. Default `FALSE`.
+    get_calc_complexity = function(include_parameters = FALSE) {
+      safe_call(  # NOTE(review): presumably yields the 2nd argument if the 1st errors -- confirm
+        twb_calc_complexity(self$xml_doc, include_parameters = include_parameters),
+        .empty_calc_complexity()  # fallback value handed to safe_call()
+      )
+    },
+
+    #' @description Field usage matrix across worksheets.
+    #' @param include_filters Include filter appearances. Default `TRUE`.
+    #' @param include_shelves Include shelf appearances. Default `TRUE`.
+    #' @param wide Return wide format (one col per sheet). Default `FALSE`.
+    get_field_usage = function(include_filters  = TRUE,
+                               include_shelves  = TRUE,
+                               wide             = FALSE) {
+      safe_call(  # NOTE(review): presumably yields the 2nd argument if the 1st errors -- confirm
+        twb_field_usage(self$xml_doc,  # all three options are forwarded unchanged
+                        include_filters = include_filters,
+                        include_shelves = include_shelves,
+                        wide            = wide),
+        .empty_field_usage()  # fallback value handed to safe_call()
+      )
+    },
+
+    #' @description Full replication brief for the workbook or a single dashboard.
+    #' @param dashboard Optional dashboard name to scope the brief.
+    #' @param include_sql Include custom SQL blocks. Default `TRUE`.
+    #' @param include_formulas Add `formula_pretty` to calculated fields.
+    #'   Default `TRUE`.
+    #' @param format `"list"` (default) or `"text"`.
+    get_replication_brief = function(dashboard        = NULL,
+                                     include_sql      = TRUE,
+                                     include_formulas = TRUE,
+                                     format           = c("list", "text")) {
+      # Resolve `format` eagerly; inside safe_call() a bad value would likely be masked.
+      format <- match.arg(format)
+      safe_call(
+        twb_replication_brief(self, dashboard = dashboard, format = format,
+                              include_sql      = include_sql,
+                              include_formulas = include_formulas),
+        list()
+      )
+    },
 
     # --- validator bridge ---
     #' @description Validate relationships; optionally stop on failure.
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 6a7c5e8..a888798 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -21,6 +21,8 @@ DAGs
 datasource
 deduplication
 dplyr
+LOD
+visualisation
 Datasource
 datasources
 Datasources
diff --git a/inst/cheatsheet/twbparser-cheatsheet.tex b/inst/cheatsheet/twbparser-cheatsheet.tex
new file mode 100644
index 0000000..1ad0703
--- /dev/null
+++ b/inst/cheatsheet/twbparser-cheatsheet.tex
@@ -0,0 +1,576 @@
+% twbparser R Package Cheatsheet
+% Compile with: pdflatex twbparser-cheatsheet.tex (twice)
+% Requires: geometry, fontenc, inputenc, lmodern, microtype, xcolor, listings, tcolorbox, multicol, parskip, enumitem, hyperref
+
+\documentclass[10pt,landscape]{article}
+
+% ---- Page geometry -----------------------------------------------------------
+\usepackage[
+  landscape,
+  top=6mm, bottom=6mm,
+  left=6mm, right=6mm,
+  includehead=false
+]{geometry}
+
+% ---- Encoding & fonts --------------------------------------------------------
+\usepackage[T1]{fontenc}
+\usepackage[utf8]{inputenc}
+\usepackage{lmodern}
+\usepackage{microtype}
+
+% ---- Colours -----------------------------------------------------------------
+\usepackage{xcolor}
+\definecolor{twbBlue}   {HTML}{1F4E79}   % deep navy
+\definecolor{twbMid}    {HTML}{2E75B6}   % mid blue
+\definecolor{twbLight}  {HTML}{DDEEFF}   % pale blue fill
+\definecolor{twbGreen}  {HTML}{1E6B3C}   % dark green
+\definecolor{twbGreenL} {HTML}{D6F0E0}   % pale green fill
+\definecolor{twbOrange} {HTML}{BF4A00}   % orange / warning
+\definecolor{twbOrangeL}{HTML}{FFE8D6}   % pale orange
+\definecolor{twbPurple} {HTML}{5B2D8E}   % purple
+\definecolor{twbPurpleL}{HTML}{EDE0F5}   % pale purple
+\definecolor{twbGray}   {HTML}{4A4A4A}   % body text
+\definecolor{codeBack}  {HTML}{F5F5F5}   % code background
+\definecolor{codeFore}  {HTML}{1A1A1A}   % code text
+
+% ---- Code listings -----------------------------------------------------------
+\usepackage{listings}
+\lstset{
+  language=R,
+  basicstyle=\ttfamily\scriptsize\color{codeFore},
+  backgroundcolor=\color{codeBack},
+  frame=none,
+  breaklines=true,
+  breakatwhitespace=true,
+  tabsize=2,
+  keepspaces=true,
+  showstringspaces=false,
+  keywordstyle=\color{twbMid}\bfseries,
+  commentstyle=\color{twbGray}\itshape,
+  stringstyle=\color{twbGreen},
+  literate={<-}{{{\color{twbOrange}<-}}}2
+            {|}{{{\color{twbOrange}|}}}1,
+}
+
+% ---- Boxes -------------------------------------------------------------------
+\usepackage[most]{tcolorbox}
+
+% Section header box
+\newtcolorbox{secbox}[2][twbBlue]{
+  colback=#1, colframe=#1,
+  coltext=white,
+  fonttitle=\bfseries\small,
+  title=#2,
+  top=1pt, bottom=1pt, left=3pt, right=3pt,
+  boxsep=0pt, arc=2pt,
+  before skip=3pt, after skip=2pt
+}
+
+% Sub-section box (lighter fill, dark border)
+\newtcolorbox{subbox}[2][twbLight]{
+  colback=#1, colframe=twbMid,
+  coltext=twbGray,
+  fonttitle=\bfseries\scriptsize\color{twbBlue},
+  title=#2,
+  top=1pt, bottom=1pt, left=3pt, right=3pt,
+  boxsep=0pt, arc=2pt,
+  before skip=2pt, after skip=1pt,
+  leftrule=2pt, rightrule=0pt, toprule=0pt, bottomrule=0pt
+}
+
+% Tip / note box
+\newtcolorbox{tipbox}{
+  colback=twbOrangeL, colframe=twbOrange,
+  coltext=twbGray,
+  fonttitle=\bfseries\scriptsize\color{twbOrange},
+  title=TIP,
+  top=1pt, bottom=1pt, left=3pt, right=3pt,
+  boxsep=0pt, arc=2pt,
+  before skip=2pt, after skip=1pt,
+  leftrule=2pt, rightrule=0pt, toprule=0pt, bottomrule=0pt
+}
+
+% ---- Layout helpers ----------------------------------------------------------
+\usepackage{multicol}
+\setlength{\columnsep}{4mm}
+\setlength{\columnseprule}{0.3pt}
+\def\columnseprulecolor{\color{twbMid!40}}
+
+\usepackage{parskip}
+\setlength{\parskip}{1pt}
+\setlength{\parindent}{0pt}
+
+\usepackage{enumitem}
+\setlist[itemize]{
+  leftmargin=8pt, itemsep=0pt, parsep=0pt, topsep=0pt,
+  label=\textcolor{twbMid}{\textbullet}
+}
+
+% ---- Hyperref ----------------------------------------------------------------
+\usepackage{hyperref}
+\hypersetup{colorlinks, urlcolor=twbMid, linkcolor=twbMid}
+
+% ---- Helpers -----------------------------------------------------------------
+\newcommand{\fn}[1]{\texttt{\textbf{\color{twbBlue}#1}}}
+\newcommand{\pkg}[1]{\texttt{\color{twbGreen}#1}}
+\newcommand{\ret}[1]{\textit{\scriptsize\color{twbGray}→ #1}}
+\newcommand{\opt}[1]{\textcolor{twbOrange}{\texttt{#1}}}
+\newcommand{\sep}{\vspace{1pt}\hrule\vspace{2pt}}
+
+\pagestyle{empty}
+
+% ==============================================================================
+\begin{document}
+% ==============================================================================
+
+% ---- Header ------------------------------------------------------------------
+\begin{tcolorbox}[
+  colback=twbBlue, colframe=twbBlue,
+  coltext=white, arc=3pt,
+  top=4pt, bottom=4pt, left=6pt, right=6pt,
+  before skip=0pt, after skip=4pt
+]
+  \begin{minipage}{0.72\linewidth}
+    {\Large\bfseries twbparser}\quad
+    {\normalsize\color{twbLight} v0.4.0 \quad|\quad Parse Tableau .twb/.twbx files into tidy data}\\[1pt]
+    {\scriptsize\color{twbLight}%
+      \texttt{pak::pak("PrigasG/twbparser")} \quad
+      \texttt{devtools::install\_github("PrigasG/twbparser")} \quad
+      \href{https://prigasg.github.io/twbparser/}{prigasg.github.io/twbparser}}
+  \end{minipage}%
+  \hfill
+  \begin{minipage}{0.26\linewidth}\raggedleft
+    {\scriptsize\color{twbLight}
+      All \texttt{twb\_*()} functions accept\\
+      a \pkg{TwbParser} object \textbf{or} an \texttt{xml2} document.\\
+      Active bindings available as \texttt{parser\$property}.}
+  \end{minipage}
+\end{tcolorbox}
+
+% ---- Three-column body -------------------------------------------------------
+\begin{multicols}{3}
+
+%% ============================================================
+%% COLUMN 1
+%% ============================================================
+
+% ---- Getting Started ---------------------------------------------------------
+\begin{secbox}[twbBlue]{Getting Started}
+\end{secbox}
+
+\begin{lstlisting}
+library(twbparser)
+
+# Parse a .twb or .twbx file
+parser <- TwbParser$new("dashboard.twbx")
+
+# Quick console summary (no parens needed)
+parser$summary
+
+# 1-row overview tibble
+parser$overview
+\end{lstlisting}
+
+\begin{subbox}[twbLight]{Key active bindings (no parens)}
+\begin{lstlisting}
+parser$overview          # 1-row summary tibble
+parser$pages             # all pages
+parser$pages_summary     # per-page counts
+parser$dashboard_summary # per-dashboard stats
+parser$datasources       # datasource details
+parser$parameters_tbl    # parameter fields
+parser$fields_tbl        # raw fields
+\end{lstlisting}
+\end{subbox}
+
+\sep
+
+% ---- Data Model --------------------------------------------------------------
+\begin{secbox}[twbMid]{Data Model}
+\end{secbox}
+
+\textbf{Datasources \& parameters}
+
+\begin{lstlisting}
+# Named connections (server, file, db ...)
+parser$get_datasources()      # -> tibble
+parser$get_parameters()       # -> tibble
+parser$get_datasources_all()  # combined
+
+# Detect published vs. embedded sources
+twb_published_refs(parser)
+\end{lstlisting}
+
+\textbf{Fields}
+
+\begin{lstlisting}
+# All raw (non-calc) fields
+parser$get_fields()
+
+# Calculated fields + formulas
+parser$get_calculated_fields(
+  pretty           = TRUE,   # add formula_pretty
+  strip_brackets   = FALSE,
+  wrap             = 100L,
+  include_parameters = FALSE
+)
+
+# Low-level helpers (accept xml_doc)
+extract_raw_fields(xml_doc)
+extract_calculated_fields(xml_doc)
+extract_parameters(xml_doc)
+extract_columns_with_table_source(xml_doc)
+\end{lstlisting}
+
+\textbf{SQL}
+
+\begin{lstlisting}
+twb_custom_sql(parser)   # Custom SQL blocks
+twb_initial_sql(parser)  # Initial SQL per conn
+\end{lstlisting}
+
+\sep
+
+% ---- Relationships & Joins ---------------------------------------------------
+\begin{secbox}[twbGreen]{Relationships \& Joins}
+\end{secbox}
+
+\begin{lstlisting}
+# Modern logical-model relationships (2020.2+)
+parser$get_relationships()   # -> tibble
+
+# Legacy SQL joins
+parser$get_joins()           # -> tibble
+parser$get_relations()       # raw <relation> nodes
+
+# Infer implicit pairs by field-name / role
+infer_implicit_relationships(
+  fields_df,          # from get_fields()
+  max_pairs = 50000L
+)
+
+# Validate endpoints + predicate fields
+validate_relationships(parser, strict = FALSE)
+\end{lstlisting}
+
+\begin{subbox}[twbGreenL]{Returned columns}
+\begin{lstlisting}
+# get_relationships() ->
+#  left_table | left_field | operator
+#  right_table | right_field | datasource_left
+
+# get_joins() ->
+#  left_table | left_field | right_table
+#  right_field | join_type | operator
+\end{lstlisting}
+\end{subbox}
+
+%% ============================================================
+%% COLUMN 2
+%% ============================================================
+\columnbreak
+
+% ---- Worksheet Intelligence --------------------------------------------------
+\begin{secbox}[twbPurple]{Worksheet Intelligence}
+\end{secbox}
+
+All four accept \opt{sheet = "Sheet1"} to restrict to one worksheet.
+
+\textbf{Shelf assignments}
+
+\begin{lstlisting}
+twb_sheet_shelves(parser)
+# sheet | shelf | field_clean | datasource
+# shelf: "rows","cols","color","size",
+#        "label","detail","tooltip","shape"
+
+# also: parser$get_sheet_shelves()
+#        parser$sheet_shelves
+\end{lstlisting}
+
+\textbf{Worksheet filters}
+
+\begin{lstlisting}
+twb_sheet_filters(parser, sheet = "Sales")
+# sheet | field_clean | datasource
+# filter_class | include_mode
+# members      | range_min | range_max
+\end{lstlisting}
+
+\textbf{Axis configuration}
+
+\begin{lstlisting}
+twb_sheet_axes(parser)
+# sheet | axis | field_clean
+# scale_type | reversed | include_zero
+\end{lstlisting}
+
+\textbf{Sort directives}
+
+\begin{lstlisting}
+twb_sheet_sorts(parser)
+# sheet | field_clean | datasource
+# sort_order | sort_by
+# sort_by: "field","alphabetic","manual"
+\end{lstlisting}
+
+\sep
+
+% ---- Dashboard Intelligence --------------------------------------------------
+\begin{secbox}[twbPurple]{Dashboard Intelligence}
+\end{secbox}
+
+All three accept \opt{dashboard = "Overview"} to scope to one dashboard.
+
+\textbf{Sheet positions}
+
+\begin{lstlisting}
+twb_dashboard_sheets(parser)
+# dashboard | sheet | zone_id
+# x | y | w | h   (pixel coords)
+
+twb_dashboards(parser)   # high-level overview
+twb_dashboard_summary(parser) # filter + chart counts
+\end{lstlisting}
+
+\textbf{Full zone layout tree}
+
+\begin{lstlisting}
+twb_dashboard_layout(parser, dashboard = "Exec")
+# dashboard | zone_id | parent_zone_id
+# component_type | layout_type | x | y | w | h
+# component_type: "worksheet","filter",
+#   "container","legend","parameter_control",
+#   "text","image","blank"
+# layout_type: "tiled" or "floating"
+\end{lstlisting}
+
+\textbf{Actions}
+
+\begin{lstlisting}
+twb_dashboard_actions(parser)
+# action_name | action_type | source_sheets
+# target_sheet | run_on | url
+# action_type: "filter","url","highlight","parameter"
+# run_on: "select","menu","hover"
+\end{lstlisting}
+
+\textbf{Filters on dashboards}
+
+\begin{lstlisting}
+twb_dashboard_filters(parser)  # filter positions
+twb_page_composition(parser, "Executive Dashboard")
+twb_pages(parser)          # all pages list
+twb_pages_summary(parser)  # per-page stats
+\end{lstlisting}
+
+\sep
+
+% ---- Chart types & Colors ----------------------------------------------------
+\begin{secbox}[twbMid]{Mark Types \& Colors}
+\end{secbox}
+
+\begin{lstlisting}
+twb_charts(parser)  # worksheet | mark_types
+twb_colors(parser)  # palette_name | kind | colors
+\end{lstlisting}
+
+%% ============================================================
+%% COLUMN 3
+%% ============================================================
+\columnbreak
+
+% ---- Analytics (Phase 4) ----------------------------------------------------
+\begin{secbox}[twbOrange]{Analytics — Port to Another Tool}
+\end{secbox}
+
+\textbf{Calculated field complexity}
+
+\begin{lstlisting}
+twb_calc_complexity(parser)
+# datasource | name | datatype | role
+# calc_type  | lod_type | is_table_calc
+# dep_depth  | n_deps   | formula
+#
+# calc_type values (precedence order):
+#  "lod"        FIXED/INCLUDE/EXCLUDE
+#  "table_calc" RUNNING_SUM, WINDOW_*, RANK, ...
+#  "aggregate"  SUM, COUNT, MEDIAN, ...
+#  "raw"        string ops, IF/THEN, literals
+#
+# lod_type: "fixed" | "include" | "exclude" | NA
+# dep_depth: calc-on-calc chain length (0 = raw deps)
+
+# also: parser$get_calc_complexity()
+#        parser$calc_complexity
+\end{lstlisting}
+
+\textbf{Field usage matrix}
+
+\begin{lstlisting}
+# Long form (default)
+twb_field_usage(parser)
+# field_clean | datasource | sheet
+# context     | n_appearances
+# context: "shelf:rows","shelf:color","filter",...
+
+# Wide form -- one column per sheet
+twb_field_usage(parser, wide = TRUE)
+
+# Limit to shelves or filters only
+twb_field_usage(parser, include_filters = FALSE)
+twb_field_usage(parser, include_shelves = FALSE)
+
+# also: parser$get_field_usage(wide = TRUE)
+#        parser$field_usage
+\end{lstlisting}
+
+\textbf{Full replication brief}
+
+\begin{lstlisting}
+# Named list -- all 11 sections assembled
+brief <- twb_replication_brief(parser)
+names(brief)
+# meta | datasources | parameters | custom_sql
+# calculated_fields | field_usage | filters
+# sorts | chart_types | dashboard_layout | actions
+
+# Scope to one dashboard
+brief <- twb_replication_brief(
+  parser,
+  dashboard        = "Overview",
+  include_sql      = TRUE,
+  include_formulas = TRUE   # adds formula_pretty
+)
+
+# Human-readable text output
+cat(twb_replication_brief(parser, format = "text"))
+
+# also: parser$get_replication_brief(dashboard = "X")
+\end{lstlisting}
+
+\sep
+
+% ---- Dependency Graphs -------------------------------------------------------
+\begin{secbox}[twbGreen]{Dependency Graphs}
+\end{secbox}
+
+\begin{lstlisting}
+calcs <- parser$get_calculated_fields()
+
+# Build igraph DAG (input -> output field)
+g <- build_dependency_graph(calcs)
+
+# Plot with base graphics
+plot_dependency_graph(g, calcs, seed = 42)
+
+# Plot field-level relationship DAG
+plot_relationship_graph(
+  parser$get_relationships(), seed = 42)
+
+# Plot source join structure
+plot_source_join_graph(
+  parser$get_joins(),
+  relationships_df = parser$get_relationships())
+\end{lstlisting}
+
+\sep
+
+% ---- TWBX Handling -----------------------------------------------------------
+\begin{secbox}[twbBlue]{TWBX Handling}
+\end{secbox}
+
+\begin{lstlisting}
+# List all files in the archive
+twbx_list("dashboard.twbx")
+# Name | Length | Date | type
+# type: "workbook","extract","image","text",...
+
+# Extract the embedded .twb
+extract_twb_from_twbx(
+  "dashboard.twbx",
+  extract_dir = tempdir())
+
+# Extract specific files by type / pattern
+twbx_extract_files(
+  "dashboard.twbx",
+  types   = "image",          # or "extract"
+  pattern = "^Data/.*\\.hyper$",
+  exdir   = "out/")
+
+# TwbParser handles .twbx automatically
+parser <- TwbParser$new("dashboard.twbx")
+parser$twbx_manifest   # contents tibble
+parser$get_twbx_extracts()
+parser$get_twbx_images()
+\end{lstlisting}
+
+\sep
+
+% ---- Formula Utilities -------------------------------------------------------
+\begin{secbox}[twbMid]{Formula Utilities}
+\end{secbox}
+
+\begin{lstlisting}
+# Pretty-print a single formula string
+tableau_formula_pretty(
+  "IF [Sales] > 0 THEN [Sales] ELSE 0 END",
+  strip_brackets = FALSE,
+  wrap           = 80L)
+
+# Add formula_pretty column to a tibble
+prettify_calculated_fields(
+  calcs,           # tibble with 'formula' col
+  strip_brackets = FALSE,
+  wrap           = 100L)
+\end{lstlisting}
+
+\sep
+
+% ---- Tableau Server (optional) -----------------------------------------------
+\begin{secbox}[twbGray]{Tableau Server / Cloud (optional)}
+\end{secbox}
+
+\begin{lstlisting}
+# Publish info via REST API
+tbs_publish_info(
+  content_id = "abc123",
+  base_url   = "https://server",
+  site       = "MySite",
+  token      = Sys.getenv("TABLEAU_TOKEN"))
+
+# Custom SQL via Metadata GraphQL API
+tbs_custom_sql_graphql(
+  content_id = "abc123",
+  base_url   = "https://server",
+  site       = "MySite",
+  token      = Sys.getenv("TABLEAU_TOKEN"))
+\end{lstlisting}
+
+\begin{tipbox}
+\textbf{All \texttt{twb\_*()} functions} also work with a raw
+\texttt{xml2} document: \texttt{xml <- xml2::read\_xml("file.twb")}.
+Use the \texttt{TwbParser} class for caching, \texttt{.twbx} support,
+and the no-parens active bindings.
+\end{tipbox}
+
+\end{multicols}
+
+% ---- Footer ------------------------------------------------------------------
+\begin{tcolorbox}[
+  colback=twbBlue!10, colframe=twbBlue!30,
+  coltext=twbGray, arc=2pt,
+  top=2pt, bottom=2pt, left=4pt, right=4pt,
+  before skip=2pt, after skip=0pt
+]
+  \scriptsize
+  \textbf{twbparser} \textcolor{twbMid}{0.4.0} \quad
+  MIT License \quad
+  \href{https://github.com/PrigasG/twbparser}{github.com/PrigasG/twbparser} \quad
+  \href{https://prigasg.github.io/twbparser/}{prigasg.github.io/twbparser} \quad
+  Bugs: \href{https://github.com/PrigasG/twbparser/issues}{github.com/PrigasG/twbparser/issues}
+  \hfill
+  \textit{Cheatsheet compiled with \LaTeX{} — see \texttt{inst/cheatsheet/}}
+\end{tcolorbox}
+
+\end{document}
diff --git a/man/TwbParser.Rd b/man/TwbParser.Rd
index a0b7052..fd494bf 100644
--- a/man/TwbParser.Rd
+++ b/man/TwbParser.Rd
@@ -20,7 +20,22 @@ An R6 class generator.
 
 \item{sheet}{Optional worksheet name.}
 
-\item{dashboard}{Optional dashboard name to filter by.}
+\item{include_parameters}{Logical; include parameter fields. Default \code{FALSE}.}
+
+\item{include_filters}{Include filter appearances. Default \code{TRUE}.}
+
+\item{include_shelves}{Include shelf appearances. Default \code{TRUE}.}
+
+\item{wide}{Return wide format (one col per sheet). Default \code{FALSE}.}
+
+\item{dashboard}{Optional dashboard name to scope the brief.}
+
+\item{include_sql}{Include custom SQL blocks. Default \code{TRUE}.}
+
+\item{include_formulas}{Add \code{formula_pretty} to calculated fields.
+Default \code{TRUE}.}
+
+\item{format}{\code{"list"} (default) or \code{"text"}.}
 
 \item{error}{If \code{TRUE}, \code{stop()} when validation fails.}
 }
@@ -49,6 +64,12 @@ Full zone layout with container hierarchy.
 
 Dashboard and workbook actions.
 
+Calculated field complexity classifications.
+
+Field usage matrix across worksheets.
+
+Full replication brief for the workbook or a single dashboard.
+
 Validate relationships; optionally stop on failure.
 
 Print a concise summary of parsed content.
diff --git a/man/twb_calc_complexity.Rd b/man/twb_calc_complexity.Rd
new file mode 100644
index 0000000..6af6a61
--- /dev/null
+++ b/man/twb_calc_complexity.Rd
@@ -0,0 +1,46 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/analytics.R
+\name{twb_calc_complexity}
+\alias{twb_calc_complexity}
+\title{Classify calculated fields by complexity}
+\usage{
+twb_calc_complexity(x, include_parameters = FALSE)
+}
+\arguments{
+\item{x}{A \code{TwbParser} object or an \code{xml2} document.}
+
+\item{include_parameters}{Logical; if \code{TRUE}, include parameter fields
+(they always land in \code{calc_type = "raw"} and \code{dep_depth = 0}).
+Default \code{FALSE}.}
+}
+\value{
+A tibble with columns:
+\describe{
+\item{datasource}{Datasource the field belongs to.}
+\item{name}{Human-readable field name.}
+\item{tableau_internal_name}{Bracketed internal Tableau name.}
+\item{datatype}{Field data type.}
+\item{role}{\code{"measure"} or \code{"dimension"}.}
+\item{calc_type}{One of \code{"lod"}, \code{"table_calc"}, \code{"aggregate"}, \code{"raw"}.
+Tested in that precedence order.}
+\item{lod_type}{\code{"fixed"}, \code{"include"}, or \code{"exclude"}; \code{NA} if not LOD.}
+\item{is_table_calc}{Logical; existing heuristic flag preserved for
+backward compatibility.}
+\item{dep_depth}{Integer; longest chain of calc-on-calc dependencies.
+\code{0} means the field only references raw fields (or has no references).}
+\item{n_deps}{Integer; count of distinct bracketed tokens in the formula.}
+\item{formula}{Raw formula string.}
+}
+}
+\description{
+Returns every calculated field in the workbook enriched with a computation
+category (\code{calc_type}), LOD sub-type, dependency count, and dependency depth
+— the maximum number of calc-on-calc hops in the field's dependency chain.
+}
+\examples{
+twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
+stopifnot(nzchar(twb), file.exists(twb))
+xml <- xml2::read_xml(twb)
+twb_calc_complexity(xml)
+
+}
diff --git a/man/twb_custom_sql.Rd b/man/twb_custom_sql.Rd
index 71dac97..ac3ef38 100644
--- a/man/twb_custom_sql.Rd
+++ b/man/twb_custom_sql.Rd
@@ -2,16 +2,31 @@
 % Please edit documentation in R/sql.R
 \name{twb_custom_sql}
 \alias{twb_custom_sql}
-\title{Extract Custom SQL relations from a TWB XML}
+\title{Extract Custom SQL relations from a Tableau workbook}
 \usage{
-twb_custom_sql(xml_doc)
+twb_custom_sql(x)
 }
 \arguments{
-\item{xml_doc}{An xml2 document for a .twb}
+\item{x}{A \code{TwbParser} object \strong{or} an \code{xml2} document.}
 }
 \value{
-tibble with relation_name, relation_type, custom_sql
+A tibble with columns:
+\describe{
+\item{relation_name}{Name attribute of the relation node.}
+\item{relation_type}{Type attribute (e.g. \code{"text"}, \code{"table"}).}
+\item{custom_sql}{Full SQL text.}
+\item{is_custom_sql}{\code{TRUE} when the text begins with \code{SELECT} or \code{WITH}.}
+}
 }
 \description{
-Extract Custom SQL relations from a TWB XML
+Finds every \verb{<relation formula="...">} node that looks like a SQL statement
+and returns its name, type, raw SQL text, and a flag for whether it starts
+with \code{SELECT} or \code{WITH}.
+}
+\examples{
+twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
+stopifnot(nzchar(twb), file.exists(twb))
+xml <- xml2::read_xml(twb)
+twb_custom_sql(xml)
+
 }
diff --git a/man/twb_field_usage.Rd b/man/twb_field_usage.Rd
new file mode 100644
index 0000000..29b6551
--- /dev/null
+++ b/man/twb_field_usage.Rd
@@ -0,0 +1,54 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/analytics.R
+\name{twb_field_usage}
+\alias{twb_field_usage}
+\title{Field usage matrix across worksheets}
+\usage{
+twb_field_usage(
+  x,
+  include_filters = TRUE,
+  include_shelves = TRUE,
+  wide = FALSE
+)
+}
+\arguments{
+\item{x}{A \code{TwbParser} object or an \code{xml2} document.}
+
+\item{include_filters}{Logical; include filter appearances. Default \code{TRUE}.}
+
+\item{include_shelves}{Logical; include shelf appearances (rows, cols, color,
+size, etc.). Default \code{TRUE}.}
+
+\item{wide}{Logical; if \code{TRUE}, pivot to one row per field with one column
+per sheet containing a comma-separated list of contexts, or \code{NA} if the
+field does not appear on that sheet. Default \code{FALSE}.}
+}
+\value{
+\strong{Long form} (\code{wide = FALSE}): a tibble with columns:
+\describe{
+\item{field_clean}{Human-readable field name.}
+\item{datasource}{Datasource the field belongs to.}
+\item{sheet}{Worksheet name.}
+\item{context}{Usage context, e.g. \code{"shelf:rows"}, \code{"shelf:color"},
+\code{"filter"}.}
+\item{n_appearances}{Number of times the field appears in this context on
+this sheet (handles multi-pill rows/cols).}
+}
+
+\strong{Wide form} (\code{wide = TRUE}): one row per \verb{(field_clean, datasource)},
+one column per sheet, cell value is a comma-separated context string or
+\code{NA}.
+}
+\description{
+Combines shelf placement and filter usage into a tidy long tibble showing
+where each field appears and in what capacity across all (or selected)
+worksheets.
+}
+\examples{
+twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
+stopifnot(nzchar(twb), file.exists(twb))
+xml <- xml2::read_xml(twb)
+twb_field_usage(xml)
+twb_field_usage(xml, wide = TRUE)
+
+}
diff --git a/man/twb_initial_sql.Rd b/man/twb_initial_sql.Rd
index 63a1cd2..7075da3 100644
--- a/man/twb_initial_sql.Rd
+++ b/man/twb_initial_sql.Rd
@@ -2,16 +2,28 @@
 % Please edit documentation in R/sql.R
 \name{twb_initial_sql}
 \alias{twb_initial_sql}
-\title{Extract Initial SQL statements from connections (if present)}
+\title{Extract Initial SQL statements from Tableau connections}
 \usage{
-twb_initial_sql(xml_doc)
+twb_initial_sql(x)
 }
 \arguments{
-\item{xml_doc}{An xml2 document for a .twb}
+\item{x}{A \code{TwbParser} object \strong{or} an \code{xml2} document.}
 }
 \value{
-tibble with connection_id, initial_sql
+A tibble with columns:
+\describe{
+\item{connection_id}{Name or caption of the parent connection element.}
+\item{initial_sql}{SQL text of the initial statement.}
+}
 }
 \description{
-Extract Initial SQL statements from connections (if present)
+Returns any \verb{<initial-sql>} nodes found inside connection or
+named-connection elements.
+}
+\examples{
+twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
+stopifnot(nzchar(twb), file.exists(twb))
+xml <- xml2::read_xml(twb)
+twb_initial_sql(xml)
+
 }
diff --git a/man/twb_published_refs.Rd b/man/twb_published_refs.Rd
index 1d24670..9e0272b 100644
--- a/man/twb_published_refs.Rd
+++ b/man/twb_published_refs.Rd
@@ -2,16 +2,32 @@
 % Please edit documentation in R/published.R
 \name{twb_published_refs}
 \alias{twb_published_refs}
-\title{Detect likely references to published data sources (vs embedded)}
+\title{Detect references to published data sources}
 \usage{
-twb_published_refs(xml_doc)
+twb_published_refs(x)
 }
 \arguments{
-\item{xml_doc}{An xml2 document for a .twb}
+\item{x}{A \code{TwbParser} object \strong{or} an \code{xml2} document.}
 }
 \value{
-tibble with datasource name, caption, likely_published, hints
+A tibble with columns:
+\describe{
+\item{name}{Internal datasource name.}
+\item{caption}{User-visible caption.}
+\item{hasconn}{Value of the \code{hasconnection} attribute.}
+\item{likely_published}{\code{TRUE} when \code{hasconnection = false} or when the
+node text contains published-source markers.}
+\item{hints}{Short explanation of the classification.}
+}
 }
 \description{
-Detect likely references to published data sources (vs embedded)
+Inspects datasource nodes and heuristically flags those that reference a
+published (server-side) source rather than an embedded one.
+}
+\examples{
+twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
+stopifnot(nzchar(twb), file.exists(twb))
+xml <- xml2::read_xml(twb)
+twb_published_refs(xml)
+
 }
diff --git a/man/twb_replication_brief.Rd b/man/twb_replication_brief.Rd
new file mode 100644
index 0000000..e1e1f04
--- /dev/null
+++ b/man/twb_replication_brief.Rd
@@ -0,0 +1,66 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/analytics.R
+\name{twb_replication_brief}
+\alias{twb_replication_brief}
+\title{Replication brief for a Tableau workbook or dashboard}
+\usage{
+twb_replication_brief(
+  x,
+  dashboard = NULL,
+  include_sql = TRUE,
+  include_formulas = TRUE,
+  format = c("list", "text")
+)
+}
+\arguments{
+\item{x}{A \code{TwbParser} object or an \code{xml2} document.}
+
+\item{dashboard}{Optional character scalar. When supplied, sheet-level
+sections (filters, sorts, chart types, field usage, layout) are scoped to
+the sheets that belong to this dashboard.}
+
+\item{include_sql}{Logical; include custom SQL blocks in \verb{$custom_sql}.
+Default \code{TRUE}.}
+
+\item{include_formulas}{Logical; when \code{TRUE}, a \code{formula_pretty} column is
+added to \verb{$calculated_fields}. Default \code{TRUE}.}
+
+\item{format}{Either \code{"list"} (default) to return a named R list, or
+\code{"text"} to return a single formatted character string suitable for
+printing or writing to a file.}
+}
+\value{
+\strong{\code{format = "list"}}: a named list with elements:
+\describe{
+\item{meta}{1-row tibble: file name, counts, generation timestamp.}
+\item{datasources}{Datasource connection details.}
+\item{parameters}{Parameter fields with current values.}
+\item{custom_sql}{Custom SQL blocks, or \code{NULL} if \code{include_sql = FALSE}.}
+\item{calculated_fields}{Tibble from \code{\link[=twb_calc_complexity]{twb_calc_complexity()}}, optionally
+with a \code{formula_pretty} column.}
+\item{field_usage}{Tibble from \code{\link[=twb_field_usage]{twb_field_usage()}}.}
+\item{filters}{Worksheet filters (scoped to \code{dashboard} if given).}
+\item{sorts}{Worksheet sorts (scoped to \code{dashboard} if given).}
+\item{chart_types}{Mark types per worksheet.}
+\item{dashboard_layout}{Zone positions from \code{\link[=twb_dashboard_sheets]{twb_dashboard_sheets()}}.}
+\item{actions}{Dashboard actions from \code{\link[=twb_dashboard_actions]{twb_dashboard_actions()}}.}
+}
+
+\strong{\code{format = "text"}}: a single \code{character(1)} with section headers and
+tabular output.
+}
+\description{
+Assembles all extracted intelligence — datasources, parameters, calculated
+fields with complexity classifications, field usage, filters, sorts, chart
+types, dashboard layout, and actions — into a single named list (or
+formatted text) ready for use when porting to another visualisation tool.
+}
+\examples{
+twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
+stopifnot(nzchar(twb), file.exists(twb))
+xml <- xml2::read_xml(twb)
+brief <- twb_replication_brief(xml)
+names(brief)
+brief$meta
+
+}
diff --git a/tests/testthat/test-analytics.R b/tests/testthat/test-analytics.R
new file mode 100644
index 0000000..8c1feee
--- /dev/null
+++ b/tests/testthat/test-analytics.R
@@ -0,0 +1,355 @@
+## Tests for Phase 4: twb_calc_complexity, twb_field_usage, twb_replication_brief
+
+# ---- helpers -----------------------------------------------------------------
+
+# Test helper: wrap calc specs (name, formula, ...) in a minimal workbook XML.
+.make_calc_xml <- function(...) {
+  render_column <- function(calc) {
+    caption_attr <- if (is.null(calc$caption)) "" else sprintf(' caption="%s"', calc$caption)
+    sprintf(
+      '<column name="%s"%s datatype="%s" role="%s">
+         <calculation class="tableau" formula="%s"/>
+       </column>',
+      calc$name, caption_attr,
+      calc$datatype %||% "string", calc$role %||% "measure",
+      gsub('"', "&quot;", calc$formula %||% "")
+    )
+  }
+  columns_xml <- paste(vapply(list(...), render_column, character(1L)), collapse = "\n")
+  workbook_template <- '<workbook>
+       <datasources>
+         <datasource name="ds1">%s</datasource>
+       </datasources>
+       <worksheets/>
+     </workbook>'
+  xml2::read_xml(sprintf(workbook_template, columns_xml))
+}
+
+# Null-coalescing fallback: yields `a` unless it is NULL, otherwise `b`.
+`%||%` <- function(a, b) if (is.null(a)) b else a
+
+# ---- twb_calc_complexity: columns -------------------------------------------
+
+test_that("twb_calc_complexity returns expected columns", {
+  xml <- .make_calc_xml(
+    list(name = "[Sales]", formula = "SUM([Price])"),
+    list(name = "[Count]", formula = "COUNT([Orders])")
+  )
+  out <- twb_calc_complexity(xml)
+  expected_cols <- c("datasource", "name", "tableau_internal_name",
+                     "datatype", "role", "calc_type", "lod_type",
+                     "is_table_calc", "dep_depth", "n_deps", "formula")
+  expect_true(all(expected_cols %in% names(out)))
+})
+
+# ---- twb_calc_complexity: LOD detection -------------------------------------
+
+test_that("twb_calc_complexity detects FIXED LOD", {
+  xml <- .make_calc_xml(
+    list(name = "[FixedSales]", formula = "{ FIXED [Category] : SUM([Sales]) }")
+  )
+  out <- twb_calc_complexity(xml)
+  expect_equal(out$calc_type, "lod")
+  expect_equal(out$lod_type, "fixed")
+})
+
+test_that("twb_calc_complexity detects INCLUDE LOD", {
+  xml <- .make_calc_xml(
+    list(name = "[IncSales]", formula = "{ INCLUDE [Category] : SUM([Sales]) }")
+  )
+  out <- twb_calc_complexity(xml)
+  expect_equal(out$calc_type, "lod")
+  expect_equal(out$lod_type, "include")
+})
+
+test_that("twb_calc_complexity detects EXCLUDE LOD", {
+  xml <- .make_calc_xml(
+    list(name = "[ExclSales]", formula = "{ EXCLUDE [Category] : SUM([Sales]) }")
+  )
+  out <- twb_calc_complexity(xml)
+  expect_equal(out$calc_type, "lod")
+  expect_equal(out$lod_type, "exclude")
+})
+
+# ---- twb_calc_complexity: table_calc detection ------------------------------
+
+test_that("twb_calc_complexity detects table calcs", {
+  xml <- .make_calc_xml(
+    list(name = "[Running]", formula = "RUNNING_SUM(SUM([Sales]))")
+  )
+  out <- twb_calc_complexity(xml)
+  expect_equal(out$calc_type, "table_calc")
+  expect_true(out$is_table_calc)
+})
+
+# ---- twb_calc_complexity: aggregate detection -------------------------------
+
+test_that("twb_calc_complexity classifies SUM as aggregate", {
+  xml <- .make_calc_xml(
+    list(name = "[TotalSales]", formula = "SUM([Sales])")
+  )
+  out <- twb_calc_complexity(xml)
+  expect_equal(out$calc_type, "aggregate")
+  expect_true(is.na(out$lod_type))
+})
+
+test_that("twb_calc_complexity classifies COUNTD as aggregate", {
+  xml <- .make_calc_xml(
+    list(name = "[UniqueCustomers]", formula = "COUNTD([Customer ID])")
+  )
+  out <- twb_calc_complexity(xml)
+  expect_equal(out$calc_type, "aggregate")
+})
+
+# ---- twb_calc_complexity: raw classification --------------------------------
+
+test_that("twb_calc_complexity classifies string concat as raw", {
+  xml <- .make_calc_xml(
+    list(name = "[FullName]", formula = "[First] + &quot; &quot; + [Last]",
+         datatype = "string")
+  )
+  out <- twb_calc_complexity(xml)
+  expect_equal(out$calc_type, "raw")
+  expect_true(is.na(out$lod_type))
+})
+
+# ---- twb_calc_complexity: LOD wins over table_calc precedence ---------------
+
+test_that("LOD wins over table_calc when both patterns match", {
+  # Contrived formula with both patterns
+  xml <- .make_calc_xml(
+    list(name = "[Weird]",
+         formula = "{ FIXED [Cat] : RUNNING_SUM(SUM([Sales])) }")
+  )
+  out <- twb_calc_complexity(xml)
+  expect_equal(out$calc_type, "lod")
+})
+
+# ---- twb_calc_complexity: dep_depth -----------------------------------------
+
+test_that("dep_depth is 0 for direct formula with no calc deps", {
+  xml <- .make_calc_xml(
+    list(name = "[Revenue]", formula = "SUM([Sales])")
+  )
+  out <- twb_calc_complexity(xml)
+  expect_equal(out$dep_depth, 0L)
+})
+
+test_that("dep_depth is 1 for calc depending on another calc", {
+  xml <- .make_calc_xml(
+    list(name = "[Revenue]",  formula = "SUM([Sales])"),
+    list(name = "[Revenue2]", formula = "[Revenue] * 2")
+  )
+  out <- twb_calc_complexity(xml)
+  revenue2 <- out[out$name == "Revenue2", ]
+  expect_equal(revenue2$dep_depth, 1L)
+})
+
+test_that("dep_depth is 2 for three-level calc chain", {
+  xml <- .make_calc_xml(
+    list(name = "[A]", formula = "SUM([x])"),
+    list(name = "[B]", formula = "[A] * 2"),
+    list(name = "[C]", formula = "[B] + 1")
+  )
+  out <- twb_calc_complexity(xml)
+  cc  <- out[out$name == "C", ]
+  expect_equal(cc$dep_depth, 2L)
+})
+
+# ---- twb_calc_complexity: empty workbook ------------------------------------
+
+test_that("twb_calc_complexity returns typed empty tibble for no calcs", {
+  xml <- xml2::read_xml(
+    '<workbook><datasources/><worksheets/></workbook>'
+  )
+  out <- twb_calc_complexity(xml)
+  expect_equal(nrow(out), 0L)
+  expect_true("calc_type" %in% names(out))
+  expect_true("dep_depth" %in% names(out))
+})
+
+# ---- twb_field_usage: basic columns -----------------------------------------
+
+test_that("twb_field_usage returns expected columns (long form)", {
+  xml <- xml2::read_xml(
+    '<workbook>
+       <worksheets>
+         <worksheet name="Sheet1">
+           <table>
+             <view>
+               <datasources>
+                 <datasource name="ds1" caption="ds1"/>
+               </datasources>
+               <columns>
+                 <column worksheet="Sheet1" name="[none:Sales:qk]"
+                         datasource-name="ds1">
+                   <encodings>
+                     <rows column="[ds1].[none:Sales:qk]"/>
+                   </encodings>
+                 </column>
+               </columns>
+             </view>
+             <rows>[ds1].[none:Sales:qk]</rows>
+             <cols></cols>
+           </table>
+         </worksheet>
+       </worksheets>
+     </workbook>'
+  )
+  out <- twb_field_usage(xml)
+  expect_true(all(c("field_clean", "datasource", "sheet",
+                    "context", "n_appearances") %in% names(out)))
+})
+
+test_that("twb_field_usage wide=TRUE returns one row per field", {
+  xml <- xml2::read_xml(
+    '<workbook>
+       <worksheets>
+         <worksheet name="Sheet1">
+           <table>
+             <rows>[ds1].[none:Revenue:qk]</rows>
+             <cols></cols>
+           </table>
+         </worksheet>
+         <worksheet name="Sheet2">
+           <table>
+             <rows>[ds1].[none:Revenue:qk]</rows>
+             <cols></cols>
+           </table>
+         </worksheet>
+       </worksheets>
+     </workbook>'
+  )
+  out_long <- twb_field_usage(xml, wide = FALSE)
+  out_wide <- twb_field_usage(xml, wide = TRUE)
+  # Wide has no sheet / context / n_appearances columns
+  expect_false("context" %in% names(out_wide))
+  expect_false("n_appearances" %in% names(out_wide))
+  # Each unique (field_clean, datasource) becomes one row
+  expect_equal(nrow(out_wide), nrow(dplyr::distinct(out_long, field_clean, datasource)))
+})
+
+test_that("twb_field_usage include_shelves=FALSE shows only filters", {
+  xml <- xml2::read_xml(
+    '<workbook>
+       <worksheets>
+         <worksheet name="Sheet1">
+           <table>
+             <rows>[ds1].[none:Revenue:qk]</rows>
+             <filters>
+               <filter class="categorical" column="[ds1].[none:Region:nk]"/>
+             </filters>
+           </table>
+         </worksheet>
+       </worksheets>
+     </workbook>'
+  )
+  out <- twb_field_usage(xml, include_shelves = FALSE)
+  # Unconditional: all() is TRUE for zero rows, and the test is never empty.
+  expect_true(all(out$context == "filter"))
+})
+
+test_that("twb_field_usage returns empty tibble when both FALSE", {
+  xml <- xml2::read_xml('<workbook><worksheets/></workbook>')
+  expect_message(
+    out <- twb_field_usage(xml, include_filters = FALSE, include_shelves = FALSE),
+    "both FALSE"
+  )
+  expect_equal(nrow(out), 0L)
+})
+
+test_that("twb_field_usage empty workbook returns empty tibble", {
+  xml <- xml2::read_xml('<workbook><worksheets/></workbook>')
+  out <- twb_field_usage(xml)
+  expect_equal(nrow(out), 0L)
+  expect_true(all(c("field_clean", "datasource", "sheet", "context",
+                    "n_appearances") %in% names(out)))
+})
+
+# ---- twb_replication_brief: list structure ----------------------------------
+
+test_that("twb_replication_brief returns a list with expected elements", {
+  twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
+  skip_if_not(nzchar(twb) && file.exists(twb), "example .twb not found")
+  xml  <- xml2::read_xml(twb)
+  brief <- twb_replication_brief(xml)
+  expect_type(brief, "list")
+  expected_keys <- c("meta", "datasources", "parameters", "custom_sql",
+                     "calculated_fields", "field_usage", "filters", "sorts",
+                     "chart_types", "dashboard_layout", "actions")
+  expect_true(all(expected_keys %in% names(brief)))
+})
+
+test_that("twb_replication_brief meta has correct columns and types", {
+  twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
+  skip_if_not(nzchar(twb) && file.exists(twb), "example .twb not found")
+  xml  <- xml2::read_xml(twb)
+  meta <- twb_replication_brief(xml)$meta
+  expect_equal(nrow(meta), 1L)
+  expect_true(is.character(meta$workbook_file))
+  expect_true(is.integer(meta$n_datasources))
+  expect_true(is.integer(meta$n_worksheets))
+  expect_true(is.character(meta$generated_at))
+})
+
+test_that("twb_replication_brief format=text returns character(1)", {
+  twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
+  skip_if_not(nzchar(twb) && file.exists(twb), "example .twb not found")
+  xml  <- xml2::read_xml(twb)
+  txt  <- twb_replication_brief(xml, format = "text")
+  expect_type(txt, "character")
+  expect_length(txt, 1L)
+  expect_match(txt, "REPLICATION BRIEF")
+  expect_match(txt, "CALCULATED FIELDS")
+})
+
+test_that("twb_replication_brief include_sql=FALSE sets custom_sql to NULL", {
+  twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
+  skip_if_not(nzchar(twb) && file.exists(twb), "example .twb not found")
+  xml   <- xml2::read_xml(twb)
+  brief <- twb_replication_brief(xml, include_sql = FALSE)
+  expect_null(brief$custom_sql)
+})
+
+test_that("twb_replication_brief include_formulas=TRUE adds formula_pretty", {
+  twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
+  skip_if_not(nzchar(twb) && file.exists(twb), "example .twb not found")
+  xml   <- xml2::read_xml(twb)
+  brief <- twb_replication_brief(xml, include_formulas = TRUE)
+  cf    <- brief$calculated_fields
+  # Zero-row results stay exempt, but an expectation is now always recorded.
+  expect_true(nrow(cf) == 0L || "formula_pretty" %in% names(cf))
+})
+
+# ---- TwbParser integration --------------------------------------------------
+
+test_that("TwbParser exposes get_calc_complexity and active binding", {
+  twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
+  skip_if_not(nzchar(twb) && file.exists(twb), "example .twb not found")
+  parser <- TwbParser$new(twb)
+  method_out  <- parser$get_calc_complexity()
+  binding_out <- parser$calc_complexity
+  expect_s3_class(method_out, "tbl_df")
+  expect_identical(binding_out, method_out)
+})
+
+test_that("TwbParser exposes get_field_usage and active binding", {
+  twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
+  skip_if_not(nzchar(twb) && file.exists(twb), "example .twb not found")
+  parser <- TwbParser$new(twb)
+  method_out  <- parser$get_field_usage()
+  binding_out <- parser$field_usage
+  expect_s3_class(method_out, "tbl_df")
+  expect_identical(binding_out, method_out)
+})
+
+test_that("TwbParser get_replication_brief returns a list", {
+  twb <- system.file("extdata", "test_for_wenjie.twb", package = "twbparser")
+  skip_if_not(nzchar(twb) && file.exists(twb), "example .twb not found")
+  parser <- TwbParser$new(twb)
+  brief  <- parser$get_replication_brief()
+  expect_type(brief, "list")
+  expect_true("meta" %in% names(brief))
+  # workbook_file should be the basename of the .twb path
+  expect_equal(brief$meta$workbook_file, basename(twb))
+})