diff --git a/CHANGELOG.md b/CHANGELOG.md index e12acf9..97a5095 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- **Math in labels** — any label (title, axis labels, `TextPlot` markdown bodies) may embed `$...$` math written in LaTeX-ish syntax (`$\sigma^2$`, `$\frac{a}{b}$`, `$\sqrt{x^2+y^2}$`). Math regions are lowered to inline Unicode — Greek letters, operators, super/subscripts (all-or-nothing, with a clean `x^(2q)` fallback), `\frac`→`a/b`, `\sqrt`→`√(…)` — by every backend including the terminal. Zero dependencies, always on; a literal dollar is written `\$`. See *Reference → Math in Labels*. - **`QuiverPlot`** — 2-D vector field rendered as arrows. Each arrow has a tail at `(x, y)` and a vector `(u, v)`. Features: `from_function()` constructor for sampling a closure on a regular grid; auto-scaled arrow length (longest arrow ≈ one grid cell via a `span/√n` heuristic) or explicit `with_scale`; proportional arrow heads that make every arrow "look like an arrow" regardless of magnitude; three pivot modes (`Tail`, `Middle`, `Tip`); optional magnitude-driven colormap with automatic colorbar; `tight_bounds` opt-in for dense fields and independent `with_clip_to_plot_area()` for plot-area clipping; combo helper `with_magnitude_colormap(cmap, label)`. CLI: `kuva quiver`. - **Pre-compiled release binaries** — pushing a `vX.Y.Z` tag now builds standalone `kuva` CLI binaries (with the `cli,full` feature set: SVG + PNG + PDF) for Linux (x86_64 gnu/musl, aarch64), macOS (Intel + Apple Silicon) and Windows (x86_64), and attaches them with SHA-256 checksums to the matching GitHub Release. Users can download a binary and run it without installing Rust. See `.github/workflows/release.yml` (resolves #17). - **`ManhattanPlot::with_thin_overlapping_labels()`** — opts the Manhattan x-axis into collision-aware chromosome labelling. 
By default every chromosome whose band is at least 6px wide is labelled, which can overprint the labels of adjacent small chromosomes (e.g. 17/19/21) on a genome-wide plot. When enabled, labels are placed in a single left-to-right pass and any label whose estimated footprint would overlap the previously drawn one is skipped, automatically thinning crowded regions while keeping the rest readable. Works with both horizontal and rotated (`Layout::with_x_tick_rotate`) labels. Off by default; existing behaviour is unchanged. diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index 8b2f2e9..aadcf5e 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -141,6 +141,7 @@ - [SVG Interactivity](./reference/interactive.md) - [Date & Time Axes](./reference/datetime.md) - [Stats Box](./reference/stats_box.md) +- [Math in Labels](./reference/math.md) # Performance diff --git a/docs/src/assets/math/fraction.svg b/docs/src/assets/math/fraction.svg new file mode 100644 index 0000000..e60ff9f --- /dev/null +++ b/docs/src/assets/math/fraction.svg @@ -0,0 +1 @@ +012340246810(a + b)/crateFraction \ No newline at end of file diff --git a/docs/src/assets/math/greek.svg b/docs/src/assets/math/greek.svg new file mode 100644 index 0000000..13dbc92 --- /dev/null +++ b/docs/src/assets/math/greek.svg @@ -0,0 +1 @@ +012340246810μ ± σcountStandard deviation \ No newline at end of file diff --git a/docs/src/assets/math/mixed.svg b/docs/src/assets/math/mixed.svg new file mode 100644 index 0000000..062b9e1 --- /dev/null +++ b/docs/src/assets/math/mixed.svg @@ -0,0 +1 @@ +012340246810Variance, σ² (units)∇ · FMixed text and math \ No newline at end of file diff --git a/docs/src/assets/math/quadratic.svg b/docs/src/assets/math/quadratic.svg new file mode 100644 index 0000000..a641246 --- /dev/null +++ b/docs/src/assets/math/quadratic.svg @@ -0,0 +1 @@ +012340246810x = (-b ± √(b² - 4ac))/(2a)rootsQuadratic formula \ No newline at end of file diff --git 
a/docs/src/assets/math/rotated_ylabel.svg b/docs/src/assets/math/rotated_ylabel.svg new file mode 100644 index 0000000..86152e8 --- /dev/null +++ b/docs/src/assets/math/rotated_ylabel.svg @@ -0,0 +1 @@ +012340246810timeEnergy E = mc²Mass–energy equivalence \ No newline at end of file diff --git a/docs/src/assets/math/sqrt.svg b/docs/src/assets/math/sqrt.svg new file mode 100644 index 0000000..c971ce7 --- /dev/null +++ b/docs/src/assets/math/sqrt.svg @@ -0,0 +1 @@ +012340246810√(x² + y²)distanceSquare root \ No newline at end of file diff --git a/docs/src/assets/math/sum.svg b/docs/src/assets/math/sum.svg new file mode 100644 index 0000000..e0919ea --- /dev/null +++ b/docs/src/assets/math/sum.svg @@ -0,0 +1 @@ +012340246810∑ᵢ₌₁ⁿ xᵢtotalSummation \ No newline at end of file diff --git a/docs/src/assets/math/superscript.svg b/docs/src/assets/math/superscript.svg new file mode 100644 index 0000000..2a8e8eb --- /dev/null +++ b/docs/src/assets/math/superscript.svg @@ -0,0 +1 @@ +012340246810x² + y² = r²f(x)Power law \ No newline at end of file diff --git a/docs/src/reference/math.md b/docs/src/reference/math.md new file mode 100644 index 0000000..8f272b4 --- /dev/null +++ b/docs/src/reference/math.md @@ -0,0 +1,77 @@ +# Math in Labels + +Any label — axis titles, the plot title, `TextPlot` bodies — may embed math +inside `$...$` using LaTeX-ish syntax: + +```rust +Layout::new((0.0, 3.0), (0.0, 10.0)) + .with_x_label("Variance, $\\sigma^2$ (units)") + .with_y_label("Energy $E = mc^2$"); +``` + +Math regions are lowered to inline **Unicode** text — zero dependencies, +always on, in every backend including the terminal: + +| Input | Output | +|-------|--------| +| `$\sigma^2$` | σ² | +| `$x_i$` | xᵢ | +| `$a \leq b \cdot c$` | a ≤ b · c | +| `$\frac{a}{b}$` | a/b | +| `$\frac{a+b}{c}$` | (a+b)/c | +| `$\sqrt{x^2+y^2}$` | √(x²+y²) | +| `$\sum_{i=1}^{n} x_i$` | ∑ᵢ₌₁ⁿ xᵢ | + +The lowering never emits a stray `\` or `$`. A literal dollar sign is written +`\$` (e.g. 
`Price \$5`), and a `$` without a closing partner is left untouched. + +## Supported syntax + +- **Greek letters** — `\alpha`…`\Omega`, including variants like `\varepsilon` + and `\varphi`. +- **Operators, relations, arrows** — `\pm`, `\times`, `\cdot`, `\div`, + `\leq`, `\geq`, `\neq`, `\approx`, `\propto`, `\in`, `\partial`, `\nabla`, + `\infty`, `\to`, `\degree`, and friends. +- **Superscripts / subscripts** — `x^2` → x², `x_i` → xᵢ, with `{...}` groups + (`x^{2n}` → x²ⁿ). These are **all-or-nothing**: if every character in the + group has a Unicode super/subscript form you get `x²ⁿ`; if any doesn't + (e.g. `q`, most capitals) the whole group falls back to a clean `x^(2q)` — + never a half-substituted mix. +- **Fractions** — `\frac{a}{b}` → `a/b`; multi-term parts are parenthesised: + `\frac{a+b}{c}` → `(a+b)/c`. +- **Radicals** — `\sqrt{x}` → `√x`, `\sqrt{x+y}` → `√(x+y)`, + `\sqrt[3]{x}` → `³√x`. + +Fractions and radicals are rendered **inline** (`a/b`, `√(…)`), never +stacked — the output is plain text that flows anywhere a label can go, +including rotated y-axis titles and terminal character grids. Unknown +commands are dropped cleanly (the argument is kept, the backslash never +leaks into output). + +## Examples + +Generated by `cargo run --example math`: + +Greek letters in an axis label +Superscripts in an axis label +Fraction in an axis label +Square root in an axis label +Summation with limits in an axis label +Quadratic formula in an axis label +Math in a rotated y-axis title +Mixed text and math in labels + +## CLI + +Math works in any label flag — no extra flags needed: + +```bash +kuva scatter data.tsv --x x --y y \ + --x-label 'Variance, $\sigma^2$ (units)' \ + --y-label '$\sqrt{x^2 + y^2}$' \ + --title 'Rate $\frac{a + b}{c}$' \ + -o plot.svg +``` + +The same labels render in the terminal with `--terminal` — the lowered +Unicode lands directly on the character grid. 
diff --git a/examples/math.rs b/examples/math.rs new file mode 100644 index 0000000..eb0cdf2 --- /dev/null +++ b/examples/math.rs @@ -0,0 +1,78 @@ +//! Math-in-labels documentation examples. +//! +//! Generates canonical SVG outputs used in the kuva documentation. +//! Run with: +//! +//! ```bash +//! cargo run --example math +//! ``` +//! +//! SVGs are written to `docs/src/assets/math/`. + +use kuva::backend::svg::SvgBackend; +use kuva::plot::scatter::ScatterPlot; +use kuva::render::layout::Layout; +use kuva::render::plots::Plot; +use kuva::render::render::render_multiple; +use std::fs; + +const OUT: &str = "docs/src/assets/math"; + +fn write(name: &str, plots: Vec, layout: Layout) { + fs::create_dir_all(OUT).unwrap(); + let svg = SvgBackend.render_scene(&render_multiple(plots, layout)); + fs::write(format!("{OUT}/{name}.svg"), svg).unwrap(); +} + +/// A scatter with a fixed dataset; only the labels vary between scenarios. +fn scatter(title: &str, x_label: &str, y_label: &str) -> (Vec, Layout) { + let plot = ScatterPlot::new() + .with_data(vec![(1.0_f64, 1.0), (2.0, 4.0), (3.0, 9.0)]) + .with_color("steelblue"); + let plots = vec![Plot::Scatter(plot)]; + let layout = Layout::new((0.0, 4.0), (0.0, 10.0)) + .with_title(title) + .with_x_label(x_label) + .with_y_label(y_label); + (plots, layout) +} + +fn main() { + // ── Greek, super/subscripts ─────────────────────────────────────────── + let (p, l) = scatter("Standard deviation", "$\\mu \\pm \\sigma$", "count"); + write("greek", p, l); + + let (p, l) = scatter("Power law", "$x^2 + y^2 = r^2$", "$f(x)$"); + write("superscript", p, l); + + // ── Fractions & radicals (lowered to inline a/b and √(…)) ──────────── + let (p, l) = scatter("Fraction", "$\\frac{a + b}{c}$", "rate"); + write("fraction", p, l); + + let (p, l) = scatter("Square root", "$\\sqrt{x^2 + y^2}$", "distance"); + write("sqrt", p, l); + + // ── Large operators ─────────────────────────────────────────────────── + let (p, l) = scatter("Summation", 
"$\\sum_{i=1}^{n} x_i$", "total"); + write("sum", p, l); + + let (p, l) = scatter( + "Quadratic formula", + "$x = \\frac{-b \\pm \\sqrt{b^2 - 4ac}}{2a}$", + "roots", + ); + write("quadratic", p, l); + + // ── Rotated y-axis title + mixed text/math ──────────────────────────── + let (p, l) = scatter("Mass–energy equivalence", "time", "Energy $E = mc^2$"); + write("rotated_ylabel", p, l); + + let (p, l) = scatter( + "Mixed text and math", + "Variance, $\\sigma^2$ (units)", + "$\\nabla \\cdot F$", + ); + write("mixed", p, l); + + println!("Math SVGs written to {OUT}/"); +} diff --git a/scripts/gen_docs.sh b/scripts/gen_docs.sh index 04c8ade..342fe0d 100755 --- a/scripts/gen_docs.sh +++ b/scripts/gen_docs.sh @@ -34,6 +34,7 @@ EXAMPLES=( line lollipop manhattan + math mosaic network parallel diff --git a/scripts/smoke_tests.sh b/scripts/smoke_tests.sh index 75cdb09..aeb6841 100755 --- a/scripts/smoke_tests.sh +++ b/scripts/smoke_tests.sh @@ -985,6 +985,38 @@ check "quiver grid on + tight bounds" \ --tight-bounds --pivot middle \ --title "Quiver Grid + Clip" +# ── math in labels ────────────────────────────────────────────────────────── +# $...$ math regions in labels are lowered to inline Unicode (σ², a/b, √(…), ∑) +# by every backend. Exercised across plot types and label slots to confirm it +# is not scatter-specific. 
+check "math superscript + sqrt" \ + "$BIN" scatter "$DATA/scatter.tsv" --x x --y y \ + --x-label 'Variance, $\sigma^2$ (units)' --y-label '$\sqrt{x^2+y^2}$' + +check "math fraction title" \ + "$BIN" scatter "$DATA/scatter.tsv" --x x --y y \ + --title 'Rate $\frac{a + b}{c}$' + +check "math sum with limits" \ + "$BIN" scatter "$DATA/scatter.tsv" --x x --y y \ + --x-label '$\sum_{i=1}^{n} x_i$' --title 'Summation' + +check "math greek and operators" \ + "$BIN" scatter "$DATA/scatter.tsv" --x x --y y \ + --x-label '$\alpha \leq \beta \neq \gamma$' --y-label '$\mu \pm \sigma$' + +check "math in rotated y-label" \ + "$BIN" scatter "$DATA/scatter.tsv" --x x --y y \ + --y-label 'Energy $E = mc^2$' + +check "math on line plot" \ + "$BIN" line "$DATA/measurements.tsv" --x time --y value \ + --x-label 'Time $t$ ($\mu s$)' --y-label 'Amplitude $A_0$' + +check "math on bar plot" \ + "$BIN" bar "$DATA/bar.tsv" --label-col category --value-col count \ + --y-label 'Count $\times 10^3$' + # ── summary ─────────────────────────────────────────────────────────────────── echo "" echo "Results: $PASS passed, $FAIL failed" diff --git a/scripts/terminal_plots.sh b/scripts/terminal_plots.sh index 3c2b4a8..08a9bca 100755 --- a/scripts/terminal_plots.sh +++ b/scripts/terminal_plots.sh @@ -232,4 +232,13 @@ run scatter "$DATA/scatter.tsv" --x x --y y \ --y-label-wrap 20 \ --terminal $W $H +# ── math in labels ──────────────────────────────────────────────────────────── +# $...$ regions are lowered to inline Unicode (σ², a/b, √, ∑) before drawing +# on the character grid. 
+header "math labels" +run scatter "$DATA/scatter.tsv" --x x --y y \ + --title 'Decay $\lambda$ vs $\sigma^2$' \ + --x-label 'Time ($\mu s$)' --y-label '$\sqrt{x^2 + y^2}$' \ + --terminal $W $H + echo diff --git a/src/backend/raster.rs b/src/backend/raster.rs index 317f727..9082585 100644 --- a/src/backend/raster.rs +++ b/src/backend/raster.rs @@ -1879,6 +1879,16 @@ impl RasterBackend { .as_ref() .and_then(kcolor_to_rgba) .unwrap_or(default_text); + // Lower any `$...$` math regions to inline Unicode + // (σ², a/b, √(…)) so they draw through the normal glyph + // path. No-op for plain labels. + let lowered; + let content: &str = if crate::render::math::needs_rewrite(content) { + lowered = crate::render::math::to_unicode(content); + &lowered + } else { + content + }; canvas.draw_text( sx!(*x), sy!(*y), diff --git a/src/backend/svg.rs b/src/backend/svg.rs index 7f97962..000394b 100644 --- a/src/backend/svg.rs +++ b/src/backend/svg.rs @@ -225,6 +225,17 @@ impl SvgBackend { bold, color, } => { + // Lower any `$...$` math regions to inline Unicode + // (σ², a/b, √(…)) so they render as ordinary text. + // No-op for plain labels. + let lowered; + let content: &str = if crate::render::math::needs_rewrite(content) { + lowered = crate::render::math::to_unicode(content); + &lowered + } else { + content + }; + let anchor_str = match anchor { TextAnchor::Start => "start", TextAnchor::Middle => "middle", diff --git a/src/backend/terminal.rs b/src/backend/terminal.rs index a907287..9b3bccf 100644 --- a/src/backend/terminal.rs +++ b/src/backend/terminal.rs @@ -821,6 +821,16 @@ impl Canvas { // reference line/tick. let baseline = *size as f64 * 0.35; let row = self.to_cy(y_s - baseline); + // Lower any `$...$` math regions to inline Unicode (σ, x², + // √(…)) — the character grid renders the result directly. + // No-op for plain labels. 
+ let lowered; + let content: &str = if crate::render::math::needs_rewrite(content) { + lowered = crate::render::math::to_unicode(content); + &lowered + } else { + content + }; let chars: Vec = content.chars().collect(); let len = chars.len() as isize; diff --git a/src/lib.rs b/src/lib.rs index 1e5c25e..734bdae 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,6 +35,15 @@ //! | `cli` | Enables the `kuva` CLI binary (pulls in `clap`). | //! | `full` | Enables `embed_font` + `png` + `pdf`. | //! +//! # Math in labels +//! +//! Any label (title, axis labels, annotations, markdown body text) may contain +//! `$...$` math regions written in LaTeX-ish syntax: `$\sigma^2$`, +//! `$\frac{a}{b}$`, `$\sqrt{x^2 + y^2}$`. They are lowered to inline Unicode — +//! Greek letters, operators, super/subscripts, `\frac`→`a/b`, `\sqrt`→`√(…)` — +//! by every backend, including the terminal. Zero dependencies; always on. +//! Write a literal dollar as `\$`. See [`render::math::to_unicode`]. +//! //! # Fonts //! //! DejaVu Sans is bundled inside the crate. The PNG and PDF backends always load diff --git a/src/render/math.rs b/src/render/math.rs new file mode 100644 index 0000000..8d27e66 --- /dev/null +++ b/src/render/math.rs @@ -0,0 +1,687 @@ +//! Math notation for `$...$` regions in labels. +//! +//! [`to_unicode`] rewrites a label's `$...$` regions (LaTeX-ish syntax) to +//! inline Unicode text: Greek letters (`\sigma`→σ), operators (`\leq`→≤), +//! super/subscripts (`x^2`→x², all-or-nothing with a clean `x^(2q)` fallback), +//! `\frac{a}{b}`→`a/b`, `\sqrt{x}`→`√x`. It never emits a stray `\` or `$`. +//! +//! This is a pure lookup/flattening pass — always compiled, zero dependencies, +//! deliberately **inline only** (no stacked fractions or other 2-D layout, so +//! it works everywhere a plain string does, including the terminal backend's +//! character grid and markdown body text). Literal dollars are written `\$`. 
// ─────────────────────────── detection ─────────────────────────────────────

/// One segment of a label string: literal text or a math region (the body of
/// a `$...$`, without the dollar signs).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Segment<'a> {
    /// Literal text outside any math region (may still contain `\$` escapes).
    Text(&'a str),
    /// The body of one `$...$` region, delimiters excluded.
    Math(&'a str),
}

/// Does the label need rewriting before display? True when it contains a
/// `$...$` math region or an escaped `\$` (which must render as a literal
/// `$`). Backends gate on this to skip the rewrite cost for plain labels.
pub fn needs_rewrite(s: &str) -> bool {
    contains_math(s) || s.contains("\\$")
}

/// Cheap pre-check: does the string contain at least one `$...$` region?
/// Requires two unescaped `$`. Avoids the segment-split cost for plain labels.
pub fn contains_math(s: &str) -> bool {
    let bytes = s.as_bytes();
    let mut i = 0;
    let mut count = 0usize;
    while i < bytes.len() {
        // `\$` is a literal dollar, not a delimiter.
        if bytes[i] == b'\\' && i + 1 < bytes.len() && bytes[i + 1] == b'$' {
            i += 2;
            continue;
        }
        if bytes[i] == b'$' {
            count += 1;
            if count >= 2 {
                return true;
            }
        }
        i += 1;
    }
    false
}

/// Split a label on `$...$` regions, honoring `\$` as a literal dollar.
/// An unclosed `$` makes the remainder a literal text segment.
///
/// Scanning is byte-wise, but slices are only ever cut at `$` bytes (ASCII),
/// so every returned segment sits on UTF-8 character boundaries.
pub fn split_segments(s: &str) -> Vec<Segment<'_>> {
    let bytes = s.as_bytes();
    let mut out = Vec::new();
    let mut cursor = 0usize;
    let mut i = 0usize;
    while i < bytes.len() {
        if bytes[i] == b'\\' && i + 1 < bytes.len() && bytes[i + 1] == b'$' {
            i += 2;
            continue;
        }
        if bytes[i] == b'$' {
            if cursor < i {
                out.push(Segment::Text(&s[cursor..i]));
            }
            let math_start = i + 1;
            let mut j = math_start;
            while j < bytes.len() {
                // Inside math, a backslash escapes whatever follows it, so
                // `\$` cannot close the region.
                if bytes[j] == b'\\' && j + 1 < bytes.len() {
                    j += 2;
                    continue;
                }
                if bytes[j] == b'$' {
                    break;
                }
                j += 1;
            }
            if j < bytes.len() {
                out.push(Segment::Math(&s[math_start..j]));
                i = j + 1;
                cursor = i;
            } else {
                // No closing `$`: keep the opener and the rest as plain text.
                out.push(Segment::Text(&s[i..]));
                cursor = bytes.len();
                break;
            }
        } else {
            i += 1;
        }
    }
    if cursor < bytes.len() {
        out.push(Segment::Text(&s[cursor..]));
    }
    out
}

/// Map a LaTeX-style command name (no leading `\`) to a Unicode symbol.
/// Returns `None` for names outside the table.
pub fn command_to_unicode(name: &str) -> Option<char> {
    Some(match name {
        // Greek lowercase
        "alpha" => 'α',
        "beta" => 'β',
        "gamma" => 'γ',
        "delta" => 'δ',
        "epsilon" | "varepsilon" => 'ε',
        "zeta" => 'ζ',
        "eta" => 'η',
        "theta" => 'θ',
        "iota" => 'ι',
        "kappa" => 'κ',
        "lambda" => 'λ',
        "mu" => 'μ',
        "nu" => 'ν',
        "xi" => 'ξ',
        "pi" => 'π',
        "rho" => 'ρ',
        "sigma" => 'σ',
        "tau" => 'τ',
        "upsilon" => 'υ',
        "phi" | "varphi" => 'φ',
        "chi" => 'χ',
        "psi" => 'ψ',
        "omega" => 'ω',
        // Greek uppercase
        "Gamma" => 'Γ',
        "Delta" => 'Δ',
        "Theta" => 'Θ',
        "Lambda" => 'Λ',
        "Xi" => 'Ξ',
        "Pi" => 'Π',
        "Sigma" => 'Σ',
        "Phi" => 'Φ',
        "Psi" => 'Ψ',
        "Omega" => 'Ω',
        // Operators / relations
        "cdot" => '·',
        "times" => '×',
        "div" => '÷',
        "pm" => '±',
        "mp" => '∓',
        "leq" | "le" => '≤',
        "geq" | "ge" => '≥',
        "neq" | "ne" => '≠',
        "approx" => '≈',
        "equiv" => '≡',
        "sim" => '∼',
        "propto" => '∝',
        "ll" => '≪',
        "gg" => '≫',
        // Symbols
        "infty" => '∞',
        "partial" => '∂',
        "nabla" => '∇',
        "degree" => '°',
        "angle" => '∠',
        "forall" => '∀',
        "exists" => '∃',
        "in" => '∈',
        "notin" => '∉',
        "subset" => '⊂',
        "cup" => '∪',
        "cap" => '∩',
        "ldots" => '…',
        "cdots" => '⋯',
        // Large operators
        "sum" => '∑',
        "prod" => '∏',
        "int" => '∫',
        // Arrows
        "to" | "rightarrow" => '→',
        "leftarrow" => '←',
        "Rightarrow" => '⇒',
        "Leftarrow" => '⇐',
        "leftrightarrow" => '↔',
        _ => return None,
    })
}

// ─────────────────────────── unicode lowering ──────────────────────────────

/// Rewrite a label's `$...$` math regions to inline Unicode text, leaving
/// surrounding text untouched. The result is plain text every backend can
/// render directly. See the module docs for the supported set.
///
/// Guarantees: the output never contains a `\` introduced by a math command
/// or a `$` math delimiter.
pub fn to_unicode(label: &str) -> String {
    let mut out = String::with_capacity(label.len());
    for seg in split_segments(label) {
        match seg {
            // `\$` in text renders as a literal `$` — drop the escape.
            Segment::Text(t) => out.push_str(&t.replace("\\$", "$")),
            Segment::Math(body) => clean_math(body, &mut out),
        }
    }
    out
}

/// Lower a single `$...$` body to inline Unicode, appending to `out`.
///
/// The body is decoded one Unicode scalar at a time, so characters typed
/// directly into the math region (`$α$`, `$°C$`) pass through intact rather
/// than being re-interpreted byte-by-byte.
fn clean_math(body: &str, out: &mut String) {
    let mut i = 0;
    // Invariant: `i` is always on a char boundary. Every advance is either
    // past an ASCII byte, past a whole decoded char, or to an index returned
    // by a helper that stops right after an ASCII delimiter or letter run.
    while let Some(c) = body[i..].chars().next() {
        match c {
            '\\' => i = lower_command(body, i + 1, out),
            '^' | '_' => match read_script_group(body, i + 1) {
                Some((grp, end)) => {
                    push_script(c, grp, out);
                    i = end;
                }
                // Trailing `^`/`_` with no operand: keep it literally.
                None => {
                    out.push(c);
                    i += 1;
                }
            },
            // Stray grouping braces — strip.
            '{' | '}' => i += 1,
            _ => {
                out.push(c);
                i += c.len_utf8();
            }
        }
    }
}

/// Lower the command whose name starts at `start` (just past the `\`),
/// appending its rendering to `out`. Returns the index to resume scanning at.
fn lower_command(body: &str, start: usize, out: &mut String) -> usize {
    let (name, next) = read_command(body, start);
    match name {
        "frac" => match take_two_groups(body, next) {
            Some(((num, den), end)) => {
                push_frac_inline(num, den, out);
                end
            }
            // Malformed `\frac` (missing a group): drop the command name like
            // any unknown command so `frac` never leaks as literal text. Any
            // group that is present is emitted by the brace rules.
            None => next,
        },
        "sqrt" => {
            let (index, after_idx) = read_optional_bracket(body, next);
            if let Some(idx) = index {
                // Index as superscript before the radical, if it maps
                // cleanly; else fall back to inline form. Clean it first so
                // a command index (`\sqrt[\alpha]`) can't leak a backslash
                // through the fallback.
                let mut cleaned = String::new();
                clean_math(idx, &mut cleaned);
                match all_super(&cleaned) {
                    Some(sup) => out.push_str(&sup),
                    None => out.push_str(&cleaned),
                }
            }
            out.push('√');
            match take_group(body, after_idx) {
                Some((arg, end)) => {
                    let mut inner = String::new();
                    clean_math(arg, &mut inner);
                    wrap_inline(&inner, out);
                    end
                }
                // Braceless radicand (`\sqrt x`): the radical sign alone is
                // emitted and whatever follows is lowered as ordinary content.
                None => after_idx,
            }
        }
        // Stray backslash (e.g. before a non-letter) — drop just the slash.
        "" => start,
        _ => {
            // Known symbol → emit it. Unknown command → drop it (its `{arg}`
            // is emitted by the brace rules as cleaned content).
            if let Some(u) = command_to_unicode(name) {
                out.push(u);
            }
            next
        }
    }
}

/// Emit a `^`/`_` operand: as Unicode super/subscripts when every character
/// maps, otherwise as the clean `^(...)` / `_(...)` fallback.
fn push_script(marker: char, grp: &str, out: &mut String) {
    // Recurse so structure inside the group lowers correctly:
    // `x^{\frac{1}{2}}` must become `x^(1/2)` via the fallback, not a
    // silently corrupted x¹².
    let mut sub = String::new();
    clean_math(grp, &mut sub);
    let mapped = if marker == '^' {
        all_super(&sub)
    } else {
        all_sub(&sub)
    };
    match mapped {
        Some(uni) => out.push_str(&uni),
        None => {
            // All-or-nothing: keep a clean caret/underscore form.
            out.push(marker);
            out.push('(');
            out.push_str(&sub);
            out.push(')');
        }
    }
}

/// `\frac{a}{b}` → `a/b`, parenthesising multi-character parts.
fn push_frac_inline(num: &str, den: &str, out: &mut String) {
    let mut n = String::new();
    clean_math(num, &mut n);
    let mut d = String::new();
    clean_math(den, &mut d);
    wrap_inline(&n, out);
    out.push('/');
    wrap_inline(&d, out);
}

/// Append `s`, wrapping in parens when it's more than one character (Unicode
/// scalar value) so inline fractions/radicals stay unambiguous (`1/2` but
/// `(a+b)/c`, `√(x+y)`).
fn wrap_inline(s: &str, out: &mut String) {
    if s.chars().count() <= 1 {
        out.push_str(s);
    } else {
        out.push('(');
        out.push_str(s);
        out.push(')');
    }
}

/// Map every char of `s` to its Unicode superscript, or `None` if any lacks
/// one (all-or-nothing).
fn all_super(s: &str) -> Option<String> {
    s.chars().map(super_char).collect()
}

/// Map every char of `s` to its Unicode subscript, or `None` if any lacks
/// one (all-or-nothing).
fn all_sub(s: &str) -> Option<String> {
    s.chars().map(sub_char).collect()
}

/// Unicode superscript form of `c`, if one is in the table.
fn super_char(c: char) -> Option<char> {
    Some(match c {
        '0' => '⁰',
        '1' => '¹',
        '2' => '²',
        '3' => '³',
        '4' => '⁴',
        '5' => '⁵',
        '6' => '⁶',
        '7' => '⁷',
        '8' => '⁸',
        '9' => '⁹',
        '+' => '⁺',
        '-' => '⁻',
        '=' => '⁼',
        '(' => '⁽',
        ')' => '⁾',
        'a' => 'ᵃ',
        'b' => 'ᵇ',
        'c' => 'ᶜ',
        'd' => 'ᵈ',
        'e' => 'ᵉ',
        'f' => 'ᶠ',
        'g' => 'ᵍ',
        'h' => 'ʰ',
        'i' => 'ⁱ',
        'j' => 'ʲ',
        'k' => 'ᵏ',
        'l' => 'ˡ',
        'm' => 'ᵐ',
        'n' => 'ⁿ',
        'o' => 'ᵒ',
        'p' => 'ᵖ',
        'r' => 'ʳ',
        's' => 'ˢ',
        't' => 'ᵗ',
        'u' => 'ᵘ',
        'v' => 'ᵛ',
        'w' => 'ʷ',
        'x' => 'ˣ',
        'y' => 'ʸ',
        'z' => 'ᶻ',
        _ => return None,
    })
}

/// Unicode subscript form of `c`, if one is in the table.
fn sub_char(c: char) -> Option<char> {
    Some(match c {
        '0' => '₀',
        '1' => '₁',
        '2' => '₂',
        '3' => '₃',
        '4' => '₄',
        '5' => '₅',
        '6' => '₆',
        '7' => '₇',
        '8' => '₈',
        '9' => '₉',
        '+' => '₊',
        '-' => '₋',
        '=' => '₌',
        '(' => '₍',
        ')' => '₎',
        'a' => 'ₐ',
        'e' => 'ₑ',
        'h' => 'ₕ',
        'i' => 'ᵢ',
        'j' => 'ⱼ',
        'k' => 'ₖ',
        'l' => 'ₗ',
        'm' => 'ₘ',
        'n' => 'ₙ',
        'o' => 'ₒ',
        'p' => 'ₚ',
        'r' => 'ᵣ',
        's' => 'ₛ',
        't' => 'ₜ',
        'u' => 'ᵤ',
        'v' => 'ᵥ',
        'x' => 'ₓ',
        _ => return None,
    })
}

// ── small parsing helpers ──

/// Read an alphabetic command name starting at `start`; returns (name, index
/// just past it). Empty name if `start` isn't a letter.
fn read_command(s: &str, start: usize) -> (&str, usize) {
    let bytes = s.as_bytes();
    let mut end = start;
    while end < bytes.len() && bytes[end].is_ascii_alphabetic() {
        end += 1;
    }
    (&s[start..end], end)
}

/// If `pos` is at `{`, return (inner, index past the matching `}`).
/// Nested braces are balanced; `None` if the group never closes.
fn take_group(s: &str, pos: usize) -> Option<(&str, usize)> {
    let bytes = s.as_bytes();
    if pos >= bytes.len() || bytes[pos] != b'{' {
        return None;
    }
    let mut depth = 1;
    let mut i = pos + 1;
    let inner_start = i;
    while i < bytes.len() {
        match bytes[i] {
            b'{' => depth += 1,
            b'}' => {
                depth -= 1;
                if depth == 0 {
                    return Some((&s[inner_start..i], i + 1));
                }
            }
            _ => {}
        }
        i += 1;
    }
    None
}

/// Read two consecutive `{..}{..}` groups.
fn take_two_groups(s: &str, pos: usize) -> Option<((&str, &str), usize)> {
    let (a, after_a) = take_group(s, pos)?;
    let (b, after_b) = take_group(s, after_a)?;
    Some(((a, b), after_b))
}

/// If `pos` is at `[`, return (inner, index past `]`). Otherwise — or when the
/// bracket never closes — return `(None, pos)`.
fn read_optional_bracket(s: &str, pos: usize) -> (Option<&str>, usize) {
    let bytes = s.as_bytes();
    if pos < bytes.len() && bytes[pos] == b'[' {
        if let Some(close) = s[pos + 1..].find(']') {
            return (Some(&s[pos + 1..pos + 1 + close]), pos + 1 + close + 1);
        }
    }
    (None, pos)
}

/// Read a `^`/`_` operand: a `{group}`, a braceless `\command`, or a single
/// following char.
fn read_script_group(s: &str, pos: usize) -> Option<(&str, usize)> {
    let bytes = s.as_bytes();
    if pos >= bytes.len() {
        return None;
    }
    if bytes[pos] == b'{' {
        return take_group(s, pos);
    }
    // Braceless command operand, e.g. `x^\alpha` — grab the whole `\name` so it
    // isn't truncated to a lone `\` (which would leave `alpha` as literal text).
    if bytes[pos] == b'\\' {
        let (name, end) = read_command(s, pos + 1);
        if !name.is_empty() {
            return Some((&s[pos..end], end));
        }
    }
    // Single char (one UTF-8 scalar).
    let ch_len = s[pos..].chars().next()?.len_utf8();
    Some((&s[pos..pos + ch_len], pos + ch_len))
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn detects_math() {
        assert!(contains_math("a $x$ b"));
        assert!(!contains_math("a $ b"));
        assert!(!contains_math("price \\$5"));
    }

    #[test]
    fn greek_and_operators() {
        assert_eq!(to_unicode("$\\sigma$"), "σ");
        assert_eq!(to_unicode("$\\alpha + \\beta$"), "α + β");
        assert_eq!(to_unicode("$a \\leq b \\cdot c$"), "a ≤ b · c");
        assert_eq!(to_unicode("$\\Omega$"), "Ω");
        assert_eq!(to_unicode("$\\infty$"), "∞");
    }

    #[test]
    fn superscripts_and_subscripts() {
        assert_eq!(to_unicode("$x^2$"), "x²");
        assert_eq!(to_unicode("$x_i$"), "xᵢ");
        assert_eq!(to_unicode("$x^{2n}$"), "x²ⁿ");
        assert_eq!(to_unicode("$x_{i+1}$"), "xᵢ₊₁");
    }

    #[test]
    fn superscript_all_or_nothing_fallback() {
        // 'q' has no Unicode superscript, so the whole group falls back.
        assert_eq!(to_unicode("$x^{2q}$"), "x^(2q)");
        // Uppercase generally has none either.
        assert_eq!(to_unicode("$x^{A}$"), "x^(A)");
    }

    #[test]
    fn fractions_inline() {
        assert_eq!(to_unicode("$\\frac{1}{2}$"), "1/2");
        assert_eq!(to_unicode("$\\frac{a+b}{c}$"), "(a+b)/c");
        assert_eq!(to_unicode("$\\frac{\\sqrt{a}}{b}$"), "(√a)/b");
    }

    #[test]
    fn sqrt_inline() {
        assert_eq!(to_unicode("$\\sqrt{x}$"), "√x");
        assert_eq!(to_unicode("$\\sqrt{x+y}$"), "√(x+y)");
        assert_eq!(to_unicode("$\\sqrt{x^2+y^2}$"), "√(x²+y²)");
    }

    #[test]
    fn nested_chain_stays_linear() {
        assert_eq!(
            to_unicode("$\\frac{\\sqrt{a}}{b^2} + \\sum_{i=1}^{n} x_i$"),
            "(√a)/(b²) + ∑ᵢ₌₁ⁿ xᵢ"
        );
    }

    #[test]
    fn no_backslash_or_dollar_in_output() {
        let out = to_unicode("$\\frac{\\unknown{a}}{\\sqrt{b}}$");
        assert!(!out.contains('\\'), "got {out}");
        assert!(!out.contains('$'), "got {out}");
    }

    #[test]
    fn text_around_math_preserved() {
        assert_eq!(
            to_unicode("Variance, $\\sigma^2$ (units)"),
            "Variance, σ² (units)"
        );
    }

    #[test]
    fn escaped_dollar_is_literal() {
        // `\$` marks a literal dollar; the escape is dropped on output.
        assert_eq!(to_unicode("price \\$5 each"), "price $5 each");
        assert!(!contains_math("price \\$5 each"));
        assert!(needs_rewrite("price \\$5 each"));
        assert!(!needs_rewrite("no dollars at all"));
    }

    // ── detection / segmentation edge cases ──

    #[test]
    fn detection_edges() {
        assert!(contains_math("$$")); // two dollars, even if empty
        assert!(contains_math("a $x$ b $y$ c"));
        assert!(!contains_math("")); // empty
        assert!(!contains_math("no math here"));
        assert!(!contains_math("\\$5 and \\$6")); // both escaped
    }

    #[test]
    fn empty_and_unclosed_math() {
        assert_eq!(to_unicode("$$"), ""); // empty region
        assert_eq!(to_unicode("a $ b"), "a $ b"); // unclosed → literal
        assert_eq!(to_unicode("$\\alpha"), "$\\alpha"); // unclosed → literal
    }

    #[test]
    fn multiple_regions() {
        assert_eq!(to_unicode("$\\alpha$ and $\\beta$"), "α and β");
        assert_eq!(to_unicode("$x^2$ vs $y_1$"), "x² vs y₁");
    }

    #[test]
    fn full_greek_sample() {
        assert_eq!(to_unicode("$\\theta$"), "θ");
        assert_eq!(to_unicode("$\\lambda$"), "λ");
        assert_eq!(to_unicode("$\\mu$"), "μ");
        assert_eq!(to_unicode("$\\varphi$"), "φ");
        assert_eq!(to_unicode("$\\Delta$"), "Δ");
        assert_eq!(to_unicode("$\\Sigma$"), "Σ");
        assert_eq!(to_unicode("$\\Psi$"), "Ψ");
    }

    #[test]
    fn operator_sample() {
        assert_eq!(to_unicode("$a \\times b$"), "a × b");
        assert_eq!(to_unicode("$a \\div b$"), "a ÷ b");
        assert_eq!(to_unicode("$x \\neq y$"), "x ≠ y");
        assert_eq!(to_unicode("$x \\approx y$"), "x ≈ y");
        assert_eq!(to_unicode("$x \\to \\infty$"), "x → ∞");
        assert_eq!(to_unicode("$\\partial f$"), "∂ f");
        assert_eq!(to_unicode("$x \\in S$"), "x ∈ S");
        assert_eq!(to_unicode("$90\\degree$"), "90°");
    }

    #[test]
    fn unknown_command_and_stray_braces() {
        assert_eq!(to_unicode("$\\foo{x}$"), "x"); // drop cmd, keep cleaned arg
        assert_eq!(to_unicode("$\\foo$"), ""); // bare unknown dropped
        assert_eq!(to_unicode("${x}$"), "x"); // stray braces stripped
    }

    #[test]
    fn subscript_then_superscript() {
        assert_eq!(to_unicode("$x_i^2$"), "xᵢ²");
    }

    #[test]
    fn braceless_command_as_script_operand() {
        // `x^\alpha` must grab the whole `\alpha`, not a lone `\` (which left
        // `alpha` as literal text). α has no Unicode superscript → clean fallback.
        assert_eq!(to_unicode("$x^\\alpha$"), "x^(α)");
        assert_eq!(to_unicode("$x_\\beta$"), "x_(β)");
    }

    #[test]
    fn subscript_all_or_nothing_fallback() {
        // 'b' has no Unicode subscript, so the group falls back.
        assert_eq!(to_unicode("$x_{bc}$"), "x_(bc)");
    }

    #[test]
    fn sqrt_with_index() {
        assert_eq!(to_unicode("$\\sqrt[3]{x}$"), "³√x"); // cube root index
        assert_eq!(to_unicode("$\\sqrt[n]{x}$"), "ⁿ√x"); // n has a superscript
        // A command index must be lowered, never leak its backslash.
        assert_eq!(to_unicode("$\\sqrt[\\alpha]{x}$"), "α√x");
    }

    #[test]
    fn structural_command_in_script_falls_back_cleanly() {
        // `\frac` inside a script group has no inline-superscript form; the
        // whole group must fall back to `^(1/2)` — never silently corrupt to
        // x¹² by dropping the fraction structure.
        assert_eq!(to_unicode("$x^{\\frac{1}{2}}$"), "x^(1/2)");
        assert_eq!(to_unicode("$x_{\\frac{a}{b}}$"), "x_(a/b)");
    }

    #[test]
    fn quadratic_formula_full_chain() {
        assert_eq!(
            to_unicode("$\\frac{-b \\pm \\sqrt{b^2 - 4ac}}{2a}$"),
            "(-b ± √(b² - 4ac))/(2a)"
        );
    }
}
+pub mod math; pub mod palette; pub mod plots; pub mod projection; diff --git a/src/render/render.rs b/src/render/render.rs index ab1e6ef..352ef23 100644 --- a/src/render/render.rs +++ b/src/render/render.rs @@ -12824,6 +12824,16 @@ fn parse_inline_markup(text: &str) -> Vec { } } flush(&mut plain, &mut spans); + + // Lower any `$...$` math in each span to inline Unicode, so math works + // inside markdown body text just like in plain labels. Math is parsed + // after markdown, so the styling markers are already consumed. + for span in &mut spans { + if crate::render::math::needs_rewrite(&span.text) { + span.text = crate::render::math::to_unicode(&span.text); + } + } + spans } diff --git a/tests/math_lookup.rs b/tests/math_lookup.rs new file mode 100644 index 0000000..c068c4d --- /dev/null +++ b/tests/math_lookup.rs @@ -0,0 +1,92 @@ +//! Integration tests for `$...$` math in labels: every backend lowers math +//! regions to inline Unicode (the zero-dep lookup pass in `render::math`). +//! +//! This is the path `cargo test --features cli,full` exercises, so these +//! guard the default rendering behaviour. 
+
+use kuva::backend::svg::SvgBackend;
+use kuva::backend::terminal::TerminalBackend;
+use kuva::plot::scatter::ScatterPlot;
+use kuva::render::layout::Layout;
+use kuva::render::render::render_scatter;
+
+/// Builds a small scatter scene on a white background whose title and axis
+/// labels are the given strings.
+///
+/// The two data points and the axis ranges are fixed; only the label text
+/// varies, so each test can push `$...$` math through a different label slot.
+fn scatter_with_labels(title: &str, x: &str, y: &str) -> kuva::render::render::Scene {
+    let points = vec![(1.0_f64, 1.0), (2.0, 4.0)];
+    let scatter = ScatterPlot::new().with_data(points).with_color("steelblue");
+
+    let labelled = Layout::new((0.0, 3.0), (0.0, 10.0))
+        .with_title(title)
+        .with_x_label(x)
+        .with_y_label(y);
+
+    let scene = render_scatter(&scatter, labelled);
+    scene.with_background(Some("white"))
+}
+
+#[test]
+fn terminal_lowers_math_to_unicode() {
+    let scene = scatter_with_labels("$\\sigma^2$ over $\\mu$", "x", "y");
+    let rendered = TerminalBackend::new(120, 40).render_scene(&scene);
+
+    // Every lowered glyph from the title must reach the terminal output.
+    let expected_glyphs = [
+        ('σ', "expected σ in terminal output"),
+        ('μ', "expected μ in terminal output"),
+        ('²', "expected superscript ² in terminal output"),
+    ];
+    for &(glyph, why) in &expected_glyphs {
+        assert!(rendered.contains(glyph), "{}", why);
+    }
+
+    // No raw math source survives.
+    assert!(!rendered.contains('$'), "no `$` markers should remain");
+    assert!(
+        !rendered.contains("\\sigma"),
+        "no LaTeX command should remain"
+    );
+}
+
+#[test]
+fn svg_emits_unicode_text() {
+    let scene = scatter_with_labels("Title", "Variance, $\\sigma^2$ (units)", "y");
+    let markup = SvgBackend::default().render_scene(&scene);
+
+    assert!(markup.contains("σ²"), "expected lowered σ² in SVG text");
+
+    // Neither the opening of the math region nor the whole region may leak.
+    let forbidden = [
+        ("$\\sigma", "raw LaTeX source must not appear"),
+        ("$\\sigma^2$", "raw math region must not appear"),
+    ];
+    for &(raw, why) in &forbidden {
+        assert!(!markup.contains(raw), "{}", why);
+    }
+}
+
+#[test]
+fn svg_fractions_and_sqrt() {
+    let scene = scatter_with_labels("$\\frac{a}{b}$", "$\\sqrt{x}$", "y");
+    let markup = SvgBackend::default().render_scene(&scene);
+
+    let expected = [
+        ("a/b", "fraction lowered inline"),
+        ("√x", "sqrt lowered inline"),
+    ];
+    for &(fragment, why) in &expected {
+        assert!(markup.contains(fragment), "{}", why);
+    }
+    assert!(!markup.contains('$'), "no `$` markers should remain");
+}
+
+// An escaped `\$` is a literal dollar: the backslash is dropped and a plain
+// `$` is rendered, even when the label contains no math region.
+#[test]
+fn svg_escaped_dollar_is_literal() {
+    let scene = scatter_with_labels("Price \\$5", "x", "y");
+    let markup = SvgBackend::default().render_scene(&scene);
+
+    let dollar_kept = markup.contains("Price $5");
+    let backslash_leaked = markup.contains("\\$");
+    assert!(dollar_kept, "escape must be dropped");
+    assert!(!backslash_leaked, "backslash must not render");
+}
+
+// Math also works inside markdown TextPlot bodies (rich text). It's lowered to
+// inline Unicode after markdown markers are parsed.
+#[test]
+fn markdown_textplot_lowers_math() {
+    use kuva::plot::text::TextPlot;
+    use kuva::render::plots::Plot;
+    use kuva::render::render::render_multiple;
+
+    // A markdown body that mixes bold markup with two separate math regions.
+    let body = "The **variance** is $\\sigma^2$ and the mean is $\\mu$.";
+    let text_plot = TextPlot::new().with_title("Result").with_body(body);
+    let unit_layout = Layout::new((0.0, 1.0), (0.0, 1.0));
+
+    let scene = render_multiple(vec![Plot::Text(text_plot)], unit_layout);
+    let markup = SvgBackend::default().render_scene(&scene);
+
+    // Each lowered glyph must show up in the rendered SVG.
+    let expected_glyphs = [
+        ('σ', "expected lowered σ in markdown body"),
+        ('μ', "expected lowered μ in markdown body"),
+        ('²', "expected superscript ²"),
+    ];
+    for &(glyph, why) in &expected_glyphs {
+        assert!(markup.contains(glyph), "{}", why);
+    }
+
+    // The math delimiters themselves must be consumed.
+    assert!(!markup.contains('$'), "no raw `$` markers should remain");
+}