From 84401c30c580de3f6a2d3903bf04f0df99934f4d Mon Sep 17 00:00:00 2001 From: Mathieu BISKUPSKI Date: Wed, 29 Apr 2026 16:06:52 +0200 Subject: [PATCH] feat: add `next!` and `prev!` for in-place LazyNode traversal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `next(o::LazyNode)` allocates a fresh `LazyNode` on every call, which is fine for occasional use but adds up sharply when a downstream package walks a large document — e.g. extracting all `Placemark` elements from a 50 MiB KML can allocate ~1 M `LazyNode` wrappers in the iterator alone (~38 MiB cumulative on a single benchmark run). Add a strictly-additive in-place pair, `next!(o)` / `prev!(o)`, that mutates `o` to point at the next/previous node and returns `o` (or `nothing` at the document boundary). Exported alongside `next` / `prev`. The aliasing trade-off is documented in the docstring: callers must not retain references to a previous position unless they explicitly snapshot with `LazyNode(o.raw)`. The existing `next` / `prev` methods are unchanged; this is purely opt-in API surface for hot paths. --- src/XML.jl | 55 +++++++++++++++++++++++++++++++++++++++++++++++- test/runtests.jl | 45 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 1 deletion(-) diff --git a/src/XML.jl b/src/XML.jl index 273bfda..4204b35 100644 --- a/src/XML.jl +++ b/src/XML.jl @@ -9,7 +9,7 @@ export # Interface: children, nodetype, tag, attributes, value, is_simple, simplevalue, simple_value, # Extended Interface for LazyNode: - parent, depth, next, prev + parent, depth, next, prev, next!, prev! #-----------------------------------------------------------------------------# escape/unescape const escape_chars = ('&' => "&amp;", '<' => "&lt;", '>' => "&gt;", "'" => "&apos;", '"' => "&quot;") @@ -116,6 +116,59 @@ function prev(o::LazyNode) n.type === RawElementClose ? 
prev(LazyNode(n)) : LazyNode(n) end +""" + next!(o::LazyNode) -> LazyNode | Nothing + +In-place variant of [`next`](@ref): advance `o` to the next node in +document order by mutating its fields. Returns `o` (now repositioned) +or `nothing` if the end of the document has been reached. + +Functionally equivalent to `o = next(o)` but avoids allocating a fresh +`LazyNode` per traversal step. Tight loops that walk a large document +— for instance a downstream package extracting all `Placemark` +elements from a 50 MiB KML — can trade their per-step `LazyNode` +allocations for a single reused object. + +The trade-off is **aliasing**: `o` is the same object after each call, +so callers must NOT retain references to a previous position (e.g. by +pushing `o` into a collection) — those references would silently track +the new position instead. If you need to keep a snapshot, copy the +raw descriptor with `LazyNode(o.raw)`. +""" +function next!(o::LazyNode) + n = next(o.raw) + isnothing(n) && return nothing + while n !== nothing && n.type === RawElementClose + n = next(n) + end + isnothing(n) && return nothing + setfield!(o, :raw, n) + setfield!(o, :tag, nothing) + setfield!(o, :attributes, nothing) + setfield!(o, :value, nothing) + return o +end + +""" + prev!(o::LazyNode) -> LazyNode | Nothing + +In-place reverse counterpart of [`next!`](@ref); see that method's +docstring for the aliasing caveat. 
+""" +function prev!(o::LazyNode) + n = prev(o.raw) + isnothing(n) && return nothing + while n !== nothing && n.type === RawElementClose + n = prev(n) + end + isnothing(n) && return nothing + setfield!(o, :raw, n) + setfield!(o, :tag, nothing) + setfield!(o, :attributes, nothing) + setfield!(o, :value, nothing) + return o +end + #-----------------------------------------------------------------------------# Node """ Node(nodetype, tag, attributes, value, children) diff --git a/test/runtests.jl b/test/runtests.jl index 89978eb..78d51f0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -244,6 +244,51 @@ end end end +#-----------------------------------------------------------------------------# next! / prev! +@testset "LazyNode next! / prev!" begin + lzxml = """<root><a/><b/></root>""" + lz = XML.parse(XML.LazyNode, lzxml) + + # Functional equivalence: walking with `next!` visits the same nodes + # as the allocating `next` chain. + walker = XML.next(lz) + walked = [XML.write(walker)] + while XML.next!(walker) !== nothing + push!(walked, XML.write(walker)) + end + expected = String[] + n = XML.next(lz) + while n !== nothing + push!(expected, XML.write(n)) + n = XML.next(n) + end + @test walked == expected + + # Identity: `next!(o)` returns the very same object + walker = XML.next(lz) + @test XML.next!(walker) === walker + + # Memoization fields are reset when the node is repositioned + walker = XML.next(lz) + _ = walker.tag + @test getfield(walker, :tag) !== nothing + XML.next!(walker) + @test getfield(walker, :tag) === nothing + + # `nothing` at the document boundary, idempotent there + walker = XML.next(lz) + while XML.next!(walker) !== nothing; end + @test XML.next!(walker) === nothing + + # `prev!` is the symmetric counterpart + lz2 = XML.parse(XML.LazyNode, lzxml) + walker = XML.next(lz2) + XML.next!(walker); XML.next!(walker) # root → a → b + @test walker.tag == "b" + @test XML.prev!(walker) === walker # in-place + @test walker.tag == "a" +end + 
#-----------------------------------------------------------------------------# Preserve whitespace @testset "xml:space" begin @testset "Basic xml:space functionality" begin