diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 55c3b2a..27ff1dd 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -9,7 +9,8 @@ jobs: strategy: matrix: version: - - "min" + # TODO: Put back after https://github.com/JuliaIO/FileIO.jl/pull/427 + #- "min" - "1" - "pre" os: diff --git a/Project.toml b/Project.toml index 4c609d5..a6286f4 100644 --- a/Project.toml +++ b/Project.toml @@ -7,25 +7,35 @@ authors = ["Erik Schnetter "] projects = ["test", "docs"] [deps] +AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" ChunkCodecLibBlosc = "c6a955be-ab7f-4fbb-b38f-caf93db6b928" ChunkCodecLibBzip2 = "2b723af9-f480-4e8d-a1e4-4a9f5a906122" ChunkCodecLibLz4 = "7e9cc85e-5614-42a3-ad86-b78f920b38a5" ChunkCodecLibZlib = "4c0bbee4-addc-4d73-81a0-b6caacae83c8" ChunkCodecLibZstd = "55437552-ac27-4d47-9aa3-63184e8fd398" CodecXz = "ba30903b-d9e8-5048-a5ec-d1f5b0d4b47b" +FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" MD5 = "6ac74813-4b46-53a4-afec-0b5dc9d7885c" +OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" PkgVersion = "eebad327-c553-4316-9ea0-9fa01ccd7688" StridedViews = "4db3bf67-4bd7-4b4e-b153-31dc3fb37143" YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" +# TODO: Remove after https://github.com/JuliaIO/FileIO.jl/pull/427 +[sources] +FileIO = {url = "https://github.com/icweaver/FileIO.jl", rev = "asdf"} + [compat] +AbstractTrees = "0.4.5" ChunkCodecLibBlosc = "0.2.0, 0.3" ChunkCodecLibBzip2 = "0.2.0, 1.0" ChunkCodecLibLz4 = "0.2.1, 1.0" ChunkCodecLibZlib = "0.2.0, 1.0" ChunkCodecLibZstd = "0.2.0, 1.0" CodecXz = "0.7.4" +FileIO = "1" MD5 = "0.2.1" +OrderedCollections = "1.8.1" PkgVersion = "0.3.3" StridedViews = "0.2.1, 0.3, 0.4" YAML = "0.4.9" diff --git a/docs/make.jl b/docs/make.jl index 80d9484..1f6ae0a 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -11,6 +11,7 @@ makedocs(; prettyurls = true, canonical = "https://juliaastro.org/ASDF/stable/", ), + doctest = false, ) deploydocs(; diff --git a/src/ASDF.jl b/src/ASDF.jl 
index 17d031c..e9f40e5 100644 --- a/src/ASDF.jl +++ b/src/ASDF.jl @@ -10,6 +10,11 @@ using MD5: md5 using PkgVersion: PkgVersion using StridedViews: StridedView using YAML: YAML +using OrderedCollections: OrderedDict +using FileIO: @format_str, File, load, save +using AbstractTrees: AbstractTrees + +export load, save ################################################################################ @@ -594,7 +599,7 @@ end struct ASDFFile filename::AbstractString - metadata::Dict{Any,Any} + metadata::OrderedDict{Any,Any} lazy_block_headers::LazyBlockHeaders end @@ -602,14 +607,132 @@ function YAML.write(file::ASDFFile) return "[ASDF file \"$(file.filename)\"]\n" * YAML.write(file.metadata) end +Base.getindex(af::ASDFFile, key) = af.metadata[key] + +struct ASDFTreeNode + key::Any + value::Any +end + +AbstractTrees.children(n::ASDFTreeNode) = + n.value isa ASDFFile ? [ASDFTreeNode(k, v) for (k, v) in n.value.metadata] : + n.value isa AbstractDict ? [ASDFTreeNode(k, v) for (k, v) in sort(collect(n.value); by = first)] : () + +AbstractTrees.printnode(io::IO, n::ASDFTreeNode) = + n.key === nothing ? print(io, basename(n.value.filename)) : + n.value isa AbstractDict ? print(io, n.key, "::", typeof(n.key)) : + n.value isa NDArray ? print(io, n.key, "::", typeof(n.value), " | shape = ", n.value.shape) : + n.value isa AbstractVector ? print(io, n.key, "::" , typeof(n.value), " | shape = ", size(n.value)) : + print(io, n.key, "::", typeof(n.value), " | ", n.value) + +""" + info(io::IO, af::ASDFFile; max_rows = 20) + +Display up to `max_rows` lines of `af` tree. `Base.show` calls this function internally to display this type. Set `max_rows = Inf` to display all rows. 
+ +## Examples + +```jldoctest +julia> using OrderedCollections: OrderedDict + +julia> doc = OrderedDict("field_\$(i)" => rand(10) for i in 1:25); + +julia> save("long.asdf", doc) + +julia> af = load("long.asdf") +long.asdf +├─ field_1::Vector{Float64} | shape = (10,) +├─ field_2::Vector{Float64} | shape = (10,) +├─ field_3::Vector{Float64} | shape = (10,) +├─ field_4::Vector{Float64} | shape = (10,) +├─ field_5::Vector{Float64} | shape = (10,) +├─ field_6::Vector{Float64} | shape = (10,) +├─ field_7::Vector{Float64} | shape = (10,) +├─ field_8::Vector{Float64} | shape = (10,) +├─ field_9::Vector{Float64} | shape = (10,) +├─ field_10::Vector{Float64} | shape = (10,) +├─ field_11::Vector{Float64} | shape = (10,) +├─ field_12::Vector{Float64} | shape = (10,) +├─ field_13::Vector{Float64} | shape = (10,) +├─ field_14::Vector{Float64} | shape = (10,) +├─ field_15::Vector{Float64} | shape = (10,) +├─ field_16::Vector{Float64} | shape = (10,) +├─ field_17::Vector{Float64} | shape = (10,) +├─ field_18::Vector{Float64} | shape = (10,) +├─ field_19::Vector{Float64} | shape = (10,) + ⋮ (7) more rows + +julia> ASDF.info(af; max_rows = 5) +long.asdf +├─ field_1::Vector{Float64} | shape = (10,) +├─ field_2::Vector{Float64} | shape = (10,) +├─ field_3::Vector{Float64} | shape = (10,) +├─ field_4::Vector{Float64} | shape = (10,) + ⋮ (22) more rows + +julia> ASDF.info(af; max_rows = Inf) +long.asdf +├─ field_1::Vector{Float64} | shape = (10,) +├─ field_2::Vector{Float64} | shape = (10,) +├─ field_3::Vector{Float64} | shape = (10,) +├─ field_4::Vector{Float64} | shape = (10,) +├─ field_5::Vector{Float64} | shape = (10,) +├─ field_6::Vector{Float64} | shape = (10,) +├─ field_7::Vector{Float64} | shape = (10,) +├─ field_8::Vector{Float64} | shape = (10,) +├─ field_9::Vector{Float64} | shape = (10,) +├─ field_10::Vector{Float64} | shape = (10,) +├─ field_11::Vector{Float64} | shape = (10,) +├─ field_12::Vector{Float64} | shape = (10,) +├─ field_13::Vector{Float64} | shape = (10,) +├─ 
field_14::Vector{Float64} | shape = (10,) +├─ field_15::Vector{Float64} | shape = (10,) +├─ field_16::Vector{Float64} | shape = (10,) +├─ field_17::Vector{Float64} | shape = (10,) +├─ field_18::Vector{Float64} | shape = (10,) +├─ field_19::Vector{Float64} | shape = (10,) +├─ field_20::Vector{Float64} | shape = (10,) +├─ field_21::Vector{Float64} | shape = (10,) +├─ field_22::Vector{Float64} | shape = (10,) +├─ field_23::Vector{Float64} | shape = (10,) +├─ field_24::Vector{Float64} | shape = (10,) +├─ field_25::Vector{Float64} | shape = (10,) +└─ asdf/library::String + ├─ author::String | Erik Schnetter + ├─ homepage::String | https://github.com/JuliaAstro/ASDF.jl + ├─ name::String | ASDF.jl + └─ version::String | 2.0.0 +""" +function info(io::IO, af::ASDFFile; max_rows = 20) + root = ASDFTreeNode(nothing, af) + n_rows = sum(1 for _ in AbstractTrees.PostOrderDFS(root)) + + if n_rows ≤ max_rows + AbstractTrees.print_tree(io, root) + else + # Store entire tree in `buf` + buf = IOBuffer() + AbstractTrees.print_tree(buf, root) + # Only print up to `max_rows` lines from that buffer + lines = split(String(take!(buf)), '\n', keepempty = false) + foreach(l -> println(io, l), Iterators.take(lines, max_rows)) + println(io, " ⋮ (", n_rows - max_rows, ") more rows") + end +end +info(af; kwargs...) = info(stdout, af; kwargs...) 
+ +Base.show(io::IO, ::MIME"text/plain", af::ASDFFile) = info(io, af) # Display up to `max_rows` by default + ################################################################################ -function load_file(filename::AbstractString; extensions = false, validate_checksum = true) - asdf_constructors = copy(YAML.default_yaml_constructors) - asdf_constructors["tag:stsci.edu:asdf/core/asdf-1.1.0"] = asdf_constructors["tag:yaml.org,2002:map"] - asdf_constructors["tag:stsci.edu:asdf/core/software-1.0.0"] = asdf_constructors["tag:yaml.org,2002:map"] - asdf_constructors["tag:stsci.edu:asdf/core/extension_metadata-1.0.0"] = asdf_constructors["tag:yaml.org,2002:map"] +ordered_map_constructor = (constructor, node) -> YAML.construct_mapping(OrderedDict{Any,Any}, constructor, node) +asdf_constructors = copy(YAML.default_yaml_constructors) +delete!(asdf_constructors, "tag:yaml.org,2002:map") # Let dicttype= handle plain maps +asdf_constructors["tag:stsci.edu:asdf/core/asdf-1.1.0"] = ordered_map_constructor +asdf_constructors["tag:stsci.edu:asdf/core/software-1.0.0"] = ordered_map_constructor +asdf_constructors["tag:stsci.edu:asdf/core/extension_metadata-1.0.0"] = ordered_map_constructor +function load_file(filename::AbstractString; extensions = false, validate_checksum = true) if extensions # Use fallbacks for now asdf_constructors[nothing] = (constructor, node) -> begin @@ -635,12 +758,46 @@ function load_file(filename::AbstractString; extensions = false, validate_checks asdf_constructors′["tag:stsci.edu:asdf/core/ndarray-chunk-1.0.0"] = construct_yaml_ndarray_chunk asdf_constructors′["tag:stsci.edu:asdf/core/chunked-ndarray-1.0.0"] = construct_yaml_chunked_ndarray - metadata = YAML.load(io, asdf_constructors′) + metadata = YAML.load(io, asdf_constructors′; dicttype = OrderedDict{Any, Any}) # lazy_block_headers.block_headers = find_all_blocks(io, position(io)) lazy_block_headers.block_headers = find_all_blocks(io; validate_checksum) return ASDFFile(filename, metadata, 
lazy_block_headers) end +""" + load(f::AbstractString) + +Load an asdf file at filepath `f`. + +## Examples + +```jldoctest +julia> using OrderedCollections: OrderedDict + +julia> doc = OrderedDict("field_\$(i)" => rand(10) for i in 1:5); # Create some sample data + +julia> save("myfile.asdf", doc) + +julia> load("myfile.asdf") +myfile.asdf +├─ field_1::Vector{Float64} | shape = (10,) +├─ field_2::Vector{Float64} | shape = (10,) +├─ field_3::Vector{Float64} | shape = (10,) +├─ field_4::Vector{Float64} | shape = (10,) +├─ field_5::Vector{Float64} | shape = (10,) +└─ asdf/library::String + ├─ author::String | Erik Schnetter + ├─ homepage::String | https://github.com/JuliaAstro/ASDF.jl + ├─ name::String | ASDF.jl + └─ version::String | 2.0.0 +``` +""" +function fileio_load(f::File{format"ASDF"}) + return load_file(f.filename) +end + +@doc (@doc fileio_load) load + ################################################################################ ################################################################################ ################################################################################ @@ -653,7 +810,7 @@ struct ASDFLibrary end function YAML._print(io::IO, val::ASDFLibrary, level::Int=0, ignore_level::Bool=false) println(io, "!core/software-1.0.0") - library = Dict(:name => val.name, :author => val.author, :homepage => val.homepage, :version => val.version) + library = OrderedDict(:name => val.name, :author => val.author, :homepage => val.homepage, :version => val.version) YAML._print(io, library, level, ignore_level) end @@ -688,7 +845,7 @@ function YAML._print(io::IO, val::NDArrayWrapper, level::Int=0, ignore_level::Bo data = val.array # Split multidimensional arrays into array-of-arrays data = eachslice(data; dims=Tuple(2:ndims(data))) - ndarray = Dict( + ndarray = OrderedDict( :data => data, :shape => collect(reverse(size(val.array)))::Vector{<:Integer}, :datatype => string(Datatype(eltype(val.array))), @@ -700,7 +857,7 @@ function 
YAML._print(io::IO, val::NDArrayWrapper, level::Int=0, ignore_level::Bo source = length(blocks.arrays) # `write_file()` has a corresponding `push!()` to `blocks.positions` push!(blocks.arrays, val) - ndarray = Dict( + ndarray = OrderedDict( :source => source::Integer, :shape => collect(reverse(size(val.array)))::Vector{<:Integer}, :datatype => string(Datatype(eltype(val.array))), @@ -748,20 +905,20 @@ function encode_Lz4_block(input::AbstractVector{UInt8}; chunk_size::Int = 1024 * return out end -function write_file(filename::AbstractString, document::Dict{Any,Any}) +function write_file(filename::AbstractString, document::AbstractDict) # Set up block descriptors global blocks empty!(blocks) # Ensure standard tags are present # TODO: - # - provide a function that generates a standard empty document - # - don't modify the input - # - remove the `{Any,Any}` in the test cases - # - maybe make the document not a `Dict` but the stuff with the `metadata` that the writer returns? - get!(document, "asdf/library") do - ASDFLibrary(software_name, software_author, software_homepage, software_version) - end + # - [ ] provide a function that generates a standard empty document + # - [x] don't modify the input + # - [x] remove the `{Any,Any}` in the test cases + # - [ ] maybe make the document not a `Dict` but the stuff with the `metadata` that the writer returns? + # - [ ] preserve insertion order? 
https://github.com/JuliaAstro/ASDF.jl/tree/ordered + library = ASDFLibrary(software_name, software_author, software_homepage, software_version) + full_document = merge(document, OrderedDict{Any, Any}("asdf/library" => library)) # Write YAML part of file io = open(filename, "w") @@ -775,7 +932,7 @@ function write_file(filename::AbstractString, document::Dict{Any,Any}) --- !core/asdf-1.1.0""", ) - YAML.write(io, document) + YAML.write(io, full_document) println(io, "...") # Write blocks @@ -898,4 +1055,25 @@ function write_file(filename::AbstractString, document::Dict{Any,Any}) return nothing end +""" + save(f::String, data) + +Save `data` to an asdf file at filepath `f`. + +## Examples + +```jldoctest +julia> using OrderedCollections: OrderedDict + +julia> data = OrderedDict("field_\$(i)" => rand(10) for i in 1:5); # Create some sample data + +julia> save("myfile.asdf", data) +``` +""" +function fileio_save(f::File{format"ASDF"}, data) + return write_file(f.filename, data) +end + +@doc (@doc fileio_save) save + end diff --git a/test/Project.toml b/test/Project.toml index c7f25f6..be3639a 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,6 +1,8 @@ [deps] ASDF = "686f71d1-807d-59a4-a860-28280ea06d7b" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" ParallelTestRunner = "d3525ed8-44d0-4b2c-a655-542cee43accc" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" diff --git a/test/runtests.jl b/test/runtests.jl index 28d5d15..da70a4f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,11 @@ using ParallelTestRunner: runtests, find_tests, parse_args using ASDF +# Doctest +using Documenter +DocMeta.setdocmeta!(ASDF, :DocTestSetup, :(using ASDF); recursive = true) +doctest(ASDF) + const init_code = quote using ASDF using Test diff --git a/test/test-read.jl b/test/test-read.jl index bd25b6a..88137be 100644 
--- a/test/test-read.jl +++ b/test/test-read.jl @@ -1,15 +1,15 @@ @testset "Read ASDF file" begin - asdf = ASDF.load_file("blue_upchan_gain.00000000.asdf") + asdf = load("blue_upchan_gain.00000000.asdf") println(YAML.write(asdf.metadata)) map_tree(output, asdf.metadata) - buffer = asdf.metadata[0]["buffer"][] + buffer = asdf[0]["buffer"][] @test eltype(buffer) == Float16 @test size(buffer) == (256,) @test buffer == fill(1, 256) - dish_index = asdf.metadata[0]["dish_index"][] + dish_index = asdf[0]["dish_index"][] @test eltype(dish_index) == Int32 @test size(dish_index) == (3, 2) @test dish_index == [ diff --git a/test/test-read_chunked.jl b/test/test-read_chunked.jl index b7a101c..ca0dbef 100644 --- a/test/test-read_chunked.jl +++ b/test/test-read_chunked.jl @@ -1,10 +1,10 @@ @testset "Read ASDF file with chunked arrays" begin - asdf = ASDF.load_file("chunking.asdf") + asdf = load("chunking.asdf") println(YAML.write(asdf.metadata)) map_tree(output, asdf.metadata) - chunky = asdf.metadata["chunky"][] + chunky = asdf["chunky"][] @test eltype(chunky) == Float16 @test size(chunky) == (4, 4) @test chunky == [ diff --git a/test/test-show.jl b/test/test-show.jl new file mode 100644 index 0000000..eb965b3 --- /dev/null +++ b/test/test-show.jl @@ -0,0 +1,10 @@ +@testset "Show method for `ASDF.ASDFFile`" begin + af = load("blue_upchan_gain.00000000.asdf") + + @test occursin("blue_upchan_gain.00000000.asdf\n├─", sprint(show, MIME"text/plain"(), af)) + + @test occursin("(5) more rows", sprint(io -> ASDF.info(io, af; max_rows = 5))) + + # I'm sure there's a better way to test this code path + @test ASDF.info(af; max_rows = 5) == nothing +end diff --git a/test/test-write.jl b/test/test-write.jl index 4960107..479b523 100644 --- a/test/test-write.jl +++ b/test/test-write.jl @@ -3,10 +3,10 @@ filename = joinpath(dirname, "output.asdf") array = Float64[1/(i+j+k-2) for i in 1:50, j in 1:51, k in 1:52] - doc = Dict{Any,Any}( + doc = Dict( "data1" => ASDF.NDArrayWrapper([1 2; 3 4]; 
inline=false), "data2" => ASDF.NDArrayWrapper([1 2; 3 4]; inline=true), - "group" => Dict{Any,Any}( + "group" => Dict( "element1" => ASDF.NDArrayWrapper(array; compression=ASDF.C_None), "element2" => ASDF.NDArrayWrapper(array; compression=ASDF.C_Blosc), "element3" => ASDF.NDArrayWrapper(array; compression=ASDF.C_Bzip2), @@ -17,26 +17,26 @@ "element8" => ASDF.NDArrayWrapper(array; compression=ASDF.C_Zstd), ), ) - ASDF.write_file(filename, doc) + save(filename, doc) - doc′ = ASDF.load_file(filename) + doc′ = load(filename) map_tree(output, doc′.metadata) data1 = doc["data1"][] - data1′ = doc′.metadata["data1"][] + data1′ = doc′["data1"][] @test eltype(data1′) == eltype(data1) @test size(data1′) == size(data1) @test data1′ == data1 data2 = doc["data2"][] - data2′ = doc′.metadata["data2"][] + data2′ = doc′["data2"][] @test eltype(data2′) == eltype(data2) @test size(data2′) == size(data2) @test data2′ == data2 for n in 1:8 element = doc["group"]["element$n"][] - element′ = doc′.metadata["group"]["element$n"][] + element′ = doc′["group"]["element$n"][] @test eltype(element′) == eltype(element) @test size(element′) == size(element) @test element′ == element