diff --git a/Project.toml b/Project.toml
index db60d1b..102c676 100644
--- a/Project.toml
+++ b/Project.toml
@@ -5,6 +5,7 @@ version = "0.3.0"
 
 [deps]
 Conda = "8f4d0f93-b110-5947-807f-2305c1781a2d"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
 Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
 JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
@@ -18,6 +19,7 @@ StructTypes = "856f2bd8-1eba-4b0a-8007-ebc267875bd4"
 
 [compat]
 Conda = "^1.5.0"
+DataFrames = "1.7.0"
 DataStructures = "^0.18.11"
 Graphs = "^1.4.1"
 JSON3 = "^1.0.1"
diff --git a/src/SimpleHypergraphs.jl b/src/SimpleHypergraphs.jl
index c27e022..1cfb3d4 100644
--- a/src/SimpleHypergraphs.jl
+++ b/src/SimpleHypergraphs.jl
@@ -26,7 +26,7 @@ export get_twosection_adjacency_mx, get_twosection_weighted_adjacency_mx
 export dual
 
 export random_model, random_kuniform_model, random_dregular_model, random_preferential_model
-export Abstract_HG_format, HGF_Format, JSON_Format
+export Abstract_HG_format, HGF_Format, JSON_Format, HIF_Format
 export hg_load, hg_save
 
 export modularity
@@ -78,6 +78,8 @@ include("abstracttypes.jl")
 include("hypergraph.jl")
 include("io.jl")
 
+include("io_hif.jl")
+
 include("models/bipartite.jl")
 include("models/twosection.jl")
 include("models/random-models.jl")
diff --git a/src/io_hif.jl b/src/io_hif.jl
new file mode 100644
index 0000000..aa857be
--- /dev/null
+++ b/src/io_hif.jl
@@ -0,0 +1,290 @@
+using JSON3
+using DataFrames
+
+
+# Identifier type used for HIF nodes and edges.
+const _HIF_Id = Union{String, Int}
+
+# Metadata kept per vertex/hyperedge: its `attrs` dictionary or, failing that, its id.
+const _HIF_Meta = Union{_HIF_Id, Dict{String, Any}}
+
+
+"""
+    HIF_Format
+
+Format tag selecting the HIF (JSON-based) reader and writer in `hg_load` / `hg_save`.
+
+See https://github.com/pszufe/HIF-standard for a description of the format.
+"""
+struct HIF_Format <: Abstract_HG_format end
+
+
+"""
+    hg_load(io::IO, format::HIF_Format; T=Bool, D=Dict{Int, T},
+            sort_by_id=false, show_warning=true)
+
+Load a hypergraph from the stream `io` holding a JSON document in `HIF` format.
+
+Entries of `"nodes"` / `"edges"` are taken first (repeated identifiers are skipped),
+followed by identifiers that occur only in `"incidences"`. The position of an entry in
+that list becomes its vertex / hyperedge index. Its metadata is the `"attrs"` dictionary
+when one is given and the identifier itself otherwise. An incidence without `"weight"`
+gets weight `1`.
+
+Only the keywords below are accepted; the metadata types are fixed and cannot be chosen
+through `V` / `E` keywords.
+
+# Keywords
+- `T`: weight type, passed to `Hypergraph` as its first type parameter.
+- `D`: dictionary type, passed to `Hypergraph` as its last type parameter.
+- `sort_by_id`: order nodes and edges by identifier (integers before strings) before
+  indices are assigned.
+- `show_warning`: emit a warning when the identifiers differ from `1:n`.
+
+# Throws
+- `ArgumentError` when `"incidences"` is missing, or when it is empty and exactly one of
+  `"nodes"` / `"edges"` contains data.
+"""
+function hg_load(
+    io::IO,
+    format::HIF_Format;
+    T::Type{U} = Bool,
+    D::Type{<:AbstractDict{Int, U}} = Dict{Int, T},
+    sort_by_id::Bool=false,
+    show_warning::Bool=true,
+) where {U<:Real}
+    data = JSON3.read(read(io, String), Dict{String, Any})
+
+    haskey(data, "incidences") ||
+        throw(ArgumentError("Missing required attribute 'incidences'"))
+
+    if isempty(data["incidences"])
+        # "nodes" and "edges" are optional keys, so they must not be indexed directly.
+        no_edges = isempty(get(data, "edges", Any[]))
+        no_nodes = isempty(get(data, "nodes", Any[]))
+        if no_edges && no_nodes
+            return Hypergraph{T, _HIF_Meta, _HIF_Meta, D}(0, 0)
+        elseif no_edges || no_nodes
+            throw(ArgumentError(
+                "When incidences are empty, both 'nodes' and 'edges' must contain data",
+            ))
+        end
+    end
+
+    edges = build_edges_dataframe(data)
+    nodes = build_nodes_dataframe(data)
+
+    add_nodes_and_edges_from_incidences!(data, edges, nodes)
+
+    if sort_by_id
+        # Plain `sort!` throws on mixed integer/string identifiers, hence the key function.
+        sort!(edges, :edge; by=_hif_id_sortkey)
+        sort!(nodes, :node; by=_hif_id_sortkey)
+    end
+
+    if show_warning
+        # Triggered whenever the identifiers are not exactly 1, 2, ..., n in this order.
+        if edges.edge != 1:nrow(edges)
+            @warn "Edges in the source file were not sorted - their order was changed."
+        end
+
+        if nodes.node != 1:nrow(nodes)
+            @warn "Nodes in the source file were not sorted - their order was changed"
+        end
+    end
+
+    v_meta = _HIF_Meta[something(row.attrs, row.node) for row in eachrow(nodes)]
+    he_meta = _HIF_Meta[something(row.attrs, row.edge) for row in eachrow(edges)]
+
+    hg = Hypergraph{T, _HIF_Meta, _HIF_Meta, D}(nrow(nodes), nrow(edges), v_meta, he_meta)
+
+    add_weights_from_incidences!(data, hg, edges, nodes)
+
+    return hg
+end
+
+
+"""
+    hg_load(fname::String, format::HIF_Format; T=Bool, D=Dict{Int, T},
+            sort_by_id=false, show_warning=true)
+
+Open the file `fname` for reading and load a hypergraph in `HIF` format from it.
+
+All keyword arguments are forwarded to `hg_load(io::IO, format::HIF_Format; ...)`.
+"""
+function hg_load(
+    fname::String,
+    format::HIF_Format;
+    T::Type{U} = Bool,
+    D::Type{<:AbstractDict{Int, U}} = Dict{Int, T},
+    sort_by_id::Bool=false,
+    show_warning::Bool=true,
+) where {U<:Real}
+    return open(fname, "r") do io
+        hg_load(io, format; T=T, D=D, sort_by_id=sort_by_id, show_warning=show_warning)
+    end
+end
+
+
+# Sort key: integer identifiers first (by value), then string identifiers (lexicographic).
+_hif_id_sortkey(id::Integer) = (0, Int(id), "")
+_hif_id_sortkey(id::AbstractString) = (1, 0, String(id))
+
+
+"""
+    add_weights_from_incidences!(data, hg, edges, nodes)
+
+Write every entry of `data["incidences"]` into `hg`.
+
+The row order of `edges` and `nodes` defines the identifier-to-index mapping. An incidence
+without a `"weight"` key is stored with weight `1`; when an edge/node pair occurs more than
+once, the last occurrence wins.
+"""
+function add_weights_from_incidences!(
+    data::Dict{String, Any},
+    hg::Hypergraph,
+    edges::DataFrame,
+    nodes::DataFrame,
+)
+    edge_index = Dict{_HIF_Id, Int}(id => idx for (idx, id) in enumerate(edges.edge))
+    node_index = Dict{_HIF_Id, Int}(id => idx for (idx, id) in enumerate(nodes.node))
+
+    for inc in data["incidences"]
+        edge_idx = edge_index[inc["edge"]]
+        node_idx = node_index[inc["node"]]
+
+        hg[node_idx, edge_idx] = get(inc, "weight", 1)
+    end
+
+    return nothing
+end
+
+
+# Collect the distinct entries of `data[collection]` (first occurrence wins) into a table
+# with the columns `idkey` and `attrs`; the table is empty when `collection` is absent.
+function _hif_entities_dataframe(
+    data::Dict{String, Any},
+    collection::String,
+    idkey::String,
+)
+    table = DataFrame(
+        Symbol(idkey) => _HIF_Id[],
+        :attrs => Union{Nothing, Dict{String, Any}}[],
+    )
+
+    seen = Set{_HIF_Id}()
+    for entry in get(data, collection, Any[])
+        id = entry[idkey]
+        id ∈ seen && continue
+
+        push!(table, (id, get(entry, "attrs", nothing)))
+        push!(seen, id)
+    end
+
+    return table
+end
+
+
+"""
+    build_edges_dataframe(data)
+
+Return a `DataFrame` with the columns `edge` and `attrs` holding one row per distinct
+identifier in `data["edges"]`. `attrs` is `nothing` for an entry without an `"attrs"` key.
+"""
+function build_edges_dataframe(
+    data::Dict{String, Any},
+)
+    return _hif_entities_dataframe(data, "edges", "edge")
+end
+
+
+"""
+    build_nodes_dataframe(data)
+
+Return a `DataFrame` with the columns `node` and `attrs` holding one row per distinct
+identifier in `data["nodes"]`. `attrs` is `nothing` for an entry without an `"attrs"` key.
+"""
+function build_nodes_dataframe(
+    data::Dict{String, Any},
+)
+    return _hif_entities_dataframe(data, "nodes", "node")
+end
+
+
+"""
+    add_nodes_and_edges_from_incidences!(data, edges, nodes)
+
+Append to `nodes` and `edges`, with `attrs = nothing`, every identifier that occurs in
+`data["incidences"]` but has no row yet.
+"""
+function add_nodes_and_edges_from_incidences!(
+    data::Dict{String, Any},
+    edges::DataFrame,
+    nodes::DataFrame,
+)
+    edge_ids = Set{_HIF_Id}(edges.edge)
+    node_ids = Set{_HIF_Id}(nodes.node)
+
+    for incidence in data["incidences"]
+        node = incidence["node"]
+        edge = incidence["edge"]
+
+        if node ∉ node_ids
+            push!(nodes, (node, nothing))
+            push!(node_ids, node)
+        end
+
+        if edge ∉ edge_ids
+            push!(edges, (edge, nothing))
+            push!(edge_ids, edge)
+        end
+    end
+
+    return nothing
+end
+
+
+# Entry of the `"nodes"` / `"edges"` array. `attrs` is emitted only for dictionary metadata.
+_hif_entry(idkey::String, id::Int, meta::AbstractDict) =
+    Dict{String, Any}(idkey => id, "attrs" => meta)
+_hif_entry(idkey::String, id::Int, meta) = Dict{String, Any}(idkey => id)
+
+
+"""
+    hg_save(io::IO, h::Hypergraph, format::HIF_Format)
+
+Saves a hypergraph `h` to an output stream `io` in `HIF` format.
+
+Vertices and hyperedges are identified by their index in `h`. All of them are listed under
+`"nodes"` / `"edges"`, so those without any incidence are kept as well. Metadata that is a
+dictionary is written as `"attrs"`; metadata of any other type is not saved.
+
+If such a dictionary holds values of `Composite Types`,
+the user has to explicitly tell the JSON3 package about it, for instance using:
+
+`StructTypes.StructType(::Type{MyType}) = StructTypes.Struct()`.
+
+See the [JSON3.jl documentation](https://github.com/quinnj/JSON3.jl) for more details.
+"""
+function hg_save(io::IO, h::Hypergraph{T, V, E, D}, format::HIF_Format) where {T, V, E, D}
+    incidences = Vector{Dict{String, Union{String, Int, T}}}()
+    for node in 1:nhv(h)
+        # `h` is indexed as `h[node, edge]`, so the outer index is the node.
+        for edge in sort!(collect(keys(gethyperedges(h, node))))
+            push!(
+                incidences,
+                Dict{String, Union{String, Int, T}}(
+                    "edge" => edge, "node" => node, "weight" => T(h[node, edge]),
+                ),
+            )
+        end
+    end
+
+    nodes = [_hif_entry("node", idx, meta) for (idx, meta) in enumerate(h.v_meta)]
+    edges = [_hif_entry("edge", idx, meta) for (idx, meta) in enumerate(h.he_meta)]
+
+    json_hg = Dict{Symbol, Any}(
+        :incidences => incidences, :nodes => nodes, :edges => edges,
+    )
+    return JSON3.write(io, json_hg)
+end
diff --git a/test/data/HIF-standard/README.md b/test/data/HIF-standard/README.md
new file mode 100644
index 0000000..cc3f453
--- /dev/null
+++ b/test/data/HIF-standard/README.md
@@ -0,0 +1,6 @@
+# HIF-standard
+
+In this directory you can find files used to test import and export of Hypergraphs in the `HIF` format.
+
+Read more about `HIF` here:
+https://github.com/pszufe/HIF-standard
diff --git a/test/data/HIF-standard/duplicated_nodes_edges.json b/test/data/HIF-standard/duplicated_nodes_edges.json
new file mode 100644
index 0000000..a02124e
--- /dev/null
+++ b/test/data/HIF-standard/duplicated_nodes_edges.json
@@ -0,0 +1,7 @@
+{
+    "network-type": "undirected",
+    "metadata": {},
+    "nodes": [{"node": "n1"}, {"node": "n1"}],
+    "edges": [{"edge": "e1"}, {"edge": "e1"}],
+    "incidences": [{"edge": "e1", "node": "n1"}, {"edge": "e1", "node": "n1"}]
+}
\ No newline at end of file
diff --git a/test/data/HIF-standard/empty_arrays.json b/test/data/HIF-standard/empty_arrays.json
new file mode 100644
index 0000000..7b0ce2d
--- /dev/null
+++ b/test/data/HIF-standard/empty_arrays.json
@@ -0,0 +1,7 @@
+{
+    "network-type": "undirected",
+    "metadata": {},
+    "incidences": [],
+    "nodes": [],
+    "edges": []
+}
\ No newline at end of file
diff --git a/test/data/HIF-standard/empty_hypergraph.json b/test/data/HIF-standard/empty_hypergraph.json
new file mode 100644
index 0000000..7a65310
--- /dev/null
+++ b/test/data/HIF-standard/empty_hypergraph.json
@@ -0,0 +1,3 @@
+{
+    "incidences": []
+}
\ No newline at end of file
diff --git a/test/data/HIF-standard/metadata_with_deeply_nested_attributes.json b/test/data/HIF-standard/metadata_with_deeply_nested_attributes.json
new file mode 100644
index 0000000..bd78510
--- /dev/null
+++ b/test/data/HIF-standard/metadata_with_deeply_nested_attributes.json
@@ -0,0 +1,15 @@
+{
+    "network-type": "asc",
+    "metadata": {
+        "level1": {
+            "level2": {
+                "level3": {
+                    "key": "value"
+                }
+            }
+        }
+    },
+    "incidences": [{"edge": 1, "node": 2}],
+    "nodes": [{"node": "n1", "attrs": {"nested_attr": {"key1": "value1"}}}],
+    "edges": [{"edge": "e1", "attrs": {"nested_attr": {"key2": "value2"}}}]
+}
\ No newline at end of file
diff --git a/test/data/HIF-standard/metadata_with_nested_attributes.json b/test/data/HIF-standard/metadata_with_nested_attributes.json
new file mode 100644
index 0000000..697be84
--- /dev/null
+++ b/test/data/HIF-standard/metadata_with_nested_attributes.json
@@ -0,0 +1,13 @@
+{
+    "network-type": "asc",
+    "metadata": {
+        "creator": "nested_test",
+        "extra_info": {
+            "key1": "value1",
+            "key2": "value2"
+        }
+    },
+    "incidences": [{"edge": 10, "node": 20}],
+    "nodes": [{"node": 20, "attrs": {"color": "blue", "size": "large"}}],
+    "edges": [{"edge": 10, "attrs": {"priority": "high"}}]
+}
\ No newline at end of file
diff --git a/test/data/HIF-standard/missing_direction.json b/test/data/HIF-standard/missing_direction.json
new file mode 100644
index 0000000..f0a4be6
--- /dev/null
+++ b/test/data/HIF-standard/missing_direction.json
@@ -0,0 +1,5 @@
+{
+    "network-type": "directed",
+    "metadata": {},
+    "incidences": [{"edge": 1, "node": 2}]
+}
\ No newline at end of file
diff --git a/test/data/HIF-standard/single_incidence.json b/test/data/HIF-standard/single_incidence.json
new file mode 100644
index 0000000..1453273
--- /dev/null
+++ b/test/data/HIF-standard/single_incidence.json
@@ -0,0 +1,8 @@
+{
+    "incidences": [
+        {
+            "edge": "abcd",
+            "node": 42
+        }
+    ]
+}
\ No newline at end of file
diff --git a/test/data/HIF-standard/single_incidence_with_attrs.json b/test/data/HIF-standard/single_incidence_with_attrs.json
new file mode 100644
index 0000000..5d5dece
--- /dev/null
+++ b/test/data/HIF-standard/single_incidence_with_attrs.json
@@ -0,0 +1,12 @@
+{
+    "incidences": [
+        {
+            "edge": "abcd",
+            "node": 42,
+            "attrs": {
+                "role": "PI",
+                "age": 42
+            }
+        }
+    ]
+}
\ No newline at end of file
diff --git a/test/data/HIF-standard/single_incidence_with_weights.json b/test/data/HIF-standard/single_incidence_with_weights.json
new file mode 100644
index 0000000..52cb6a4
--- /dev/null
+++ b/test/data/HIF-standard/single_incidence_with_weights.json
@@ -0,0 +1,9 @@
+{
+    "incidences": [
+        {
+            "edge": "abcd",
+            "node": 42,
+            "weight": -2
+        }
+    ]
+}
\ No newline at end of file
diff --git a/test/data/HIF-standard/valid_incidence_head.json b/test/data/HIF-standard/valid_incidence_head.json
new file mode 100644
index 0000000..ed554cc
--- /dev/null
+++ b/test/data/HIF-standard/valid_incidence_head.json
@@ -0,0 +1,5 @@
+{
+    "network-type": "directed",
+    "metadata": {},
+    "incidences": [{"edge": 1, "node": 2, "direction": "head"}]
+}
\ No newline at end of file
diff --git a/test/data/HIF-standard/valid_incidence_tail.json b/test/data/HIF-standard/valid_incidence_tail.json
new file mode 100644
index 0000000..3aa3bb2
--- /dev/null
+++ b/test/data/HIF-standard/valid_incidence_tail.json
@@ -0,0 +1,5 @@
+{
+    "network-type": "directed",
+    "metadata": {},
+    "incidences": [{"edge": 1, "node": 2, "direction": "tail"}]
+}
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index 1537cb6..0983c50 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -8,6 +8,31 @@ using DataStructures
 import Graphs
 
 
+@testset "HIF test" begin
+    dir = "data/HIF-standard"
+
+    for file in readdir(dir)
+        full_path = joinpath(dir, file)
+
+        endswith(file, ".json") || continue
+
+        @testset "File: $file" begin
+            h = hg_load(full_path, HIF_Format(), T=Real)
+
+            io_h = IOBuffer()
+
+            hg_save(io_h, h, HIF_Format())
+
+            seekstart(io_h)
+
+            h_loaded = hg_load(io_h, HIF_Format(), T=Real)
+
+            @test h == h_loaded
+        end
+    end
+end
+
+
 h1 = Hypergraph{Float64, Int, String}(5,4)
 h1[1:3,1] .= 1.5
 h1[3,4] = 2.5
@@ -74,6 +99,12 @@ h1[5,2] = 6.5
     @test get_vertex_meta(h1, 1) == get_vertex_meta(loaded_hg, 1)
     @test get_hyperedge_meta(h1, 2) == get_hyperedge_meta(loaded_hg, 2)
 
+    hg_save("test.json", h1, format=HIF_Format())
+    loaded_hg = hg_load("test.json", HIF_Format(), T=Float64, V=Int, E=String)
+
+    @test h1 == loaded_hg
+    @test h1.v_meta == loaded_hg.v_meta
+    @test h1.he_meta == loaded_hg.he_meta
 end
 
 @test_throws ArgumentError hg_load("data/test_malformedcomment.hgf"; T=Int)