From 42a7e82cd8bd699c79c5ee1866d0913dd44a89db Mon Sep 17 00:00:00 2001 From: djukic14 Date: Fri, 21 Nov 2025 15:59:47 +0100 Subject: [PATCH 1/2] Add coloring of basis functions --- Project.toml | 6 +++ src/BEAST.jl | 11 +++++ src/coloring.jl | 59 +++++++++++++++++++++++++++ src/integralop.jl | 89 ++++++++++++++++++++++++++++++++++------- src/operator.jl | 32 +++++++++++++++ src/postproc.jl | 39 ++++++++---------- test/Project.toml | 2 + test/runtests.jl | 2 + test/test_coloring.jl | 93 +++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 297 insertions(+), 36 deletions(-) create mode 100644 src/coloring.jl create mode 100644 test/test_coloring.jl diff --git a/Project.toml b/Project.toml index 9bf4cbd3..81cb3a70 100644 --- a/Project.toml +++ b/Project.toml @@ -15,6 +15,7 @@ ExtendableSparse = "95c220a8-a1cf-11e9-0c77-dbfce5f500b3" FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" FastGaussQuadrature = "442a2c76-b920-505d-bb47-c5924d526838" FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" +GraphsColoring = "ed38bb8a-6120-4024-8d1e-8789784f251b" Infiltrator = "5903a43b-9cc3-4c30-8d17-598619ec4e9b" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" @@ -23,6 +24,8 @@ LiftedMaps = "d22a30c1-52ac-4762-a8c9-5838452405e0" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearMaps = "7a12625a-238d-50fd-b39a-03d52299707e" NestedUnitRanges = "032820ab-dc03-4b49-91f4-7d58d4da98b3" +OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5" +ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Requires = "ae029012-a4dd-5104-9daa-d747884805df" SauterSchwab3D = "0a13313b-1c00-422e-8263-562364ed9544" SauterSchwabQuadrature = "535c7bfe-2023-5c1d-b712-654ef9d93a38" @@ -46,12 +49,15 @@ ExtendableSparse = "1.4" FFTW = "0.2.3, 1" FastGaussQuadrature = "0.3, 0.4, 0.5, 1" FillArrays = "0.11, 0.12, 0.13, 1" +GraphsColoring = "0.2.0" Infiltrator = "1.8.2" IterativeSolvers = "0.9" Krylov = "0.10.1" LiftedMaps = "0.5.1" LinearMaps = "3.7 - 3.9, 3.11.2" NestedUnitRanges = "0.2.3" +OhMyThreads = "0.8.3" +ProgressMeter = "1.11.0" Requires = "1" SauterSchwab3D = "0.2" SauterSchwabQuadrature = "2.4.0" diff --git a/src/BEAST.jl b/src/BEAST.jl index 4a517e81..9089aeb7 100644 --- a/src/BEAST.jl +++ b/src/BEAST.jl @@ -1,5 +1,8 @@ module BEAST +using GraphsColoring +using OhMyThreads +using ProgressMeter using Distributed using LinearAlgebra using SharedArrays @@ -141,6 +144,12 @@ using SparseArrays function convolve end +function progressbar(workload, verbose; color=:white, kwargs...) + return Progress( + workload; barglyphs=BarGlyphs("[=> ]"), color=color, enabled=verbose, kwargs... + ) +end + include("utils/polynomial.jl") include("utils/specialfns.jl") include("utils/combinatorics.jl") @@ -201,6 +210,8 @@ include("bases/tensorbasis.jl") include("bases/composedbasis.jl") include("bases/local/localcomposedbasis.jl") +include("coloring.jl") + include("operator.jl") include("quadrature/strategies/quadstrat.jl") diff --git a/src/coloring.jl b/src/coloring.jl new file mode 100644 index 00000000..2916c4f5 --- /dev/null +++ b/src/coloring.jl @@ -0,0 +1,59 @@ +""" + function conflicts( + space::Space; + addata=assemblydata(space), + kwargs..., + ) + +Computes conflict indices for a Space. Two elements are in conflict if they are both +part of the support of the same basis function. + +# Arguments + + - `space::Space`: The space. + - `addata=assemblydata(space)`: The assembly data for the space (default: computed using `assemblydata`). + - `kwargs...`: Additional keyword arguments. + +# Returns + +A tuple containing: + + - `eachindex(elements)`: The indices of the elements. + - `ConflictFunctor(conflictindices)`: A functor that maps element indices to conflict indices. + - `Base.OneTo(numfunctions(space))`: The indices of the conflicts. +""" +function GraphsColoring.conflicts( + space::Space; + addata=assemblydata(space), + kwargs..., +) + elements, ad, _ = addata + + conflictindices = Vector{Int}[Int[] for _ in eachindex(elements)] + + for elementid in eachindex(elements) + for i in 1:length(ad[elementid]) + for (functionid, a) in ad[elementid, i] + iszero(a) && continue + push!(conflictindices[elementid], functionid) + end + end + end + + for i in eachindex(conflictindices) + conflictindices[i] = unique(conflictindices[i]) + end + + return eachindex(elements), + GraphsColoring.ConflictFunctor(conflictindices), + Base.OneTo(numfunctions(space)) +end + +function color(basis, ::Any; addata=assemblydata(basis), kwargs...) + return GraphsColoring.color(GraphsColoring.conflictmatrix(basis; addata=addata, kwargs...), WorkstreamDSATUR).colors +end + +# for serial scheduling, just return all elements in one color +function color(basis, ::SerialScheduler; addata=assemblydata(basis), kwargs...) + return [collect(1:length(first(addata)))] +end diff --git a/src/integralop.jl b/src/integralop.jl index e9170464..f1effb0e 100644 --- a/src/integralop.jl +++ b/src/integralop.jl @@ -172,9 +172,60 @@ function assemblechunk_body!(biop, end +function assemblechunk_body_colored!(biop, + test_space, testad, testelementids, + trial_space, trialad, trialelementids, + qd, store, scheduler; quadstrat) + test_shapes = refspace(test_space) + trial_shapes = refspace(trial_space) + + test_elements, test_assembly_data, test_cell_ptrs = testad + trial_elements, trial_assembly_data, trial_cell_ptrs = trialad + num_tshapes = numfunctions(test_shapes, domain(chart(geometry(test_space), first(geometry(test_space))))) + num_bshapes = numfunctions(trial_shapes, domain(chart(geometry(trial_space), first(geometry(trial_space))))) + @tasks for p in testelementids + @set scheduler = scheduler + @local zlocal = zeros(scalartype(biop, test_space, trial_space), num_tshapes, num_bshapes) + tcell, tptr = test_elements[p], test_cell_ptrs[p] + + for q in trialelementids + bcell, bptr = trial_elements[q], trial_cell_ptrs[q] + fill!(zlocal, 0) + @inline qrule = quadrule(biop, test_shapes, trial_shapes, p, tcell, q, bcell, qd, quadstrat) + momintegrals!(zlocal, biop, + test_space, tptr, tcell, + trial_space, bptr, bcell, qrule) + for j in 1:length(trial_assembly_data[q]), i in 1:length(test_assembly_data[p]) + zij = zlocal[i,j] + for (n,b) in trial_assembly_data[q][j] + iszero(b) && continue + zb = zij*b + for (m,a) in test_assembly_data[p][i] + iszero(a) && continue + store(a*zb, m, n) +end end end end end end + +struct AssembleblockbodyFunctor{B,T1,T2,T3,T4,T5,T6,T7,T8,T9} + biop::B + tfs::T1 + testelements::T2 + testassemblydata::T3 + bfs::T4 + trialelements::T5 + trialassemblydata::T6 + quadraturedata::T7 + zlocals::T8 + quadstrat::T9 +end + +function (f::AssembleblockbodyFunctor)(testids, trialids, store) + assembleblock_body!(f.biop, f.tfs, testids, f.testelements, f.testassemblydata, + f.bfs, trialids, f.trialelements, f.trialassemblydata, + f.quadraturedata, f.zlocals, store; quadstrat=f.quadstrat) +end function blockassembler(biop::IntegralOperator, tfs::Space, bfs::Space; quadstrat=defaultquadstrat(biop, tfs, bfs)) @@ -194,11 +245,18 @@ function blockassembler(biop::IntegralOperator, tfs::Space, bfs::Space; trial_elements, trial_assembly_data, quadrature_data, zlocals = assembleblock_primer(biop, tfs, bfs; quadstrat=qs) - return (test_ids, trial_ids, store) -> - assembleblock_body!(biop, - tfs, test_ids, test_elements, test_assembly_data, - bfs, trial_ids, trial_elements, trial_assembly_data, - quadrature_data, zlocals, store; quadstrat=qs) + return AssembleblockbodyFunctor( + biop, + tfs, + test_elements, + test_assembly_data, + bfs, + trial_elements, + trial_assembly_data, + quadrature_data, + zlocals, + qs, + ) # if CompScienceMeshes.refines(tgeo, bgeo) # return (test_ids, trial_ids, store) -> begin @@ -267,10 +325,10 @@ function assembleblock_primer(biop, tfs, bfs; qd = quaddata(biop, tshapes, bshapes, test_elements, bsis_elements, quadstrat) - zlocals = Matrix{scalartype(biop, tfs, bfs)}[] + zlocals = Channel{Matrix{scalartype(biop, tfs, bfs)}}(2*Threads.nthreads()) - for i in 1:Threads.nthreads() - push!(zlocals, zeros(scalartype(biop, tfs, bfs), num_tshapes, num_bshapes)) + for _ in 1:2*Threads.nthreads() + put!(zlocals, zeros(scalartype(biop, tfs, bfs), num_tshapes, num_bshapes)) end return test_elements, tad, bsis_elements, bad, qd, zlocals @@ -306,27 +364,30 @@ function assembleblock_body!(biop::IntegralOperator, @assert maximum(active_test_el_ids) <= length(test_elements) "$(maximum(active_test_el_ids)), $(length(test_elements))" @assert maximum(active_trial_el_ids) <= length(bsis_elements) "$(maximum(active_trial_el_ids)), $(length(bsis_elements))" + zlocal = take!(zlocals) for p in active_test_el_ids tcell = test_elements[p] for q in active_trial_el_ids bcell = bsis_elements[q] - fill!(zlocals[Threads.threadid()], 0) + fill!(zlocal, 0) qrule = quadrule(biop, test_shapes, trial_shapes, p, tcell, q, bcell, quadrature_data, quadstrat) - momintegrals!(zlocals[Threads.threadid()], biop, + momintegrals!(zlocal, biop, tfs, p, tcell, bfs, q, bcell, qrule) - for j in 1 : size(zlocals[Threads.threadid()],2) - for i in 1 : size(zlocals[Threads.threadid()],1) + for j in 1 : size(zlocal,2) + for i in 1 : size(zlocal,1) for (n,b) in trial_assembly_data[q,j] n′ = get(trial_id_in_blk, n, 0) n′ == 0 && continue for (m,a) in test_assembly_data[p,i] m′ = get(test_id_in_blk, m, 0) m′ == 0 && continue - store(a*zlocals[Threads.threadid()][i,j]*b, m′, n′) -end end end end end end end + store(a*zlocal[i,j]*b, m′, n′) + end end end end end end + put!(zlocals, zlocal) +end # function assembleblock_body_trial_refines_test!(biop::IntegralOperator, # tfs, test_ids, test_elements, test_assembly_data, diff --git a/src/operator.jl b/src/operator.jl index 391cf4df..e996c363 100644 --- a/src/operator.jl +++ b/src/operator.jl @@ -213,6 +213,38 @@ function assemble!(operator::Operator, test_functions::Space, trial_functions::S assemblechunk!(operator, test_functions, trial_functions, store; quadstrat) end +function assemble!(operator::Operator, testfunctions::Space, trialfunctions::Space, + store, threading::Scheduler; quadstrat=defaultquadstrat) + + quadstrat = quadstrat(operator, testfunctions, trialfunctions) + + testelements, testad, trialelements, trialad, qdata, _ = assembleblock_primer( + operator, testfunctions, trialfunctions; quadstrat=quadstrat) + + testad = (testelements, testad, 1:length(testelements)) + trialad = (trialelements, trialad, 1:length(trialelements)) + + testelementcolors = color(testfunctions, threading; addata=testad) + trialelementcolors = [1:length(trialelements)] + + qs = if CompScienceMeshes.refines(geometry(testfunctions), geometry(trialfunctions)) + TestRefinesTrialQStrat(quadstrat) + elseif CompScienceMeshes.refines(geometry(trialfunctions), geometry(testfunctions)) + TrialRefinesTestQStrat(quadstrat) + else + quadstrat + end + + pbar = progressbar(length(testelements) * length(trialelements), true) + + for (i, coloredtestelements) in enumerate(testelementcolors) + for (j, coloredtrialelements) in enumerate(trialelementcolors) + assemblechunk_body_colored!(operator, testfunctions, testad, coloredtestelements, + trialfunctions, trialad, coloredtrialelements, qdata, store, threading; quadstrat=qs) + next!(pbar; step = length(testelementcolors[i]) * length(trialelementcolors[j])) + end end + finish!(pbar) +end function assemble!(op::TransposedOperator, tfs::Space, bfs::Space, store, diff --git a/src/postproc.jl b/src/postproc.jl index 92901f2c..cb05e4a5 100644 --- a/src/postproc.jl +++ b/src/postproc.jl @@ -137,11 +137,12 @@ Evaluate operator for a given bases and expansion coefficients at the given poin """ function potential(op, points, coeffs, basis; type=SVector{3,ComplexF64}, - quadstrat=defaultquadstrat(op, basis)) + quadstrat=defaultquadstrat(op, basis), + threading=Threading{:single}) ff = zeros(type, size(points)) store(v,m,n) = (ff[m] += v*coeffs[n]) - potential!(store, op, points, basis; type, quadstrat) + potential!(store, op, points, basis; type, quadstrat, threading) return ff end @@ -171,9 +172,11 @@ function potential(op, points, coeffs, space::DirectProductSpace; end function potential!(store, op, points, basis; - type=SVector{3,ComplexF64}, - quadstrat=defaultquadstrat(op, basis)) + type::Type{T}=SVector{3,ComplexF64}, + quadstrat=defaultquadstrat(op, basis), + threading=Threading{:single}) where {T} + threading = threading isa Scheduler ? threading : SerialScheduler() z = zeros(type,length(points)) els, ad = assemblydata(basis) @@ -181,14 +184,14 @@ function potential!(store, op, points, basis; geo = geometry(basis) nf = numfunctions(rs, domain(chart(geo, first(geo)))) - zlocal = Array{type}(undef, nf) - qdata = quaddata(op,rs,els,quadstrat) - - print("dots out of 10: ") + qdata = quaddata(op,rs,els,quadstrat) - todo, done, pctg = length(points), 0, 0 + pbar = progressbar(length(points), true) - for (p,y) in enumerate(points) + @tasks for p in eachindex(points) + @set scheduler = threading + @local zlocal = Vector{T}(undef, nf) + y = points[p] for (q,el) in enumerate(els) fill!(zlocal,zero(type)) @@ -202,18 +205,9 @@ function potential!(store, op, points, basis; end end end - - done += 1 - new_pctg = round(Int, done / todo * 100) - if new_pctg > pctg + 9 - #println(todo," ",done," ",new_pctg) - print(".") - pctg = new_pctg - end + next!(pbar) end - - println("") - + finish!(pbar) end function potential!(store, op, points, basis::SpaceTimeBasis) @@ -285,7 +279,8 @@ function farfieldlocal!(zlocal,op,refspace,y,el,qr) krn = kernelvals(op, y, x) for r in 1 : length(zlocal) - zlocal[r] += integrand(op,krn,y,F[r],x) * dx + i = integrand(op,krn,y,F[r],x); i *= dx; + zlocal[r] += i end end diff --git a/test/Project.toml b/test/Project.toml index 406bc7ba..8f4254e5 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -5,8 +5,10 @@ Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" CompScienceMeshes = "3e66a162-7b8c-5da0-b8f8-124ecd2c3ae1" DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab" Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" +GraphsColoring = "ed38bb8a-6120-4024-8d1e-8789784f251b" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearMaps = "7a12625a-238d-50fd-b39a-03d52299707e" +OhMyThreads = "67456a42-1dca-4109-a031-0a68de7e3ad5" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" SauterSchwabQuadrature = "535c7bfe-2023-5c1d-b712-654ef9d93a38" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" diff --git a/test/runtests.jl b/test/runtests.jl index ce0cd25b..faad9af4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -100,6 +100,8 @@ include("test_analytic_excitation.jl") include("test_vie.jl") include("test_evie_dvie.jl") +include("test_coloring.jl") + @run_package_tests filter=ti->!(:example in ti.tags) verbose=true diff --git a/test/test_coloring.jl b/test/test_coloring.jl new file mode 100644 index 00000000..350f969a --- /dev/null +++ b/test/test_coloring.jl @@ -0,0 +1,93 @@ +using Test +using CompScienceMeshes, BEAST +using OhMyThreads, GraphsColoring + +@testset "Coloring tests" begin + + for m in [readmesh(joinpath(dirname(pathof(BEAST)), "../examples/sphere2.in")), meshrectangle(1.0, 1.0, 0.1)] + + for X in [ + raviartthomas(m), + n × raviartthomas(m), + BEAST.raviartthomas2(m), + buffachristiansen(m), + lagrangec0d1(m), + lagrangec0d2(m), + BEAST.gwpdiv(m; order=2), + BEAST.gwpcurl(m; order=2), + BEAST.gwpdiv(m; order=3), + BEAST.gwpcurl(m; order=3), + brezzidouglasmarini(m), + BEAST.ncrossbdm(m), + BEAST.nedelec2(m), + BEAST.nedelec(m) + ] + elements, ad, _ = assemblydata(X; onlyactives=true) + + g = GraphsColoring.conflictmatrix(X; addata=(elements, ad, nothing)) + + colors = BEAST.color(X, DynamicScheduler()) + + for i in eachindex(colors) + @test iszero(nnz(g[colors[i], colors[i]])) + end + + for color in colors + basisindices = Int[] + els = Int[] + + for element in color + + localbasisindices = Set{Int}() + for i in 1:length(ad[element]) + for (m, a) in ad[element, i] + iszero(a) && continue + push!(localbasisindices, m) + push!(els, element) + end + end + append!(basisindices, collect(localbasisindices)) + end + + sort!(basisindices) + @test unique(basisindices) == basisindices + end + end + end + + m = tetmeshsphere(1.0, 0.2) + X = nedelecd3d(m) + + for X in [nedelecd3d(m), nedelecc3d(m), raviartthomas(m), brezzidouglasmarini3d(m)] + elements, ad, _ = assemblydata(X; onlyactives=true) + + g = GraphsColoring.conflictmatrix(X; addata=(elements, ad, nothing)) + + colors = BEAST.color(X, DynamicScheduler()) + + for i in eachindex(colors) + @test iszero(nnz(g[colors[i], colors[i]])) + end + + for color in colors + basisindices = Int[] + els = Int[] + + for element in color + + localbasisindices = Set{Int}() + for i in 1:length(ad[element]) + for (m, a) in ad[element, i] + iszero(a) && continue + push!(localbasisindices, m) + push!(els, element) + end + end + append!(basisindices, collect(localbasisindices)) + end + + sort!(basisindices) + @test unique(basisindices) == basisindices + end + end +end From 20f82272fe98ab8075469931344dfe9045ba3fd1 Mon Sep 17 00:00:00 2001 From: Kristof Cools Date: Fri, 5 Dec 2025 14:46:01 +0100 Subject: [PATCH 2/2] integration of cellcoloring and comparison with dofsplitting --- src/composedoperators/composedoperator.jl | 8 ++-- src/dyadicop.jl | 3 +- src/localop.jl | 6 ++- src/operator.jl | 56 ++++++++++++++--------- test/test_coloring.jl | 28 ++++++++++++ 5 files changed, 74 insertions(+), 27 deletions(-) diff --git a/src/composedoperators/composedoperator.jl b/src/composedoperators/composedoperator.jl index 162fbc1a..e0469462 100644 --- a/src/composedoperators/composedoperator.jl +++ b/src/composedoperators/composedoperator.jl @@ -47,9 +47,10 @@ integralop(a::CompSingleInt) = CompSingleKern(a.op1,a.kernel,a.op2) function assemble!(op::AbstractCompInt, tfs::Space, bfs::Space, store, threading = Threading{:multi}; - quadstrat=defaultquadstrat) + quadstrat=defaultquadstrat, + kwargs...) - assemble!(integralop(op),op.tfunc(tfs),op.bfunc(bfs),store,threading;quadstrat) + assemble!(integralop(op),op.tfunc(tfs),op.bfunc(bfs),store,threading;quadstrat, kwargs...) end @@ -138,7 +139,8 @@ defaultquadstrat(op::CompSingleInt,tfs::Space,bfs::Space) = SingleNumQStrat(6) ##### assembling the single integral function assemble!(biop::CompSingleKern, tfs::Space, bfs::Space, store, threading::Type{Threading{:multi}}; - quadstrat=defaultquadstrat(biop, tfs, bfs)) + quadstrat=defaultquadstrat(biop, tfs, bfs), + kwargs...) return assemble_local_mixed!(biop, tfs, bfs, store; quadstrat) end diff --git a/src/dyadicop.jl b/src/dyadicop.jl index 5306d397..067a625d 100644 --- a/src/dyadicop.jl +++ b/src/dyadicop.jl @@ -35,7 +35,8 @@ end function assemble!(biop::DyadicOp, tfs::Space, bfs::Space, store, threading::Type{BEAST.Threading{:multi}}; - quadstrat=defaultquadstrat(biop, tfs, bfs)) + quadstrat=defaultquadstrat(biop, tfs, bfs), + kwargs...) u = assemble(biop.f, tfs) v = assemble(biop.g, bfs) diff --git a/src/localop.jl b/src/localop.jl index bebc1725..05a34c9a 100644 --- a/src/localop.jl +++ b/src/localop.jl @@ -59,7 +59,8 @@ end function assemble!(biop::LocalOperator, tfs::Space, bfs::Space, store, threading::Type{Threading{:multi}}; - quadstrat=defaultquadstrat) + quadstrat=defaultquadstrat, + kwargs...) quadstrat = quadstrat(biop, tfs, bfs) @@ -79,7 +80,8 @@ end function assemble!(biop::LocalOperator, tfs::Space, bfs::Space, store, threading::Type{Threading{:single}}; - quadstrat=defaultquadstrat) + quadstrat=defaultquadstrat, + kwargs...) quadstrat = quadstrat(biop, tfs, bfs) diff --git a/src/operator.jl b/src/operator.jl index e996c363..1b4a26fc 100644 --- a/src/operator.jl +++ b/src/operator.jl @@ -91,21 +91,27 @@ defaultquadstrat(lc::LinearCombinationOfOperators, tfs::DirectProductSpace, bfs: """ assemble(operator, test_functions, trial_functions; storage_policy = Val{:bandedstorage}, - threading = Threading{:multi}, + threading = :dofsplitting, + scheduler = OhMyThreads.DynamicScheduler(), quadstrat=defaultquadstrat(operator, test_functions, trial_functions)) Assemble the system matrix corresponding to the operator `operator` tested with the test functions `test_functions` and the trial functions `trial_functions`. """ function assemble(operator::AbstractOperator, test_functions, trial_functions; storage_policy = Val{:bandedstorage}, - threading = Threading{:multi}, + threading = :dofsplitting, + scheduler = OhMyThreads.DynamicScheduler(), quadstrat=defaultquadstrat) + (threading == :dofsplitting) && (threading = Threading{:multi}) + (threading == :cellcoloring) && (threading = Threading{:cellcoloring}) + (threading == :single) && (threading = Threading{:single}) + Z, store = allocatestorage(operator, test_functions, trial_functions, storage_policy) # qs = quadstrat(operator, test_functions, trial_functions) assemble!(operator, test_functions, trial_functions, - store, threading; quadstrat) + store, threading; quadstrat, scheduler) return Z() end @@ -189,7 +195,7 @@ end function assemble!(operator::Operator, test_functions::Space, trial_functions::Space, store, threading::Type{Threading{:multi}}; - quadstrat=defaultquadstrat) + quadstrat=defaultquadstrat, kwargs...) quadstrat = quadstrat(operator, test_functions, trial_functions) P = Threads.nthreads() @@ -207,14 +213,15 @@ end end function assemble!(operator::Operator, test_functions::Space, trial_functions::Space, store, threading::Type{Threading{:single}}; - quadstrat=defaultquadstrat) + quadstrat=defaultquadstrat, kwargs...) quadstrat = quadstrat(operator, test_functions, trial_functions) assemblechunk!(operator, test_functions, trial_functions, store; quadstrat) end function assemble!(operator::Operator, testfunctions::Space, trialfunctions::Space, - store, threading::Scheduler; quadstrat=defaultquadstrat) + store, threading::Type{Threading{:cellcoloring}}; + quadstrat=defaultquadstrat, scheduler) quadstrat = quadstrat(operator, testfunctions, trialfunctions) @@ -224,7 +231,7 @@ function assemble!(operator::Operator, testfunctions::Space, trialfunctions::Spa testad = (testelements, testad, 1:length(testelements)) trialad = (trialelements, trialad, 1:length(trialelements)) - testelementcolors = color(testfunctions, threading; addata=testad) + testelementcolors = color(testfunctions, scheduler; addata=testad) trialelementcolors = [1:length(trialelements)] qs = if CompScienceMeshes.refines(geometry(testfunctions), geometry(trialfunctions)) @@ -240,7 +247,7 @@ function assemble!(operator::Operator, testfunctions::Space, trialfunctions::Spa for (i, coloredtestelements) in enumerate(testelementcolors) for (j, coloredtrialelements) in enumerate(trialelementcolors) assemblechunk_body_colored!(operator, testfunctions, testad, coloredtestelements, - trialfunctions, trialad, coloredtrialelements, qdata, store, threading; quadstrat=qs) + trialfunctions, trialad, coloredtrialelements, qdata, store, scheduler; quadstrat=qs) next!(pbar; step = length(testelementcolors[i]) * length(trialelementcolors[j])) end end finish!(pbar) @@ -249,21 +256,23 @@ end function assemble!(op::TransposedOperator, tfs::Space, bfs::Space, store, threading::Type{Threading{:multi}} = Threading{:multi}; - quadstrat=defaultquadstrat(op, tfs, bfs)) + quadstrat=defaultquadstrat(op, tfs, bfs), + kwargs...) store1(v,m,n) = store(v,n,m) - assemble!(op.op, bfs, tfs, store1, threading; quadstrat) + assemble!(op.op, bfs, tfs, store1, threading; quadstrat, kwargs...) end function assemble!(op::LinearCombinationOfOperators, tfs::Space, bfs::Space, store, threading = Threading{:multi}; - quadstrat=defaultquadstrat) + quadstrat=defaultquadstrat, + kwargs...) for (a,A) in zip(op.coeffs, op.ops) store1(v,m,n) = store(a*v,m,n) # qs = quadstrat(A, tfs, bfs) - assemble!(A, tfs, bfs, store1, threading; quadstrat=quadstrat) + assemble!(A, tfs, bfs, store1, threading; quadstrat=quadstrat, kwargs...) end end # function assemble!(op::LinearCombinationOfOperators, tfs::Space, bfs::Space, @@ -282,38 +291,41 @@ end # Support for direct product spaces function assemble!(op::AbstractOperator, tfs::DirectProductSpace, bfs::Space, store, threading = Threading{:multi}; - quadstrat=defaultquadstrat(op, tfs[1], bfs)) + quadstrat=defaultquadstrat(op, tfs[1], bfs), + kwargs...) I = Int[0] for s in tfs.factors push!(I, last(I) + numfunctions(s)) end for (i,s) in enumerate(tfs.factors) store1(v,m,n) = store(v,m + I[i], n) - assemble!(op, s, bfs, store1, threading; quadstrat) + assemble!(op, s, bfs, store1, threading; quadstrat, kwargs...) end end function assemble!(op::AbstractOperator, tfs::Space, bfs::DirectProductSpace, store, threading=Threading{:multi}; - quadstrat=defaultquadstrat(op, tfs, bfs[1])) + quadstrat=defaultquadstrat(op, tfs, bfs[1]), + kwargs...) J = Int[0] for s in bfs.factors push!(J, last(J) + numfunctions(s)) end for (j,s) in enumerate(bfs.factors) store1(v,m,n) = store(v,m,n + J[j]) - assemble!(op, tfs, s, store1, threading; quadstrat) + assemble!(op, tfs, s, store1, threading; quadstrat, kwargs...) end end function assemble!(op::AbstractOperator, tfs::DirectProductSpace, bfs::DirectProductSpace, store, threading=Threading{:multi}; - quadstrat=defaultquadstrat(op, tfs[1], bfs[1])) + quadstrat=defaultquadstrat(op, tfs[1], bfs[1]), + kwargs...) I = Int[0] for s in tfs.factors push!(I, last(I) + numfunctions(s)) end for (i,s) in enumerate(tfs.factors) store1(v,m,n) = store(v,m + I[i],n) - assemble!(op, s, bfs, store1, threading; quadstrat) + assemble!(op, s, bfs, store1, threading; quadstrat, kwargs...) end end @@ -336,7 +348,8 @@ allocatestorage(op::BlockDiagonalOperator, X, Y, storage_trait, longdelays_trait function assemble!(op::BlockDiagonalOperator, U::DirectProductSpace, V::DirectProductSpace, store, threading=Threading{:multi}; - quadstrat = defaultquadstrat(op, U, V)) + quadstrat = defaultquadstrat(op, U, V), + kwargs...) @assert length(U.factors) == length(V.factors) I = Int[0]; for u in U.factors push!(I, last(I) + numfunctions(u)) end @@ -362,7 +375,8 @@ defaultquadstrat(op::BlockFullOperators, U::DirectProductSpace, V::DirectProduct function assemble!(op::BlockFullOperators, U::DirectProductSpace, V::DirectProductSpace, store, threading; - quadstrat = defaultquadstrat(op, U, V)) + quadstrat = defaultquadstrat(op, U, V), + kwargs...) I = Int[0]; for u in U.factors push!(I, last(I) + numfunctions(u)) end J = Int[0]; for v in V.factors push!(J, last(J) + numfunctions(v)) end @@ -370,7 +384,7 @@ function assemble!(op::BlockFullOperators, U::DirectProductSpace, V::DirectProdu for (k,u) in enumerate(U.factors) for (l,v) in enumerate(V.factors) store1(x,m,n) = store(x, I[k]+m, J[l]+n) - assemble!(op.op, u, v, store1, threading; quadstrat) + assemble!(op.op, u, v, store1, threading; quadstrat, kwargs...) end end end diff --git a/test/test_coloring.jl b/test/test_coloring.jl index 350f969a..e4a54782 100644 --- a/test/test_coloring.jl +++ b/test/test_coloring.jl @@ -91,3 +91,31 @@ using OhMyThreads, GraphsColoring end end end + + +@testitem "cellcoloring vs dofsplitting" begin + using CompScienceMeshes, OhMyThreads, LinearAlgebra + + fn = dirname(pathof(BEAST)) * "/../test/assets/sphere45.in" + m = readmesh(fn) + + T = Maxwell3D.singlelayer(wavenumber=3.0) + X = raviartthomas(m) + Y = buffachristiansen(m) + + Txx1 = assemble(T, X, X; + threading=:cellcoloring, + scheduler=OhMyThreads.DynamicScheduler()) + Txx2 = assemble(T, X, X; + threading=:dofsplitting, + scheduler=OhMyThreads.DynamicScheduler()) + @test norm(Txx1 - Txx2) < 1e-12 + + Tyy1 = assemble(T, Y, Y; + threading=:cellcoloring, + scheduler=OhMyThreads.DynamicScheduler()) + Tyy2 = assemble(T, Y, Y; + threading=:dofsplitting, + scheduler=OhMyThreads.DynamicScheduler()) + @test norm(Tyy1 - Tyy2) < 1e-12 +end \ No newline at end of file