diff --git a/Project.toml b/Project.toml index 5d00262..a957ce3 100644 --- a/Project.toml +++ b/Project.toml @@ -23,7 +23,7 @@ Exodus = "f57ae99e-f805-4780-bdca-96e224be1e5a" [extensions] FiniteElementContainersAdaptExt = "Adapt" -FiniteElementContainersCUDAExt = "CUDA" +FiniteElementContainersCUDAExt = ["Adapt", "CUDA"] FiniteElementContainersExodusExt = "Exodus" [compat] @@ -36,6 +36,7 @@ DocStringExtensions = "0.9" Exodus = "0.13" IterativeSolvers = "0.9" JET = "0.9" +Krylov = "0.9" KernelAbstractions = "0.9" LinearAlgebra = "1" Parameters = "0.12" @@ -55,9 +56,10 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Exodus = "f57ae99e-f805-4780-bdca-96e224be1e5a" IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153" JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b" +Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" TestSetExtensions = "98d24dd4-01ad-11ea-1b02-c9a08f80db04" [targets] -test = ["Aqua", "CUDA", "Exodus", "IterativeSolvers", "JET", "Parameters", "Test", "TestSetExtensions"] +test = ["Aqua", "CUDA", "Exodus", "IterativeSolvers", "JET", "Krylov", "Parameters", "Test", "TestSetExtensions"] diff --git a/ext/FiniteElementContainersAdaptExt.jl b/ext/FiniteElementContainersAdaptExt.jl index 83d0b7d..ecc412f 100644 --- a/ext/FiniteElementContainersAdaptExt.jl +++ b/ext/FiniteElementContainersAdaptExt.jl @@ -3,16 +3,21 @@ module FiniteElementContainersAdaptExt using Adapt using FiniteElementContainers +FiniteElementContainersAdaptExt.cpu(x) = Adapt.adapt_structure(Array, x) + # Assemblers function Adapt.adapt_structure(to, asm::SparseMatrixAssembler) dof = Adapt.adapt_structure(to, asm.dof) pattern = Adapt.adapt_structure(to, asm.pattern) constraint_storage = Adapt.adapt_structure(to, asm.constraint_storage) residual_storage = Adapt.adapt_structure(to, asm.residual_storage) + residual_unknowns = Adapt.adapt_structure(to, asm.residual_unknowns) stiffness_storage = Adapt.adapt_structure(to, asm.stiffness_storage) return SparseMatrixAssembler( dof, pattern, - constraint_storage, residual_storage, stiffness_storage + constraint_storage, + residual_storage, residual_unknowns, + stiffness_storage ) end diff --git a/ext/FiniteElementContainersCUDAExt.jl b/ext/FiniteElementContainersCUDAExt.jl index 7a219ef..61117dd 100644 --- a/ext/FiniteElementContainersCUDAExt.jl +++ b/ext/FiniteElementContainersCUDAExt.jl @@ -1,16 +1,19 @@ module FiniteElementContainersCUDAExt +using Adapt using CUDA using FiniteElementContainers using KernelAbstractions +FiniteElementContainers.gpu(x) = Adapt.adapt_structure(CuArray, x) + # this method need to have the assembler initialized first # if the stored values in asm.pattern.cscnzval or zero # CUDA will error out function CUDA.CUSPARSE.CuSparseMatrixCSC(asm::SparseMatrixAssembler) @assert typeof(get_backend(asm)) <: CUDABackend "Assembler is not on a CUDA device" - @assert length(asm.pattern.cscnzval) > 0 "Need to assemble the assembler once" - @assert all(x -> x != zero(eltype(asm.pattern.cscnzval)), asm.pattern.cscnzval) "Need to assemble the assembler once" + @assert length(asm.pattern.cscnzval) > 0 "Need to assemble the assembler once with SparseArrays.sparse!(assembler)" + @assert all(x -> x != zero(eltype(asm.pattern.cscnzval)), asm.pattern.cscnzval) "Need to assemble the assembler once with SparseArrays.sparse!(assembler)" n_dofs = FiniteElementContainers.num_unknowns(asm.dof) return CUDA.CUSPARSE.CuSparseMatrixCSC( asm.pattern.csccolptr, @@ -20,6 +23,10 @@ function CUDA.CUSPARSE.CuSparseMatrixCSC(asm::SparseMatrixAssembler) ) end +function FiniteElementContainers._stiffness(asm::SparseMatrixAssembler, ::Backend) + return CUDA.CUSPARSE.CuSparseMatrixCSC(asm) +end + # this one isn't quite right # function CUDA.CUSPARSE.CuSparseMatrixCSR(asm::SparseMatrixAssembler) # n_dofs = FiniteElementContainers.num_unknowns(asm.dof) diff --git a/src/Assemblers.jl b/src/Assemblers.jl index d7f84ba..90b618b 100644 --- a/src/Assemblers.jl +++ b/src/Assemblers.jl @@ -256,6 +256,135 @@ function assemble!( return nothing end +# wrapper +function assemble!( + assembler::SparseMatrixAssembler, + func::F, + U::T, + sym::Symbol +) where {F, T} + assemble!(assembler, func, U, Val{sym}()) +end + +KA.@kernel function _assemble_residual_kernel!(asm, func, U, X, conns, ref_fe, block_id) + E = KA.@index(Global) + + # some helpers + NDim = size(X, 1) + ND, NN = num_dofs_per_node(asm.dof), num_nodes(asm.dof) + NNPE = ReferenceFiniteElements.num_vertices(ref_fe) + # NDim = ReferenceFiniteElements.num_dimensions(ref_fe) + NxNDof = NNPE * ND + # IDs = reshape(1:length(asm.dof), ND, size(U, 2)) + + # extract element level stuff + u_el = @views SMatrix{ND, NNPE, Float64, NxNDof}(U[:, conns[:, E]]) + x_el = @views SMatrix{NDim, NNPE, Float64, NDim * NNPE}(X[:, conns[:, E]]) + # dof_conns = reshape(IDs[:, conns], ND * length(conns)) + + # assemble element level residual + R_el = zeros(SVector{NxNDof, Float64}) + for q in 1:num_quadrature_points(ref_fe) + interps = MappedInterpolants(ref_fe.cell_interps.vals[q], x_el) + + # dynamic method invocation isn't working here. + # R_q = func(interps, u_el) + # R_el = R_el + R_q + + X_q, N, ∇N_X, JxW = interps.X_q, interps.N, interps.∇N_X, interps.JxW + ∇u_q = u_el * ∇N_X + f = 2. * π^2 * sin(2π * X_q[1]) * sin(4π * X_q[2]) + R_q = ∇u_q * ∇N_X' - N' * f + R_el = R_el + JxW * R_q[:] + end + + # now assemble + # TODO won't work for problems with more than one dof... + # need to create dof_conns + for i in axes(conns, 1) + Atomix.@atomic asm.residual_storage.vals[conns[i]] += R_el[i] + end +end + +KA.@kernel function _assemble_stiffness_kernel!(asm, func, U, X, conns, ref_fe, block_id) + E = KA.@index(Global) + + # some helpers + NDim = size(X, 1) + ND, NN = num_dofs_per_node(asm.dof), num_nodes(asm.dof) + NNPE = ReferenceFiniteElements.num_vertices(ref_fe) + # NDim = ReferenceFiniteElements.num_dimensions(ref_fe) + NxNDof = NNPE * ND + + # extract element level stuff + u_el = @views SMatrix{ND, NNPE, Float64, NxNDof}(U[:, conns[:, E]]) + x_el = @views SMatrix{NDim, NNPE, Float64, NDim * NNPE}(X[:, conns[:, E]]) + + # assemble element level residual + K_el = zeros(SMatrix{NxNDof, NxNDof, Float64, NxNDof * NxNDof}) + for q in 1:num_quadrature_points(ref_fe) + interps = MappedInterpolants(ref_fe.cell_interps.vals[q], x_el) + + # dynamic method invocation isn't working here. + # R_q = func(interps, u_el) + # R_el = R_el + R_q + + X_q, N, ∇N_X, JxW = interps.X_q, interps.N, interps.∇N_X, interps.JxW + K_q = ∇N_X * ∇N_X' + K_el = K_el + JxW * K_q + end + + # now assemble + # TODO won't work for problems with more than one dof... + # need to create dof_conns + # start_id = (block_id - 1) * asm.pattern.block_sizes[block_id] + + # (el_id - 1) * asm.pattern.block_offsets[block_id] + 1 + # end_id = start_id + asm.pattern.block_offsets[block_id] - 1 + # ids = start_id:end_id + # for id in ids + # Atomix.@atomic asm.stiffness_storage[id] += K_el[:] + # end +end + +# newer method try to dispatch based on field type +function assemble!( + assembler::SparseMatrixAssembler, + residual_func::F, + U::T, + ::Val{:residual} +) where {F <: Function, T <: H1Field} + # TODO hack for now, only grabbing first var fspace + # fspace = getproperty(assembler.dof.H1_vars, :H1_var_1) + fspace = assembler.dof.H1_vars[1].fspace + for (block_id, conns) in enumerate(values(fspace.elem_conns)) + ref_fe = values(fspace.ref_fes)[block_id] + # backend = KA.get_backend(assembler) + # TODO eventually dispatch on CPU vs. GPU + kernel! = _assemble_residual_kernel!(KA.get_backend(assembler)) + kernel!(assembler, residual_func, U, fspace.coords, conns, ref_fe, block_id, ndrange = size(conns, 2)) + end +end + +function assemble!( + assembler::SparseMatrixAssembler, + tangent_func::F, + U::T, + ::Val{:stiffness} +) where {F <: Function, T <: H1Field} + # TODO hack for now, only grabbing first var fspace + # fspace = getproperty(assembler.dof.H1_vars, :H1_var_1) + fspace = assembler.dof.H1_vars[1].fspace + for (block_id, conns) in enumerate(values(fspace.elem_conns)) + ref_fe = values(fspace.ref_fes)[block_id] + # backend = KA.get_backend(assembler) + # TODO eventually dispatch on CPU vs. GPU + + # TODO going to need to query the start/end ids for the block assembler + kernel! = _assemble_stiffness_kernel!(KA.get_backend(assembler)) + kernel!(assembler, tangent_func, U, fspace.coords, conns, ref_fe, block_id, ndrange = size(conns, 2)) + end +end + # TODO hardcoded for H1 function assemble!( R, @@ -265,6 +394,7 @@ function assemble!( ) where {F <: Function, T <: AbstractArray} R .= zero(eltype(R)) + # fill(R.vals, zero(eltype(R))) vars = assembler.dof.H1_vars @@ -274,42 +404,9 @@ function assemble!( fspace = vars[1].fspace - # NDim = size(fspace.coords, 1) - # ND, NN = num_dofs_per_node(assembler.dof), num_nodes(assembler.dof) - # ids = reshape(1:length(assembler.dof), ND, NN) - for (block_id, conns) in enumerate(values(fspace.elem_conns)) ref_fe = values(fspace.ref_fes)[block_id] - assemble!(R, assembler, residual_func, U, fspace.coords, conns, ref_fe) - # NNPE = ReferenceFiniteElements.num_vertices(ref_fe) - # NxNDof = NNPE * ND - # dof_conns = @views reshape(ids[:, conns], ND * size(conns, 1), size(conns, 2)) - - # for e in 1:size(conns, 2) - # AK.foraxes(conns, 2) do e - # u_el = @views SMatrix{ND, NNPE, Float64, NxNDof}(U[dof_conns[:, e]]) - # x_el = @views SMatrix{NDim, NNPE, Float64, NDim * NNPE}(fspace.coords[:, conns[:, e]]) - # # K_el = zeros(SMatrix{NxNDof, NxNDof, Float64, NxNDof * NxNDof}) - # # R_el = zeros(SMatrix{ND, NNPE, Float64, NxNDof}) - # R_el = zeros(SVector{NxNDof, Float64}) - - # for q in 1:num_quadrature_points(ref_fe) - # N = ReferenceFiniteElements.shape_function_value(ref_fe, q) - # ∇N_ξ = ReferenceFiniteElements.shape_function_gradient(ref_fe, q) - # ∇N_X = map_shape_function_gradients(x_el, ∇N_ξ) - # JxW = volume(x_el, ∇N_ξ) * quadrature_weight(ref_fe, q) - # x_q = x_el * N - # interps = Interpolants(x_q, N, ∇N_X, JxW) - - # R_q = residual_func(interps, u_el) - # R_el = R_el + JxW * R_q - # end - - # # assemble!(assembler, R_el, e, block_id) - # # Note this method is in old stuff TODO - # @views assemble!(R, R_el, dof_conns[:, e]) - # end end end @@ -396,6 +493,7 @@ function assemble!( end end +create_bcs(asm::SparseMatrixAssembler, type) = create_bcs(asm.dof, type) create_field(asm::SparseMatrixAssembler, type) = create_field(asm.dof, type) create_unknowns(asm::SparseMatrixAssembler) = create_unknowns(asm.dof) diff --git a/src/DofManagers.jl b/src/DofManagers.jl index 400fc2a..fc099d5 100644 --- a/src/DofManagers.jl +++ b/src/DofManagers.jl @@ -116,6 +116,10 @@ function _dof_manager_sym_name(u::VectorFunction) return Symbol(split(String(names(u)[1]), ['_'])[1]) end +function _dof_manager_vars(dof::DofManager, ::Type{<:H1Field}) + return dof.H1_vars +end + function _filter_field_type(vars, T) vars = filter(x -> isa(x.fspace.coords, T), vars) syms = map(_dof_manager_sym_name, vars) @@ -167,6 +171,14 @@ Base.length(dof::DofManager) = length(dof.H1_bc_dofs) + length(dof.H1_unknown_do KA.get_backend(dof::DofManager) = KA.get_backend(dof.H1_unknown_dofs) +""" +$(TYPEDSIGNATURES) +""" +function create_bcs(dof::DofManager, ::Type{H1Field}) + backend = KA.get_backend(dof.H1_bc_dofs) + return KA.zeros(backend, Float64, length(dof.H1_bc_dofs)) +end + """ $(TYPEDSIGNATURES) """ @@ -361,6 +373,33 @@ function update_field_unknowns!(U::H1Field, dof::DofManager, Uu::T) where T <: A return nothing end +KA.@kernel function _update_field_unknowns_kernel_plus!(U::H1Field, dof::DofManager, Uu::T) where T <: AbstractArray{<:Number, 1} + N = KA.@index(Global) + @inbounds U[dof.H1_unknown_dofs[N]] += Uu[N] +end + +function _update_field_unknowns!(U::H1Field, dof::DofManager, Uu::T, ::typeof(+), backend::KA.Backend) where T <: AbstractArray{<:Number, 1} + kernel! = _update_field_unknowns_kernel_plus!(backend) + kernel!(U, dof, Uu, ndrange = length(Uu)) + return nothing +end + +function _update_field_unknowns!(U::H1Field, dof::DofManager, Uu::T, ::typeof(+), ::KA.CPU) where T <: AbstractArray{<:Number, 1} + for (n, d) in enumerate(dof.H1_unknown_dofs) + U[d] += Uu[n] + end + return nothing +end + +""" +$(TYPEDSIGNATURES) +Does a simple addition on CPUs. On GPUs it uses a ```KernelAbstractions``` kernel +""" +function update_field_unknowns!(U::H1Field, dof::DofManager, Uu::T, ::typeof(+)) where T <: AbstractArray{<:Number, 1} + _update_field_unknowns!(U, dof, Uu, +, KA.get_backend(dof)) + return nothing +end + """ $(TYPEDSIGNATURES) """ diff --git a/src/FiniteElementContainers.jl b/src/FiniteElementContainers.jl index 701b449..0cfaa90 100644 --- a/src/FiniteElementContainers.jl +++ b/src/FiniteElementContainers.jl @@ -1,8 +1,14 @@ module FiniteElementContainers +# general +export cpu +export gpu + # Assemblers export SparseMatrixAssembler export assemble! +export residual +export stiffness # BCs export DirichletBC @@ -41,6 +47,7 @@ export connectivity # DofManager export DofManager +export create_bcs export create_field export create_unknowns export update_dofs! @@ -87,6 +94,9 @@ using Tensors abstract type FEMContainer end +function cpu end +function gpu end + # basic stuff include("fields/Fields.jl") include("Meshes.jl") @@ -100,6 +110,9 @@ include("Functions.jl") include("DofManagers.jl") include("bcs/BoundaryConditions.jl") include("formulations/Formulations.jl") -include("Assemblers.jl") +# include("Assemblers.jl") +include("assemblers/Assemblers.jl") + +include("physics/Physics.jl") end # module diff --git a/src/assemblers/Assemblers.jl b/src/assemblers/Assemblers.jl new file mode 100644 index 0000000..36a70fa --- /dev/null +++ b/src/assemblers/Assemblers.jl @@ -0,0 +1,244 @@ +abstract type AbstractAssembler{Dof <: DofManager} end + +""" +$(TYPEDSIGNATURES) +Assembly method for a scalar field stored as a size 1 vector +""" +function _assemble_element!(global_val::T, local_val, conn, e, b) where T <: AbstractArray{<:Number, 1} + global_val[1] += local_val + return nothing +end + +function _assemble_element!(global_val::H1Field, local_val, conn, e, b) + n_dofs = size(global_val, 1) + for i in axes(conn, 1) + for d in 1:n_dofs + # n = 2 * i + d + global_id = n_dofs * (conn[i] - 1) + d + local_id = n_dofs * (i - 1) + d + global_val[global_id] += local_val[local_id] + end + end + return nothing +end + +function _assemble_block!(assembler, physics, ::Val{:residual}, ref_fe, U, X, conns, block_id, ::KA.CPU) + ND = size(U, 1) + NNPE = ReferenceFiniteElements.num_vertices(ref_fe) + NxNDof = NNPE * ND + for e in axes(conns, 2) + x_el = _element_level_coordinates(X, ref_fe, conns, e) + u_el = _element_level_fields(U, ref_fe, conns, e) + R_el = zeros(SVector{NxNDof, Float64}) + + for q in 1:num_quadrature_points(ref_fe) + interps = MappedInterpolants(ref_fe.cell_interps.vals[q], x_el) + R_q = residual(physics, interps, u_el) + R_el = R_el + R_q + end + + @views _assemble_element!(assembler.residual_storage, R_el, conns[:, e], e, block_id) + end +end + +KA.@kernel function _assemble_block_residual_kernel!(assembler, physics, ref_fe, U, X, conns, block_id) + E = KA.@index(Global) + + ND = size(U, 1) + NNPE = ReferenceFiniteElements.num_vertices(ref_fe) + NxNDof = NNPE * ND + + x_el = _element_level_coordinates(X, ref_fe, conns, E) + u_el = _element_level_fields(U, ref_fe, conns, E) + R_el = zeros(SVector{NxNDof, Float64}) + + for q in 1:num_quadrature_points(ref_fe) + interps = MappedInterpolants(ref_fe.cell_interps.vals[q], x_el) + R_q = residual(physics, interps, u_el) + R_el = R_el + R_q + end + + # now assemble atomically + n_dofs = size(assembler.residual_storage, 1) + for i in 1:size(conns, 1) + for d in 1:n_dofs + global_id = n_dofs * (conns[i, E] - 1) + d + local_id = n_dofs * (i - 1) + d + Atomix.@atomic assembler.residual_storage.vals[global_id] += R_el[local_id] + end + end +end + +function _assemble_block!(assembler, physics, ::Val{:residual}, ref_fe, U, X, conns, block_id, backend::KA.Backend) + kernel! = _assemble_block_residual_kernel!(backend) + kernel!(assembler, physics, ref_fe, U, X, conns, block_id, ndrange=size(conns, 2)) + return nothing +end + +function _assemble_block!(assembler, physics, ::Val{:residual_and_stiffness}, ref_fe, U, X, conns, block_id, ::KA.CPU) + ND = size(U, 1) + NNPE = ReferenceFiniteElements.num_vertices(ref_fe) + NxNDof = NNPE * ND + for e in axes(conns, 2) + x_el = _element_level_coordinates(X, ref_fe, conns, e) + u_el = _element_level_fields(U, ref_fe, conns, e) + R_el = zeros(SVector{NxNDof, Float64}) + K_el = zeros(SMatrix{NxNDof, NxNDof, Float64, NxNDof * NxNDof}) + + for q in 1:num_quadrature_points(ref_fe) + interps = MappedInterpolants(ref_fe.cell_interps.vals[q], x_el) + R_q = residual(physics, interps, u_el) + K_q = stiffness(physics, interps, u_el) + R_el = R_el + R_q + K_el = K_el + K_q + end + + @views _assemble_element!(assembler.residual_storage, R_el, conns[:, e], e, block_id) + @views _assemble_element!(assembler, K_el, conns[:, e], e, block_id) + end + return nothing +end + +function _assemble_block!(assembler, physics, ::Val{:stiffness}, ref_fe, U, X, conns, block_id, ::KA.CPU) + ND = size(U, 1) + NNPE = ReferenceFiniteElements.num_vertices(ref_fe) + NxNDof = NNPE * ND + for e in axes(conns, 2) + x_el = _element_level_coordinates(X, ref_fe, conns, e) + u_el = _element_level_fields(U, ref_fe, conns, e) + K_el = zeros(SMatrix{NxNDof, NxNDof, Float64, NxNDof * NxNDof}) + + for q in 1:num_quadrature_points(ref_fe) + interps = MappedInterpolants(ref_fe.cell_interps.vals[q], x_el) + K_q = stiffness(physics, interps, u_el) + K_el = K_el + K_q + end + + @views _assemble_element!(assembler, K_el, conns[:, e], e, block_id) + end + return nothing +end + +KA.@kernel function _assemble_block_stiffness_kernel!(assembler, physics, ref_fe, U, X, conns, block_id) + E = KA.@index(Global) + + ND = size(U, 1) + NNPE = ReferenceFiniteElements.num_vertices(ref_fe) + NxNDof = NNPE * ND + + x_el = _element_level_coordinates(X, ref_fe, conns, E) + u_el = _element_level_fields(U, ref_fe, conns, E) + K_el = zeros(SMatrix{NxNDof, NxNDof, Float64, NxNDof * NxNDof}) + + for q in 1:num_quadrature_points(ref_fe) + interps = MappedInterpolants(ref_fe.cell_interps.vals[q], x_el) + K_q = stiffness(physics, interps, u_el) + K_el = K_el + K_q + end + + # start_id = (block_id - 1) * assembler.pattern.block_sizes[block_id] + + # (el_id - 1) * assembler.pattern.block_offsets[block_id] + 1 + # end_id = start_id + assembler.pattern.block_offsets[block_id] - 1 + # TODO patch this up for multi-block later + # hardcoded for single block quad4 + start_id = (E - 1) * 16 + 1 + end_id = start_id + 16 - 1 + ids = start_id:end_id + for (i, id) in enumerate(ids) + Atomix.@atomic assembler.stiffness_storage[id] += K_el.data[i] + end +end + +function _assemble_block!(assembler, physics, ::Val{:stiffness}, ref_fe, U, X, conns, block_id, backend::KA.Backend) + kernel! = _assemble_block_stiffness_kernel!(backend) + kernel!(assembler, physics, ref_fe, U, X, conns, block_id, ndrange=size(conns, 2)) + return nothing +end + +function _assemble_block!(assembler, physics, sym, ref_fe, U, X, conns, block_id) + backend = KA.get_backend(assembler) + # TODO add get_backend method of ref_fe + @assert backend == KA.get_backend(U) + @assert backend == KA.get_backend(X) + @assert backend == KA.get_backend(conns) + _assemble_block!(assembler, physics, Val{sym}(), ref_fe, U, X, conns, block_id, backend) +end + +""" +$(TYPEDSIGNATURES) +Top level assembly method +TODO make more general +""" +function assemble!(assembler, physics, U::H1Field, sym::Symbol) + # TODO need to generalize to different field types + # fill!(assembler.residual_storage, zero(eltype(assembler.residual_storage))) + _zero_storage(assembler, Val{sym}()) + fspace = assembler.dof.H1_vars[1].fspace + for (b, conns) in enumerate(values(fspace.elem_conns)) + ref_fe = values(fspace.ref_fes)[b] + _assemble_block!(assembler, physics, sym, ref_fe, U, fspace.coords, conns, b) + end + return nothing +end + + +create_bcs(asm::AbstractAssembler, type) = create_bcs(asm.dof, type) +create_field(asm::AbstractAssembler, type) = create_field(asm.dof, type) +create_unknowns(asm::AbstractAssembler) = create_unknowns(asm.dof) + +function _element_level_coordinates(X::H1Field, ref_fe, conns, e) + NDim = size(X, 1) + NNPE = ReferenceFiniteElements.num_vertices(ref_fe) + x_el = @views SMatrix{NDim, NNPE, Float64, NDim * NNPE}(X[:, conns[:, e]]) + return x_el +end + +function _element_level_fields(U::H1Field, ref_fe, conns, e) + ND = size(U, 1) + NNPE = ReferenceFiniteElements.num_vertices(ref_fe) + NxNDof = NNPE * ND + u_el = @views SMatrix{ND, NNPE, Float64, NxNDof}(U[:, conns[:, e]]) + return u_el +end + +KA.@kernel function _extract_residual_unknowns!(Ru, unknown_dofs, R) + N = KA.@index(Global) + Ru[N] = R[unknown_dofs[N]] +end + +function _residual(asm::AbstractAssembler, backend::KA.Backend) + kernel! = _extract_residual_unknowns!(backend) + kernel!(asm.residual_unknowns, + asm.dof.H1_unknown_dofs, + asm.residual_storage, + ndrange=length(asm.dof.H1_unknown_dofs)) + return asm.residual_unknowns +end + +# TODO hardcoded to H1 fields right now. +function _residual(asm::AbstractAssembler, ::KA.CPU) + # for n in axes(asm.residual_unknowns, 1) + # asm.residual_unknowns[n] = asm.residual_storage[asm.dof.H1_unknown_dofs[n]] + # end + @views asm.residual_unknowns .= asm.residual_storage[asm.dof.H1_unknown_dofs] + return asm.residual_unknowns +end + +function residual(asm::AbstractAssembler) + return _residual(asm, KA.get_backend(asm)) +end + +function update_field!(U, asm::AbstractAssembler, Uu, Ubc) + update_field!(U, asm.dof, Uu, Ubc) + return nothing +end + +function _zero_storage(asm::AbstractAssembler, ::Val{:residual}) + fill!(asm.residual_storage.vals, zero(eltype(asm.residual_storage.vals))) +end + +# some utilities +include("SparsityPattern.jl") + +# implementations +include("SparseMatrixAssembler.jl") diff --git a/src/assemblers/SparseMatrixAssembler.jl b/src/assemblers/SparseMatrixAssembler.jl new file mode 100644 index 0000000..9d87707 --- /dev/null +++ b/src/assemblers/SparseMatrixAssembler.jl @@ -0,0 +1,171 @@ +struct SparseMatrixAssembler{ + Dof <: DofManager, + Pattern <: SparsityPattern, + Storage1 <: AbstractArray{<:Number}, + Storage2 <: AbstractField, + Storage3 <: AbstractArray{<:Number, 1} +} <: AbstractAssembler{Dof} + dof::Dof + pattern::Pattern + constraint_storage::Storage1 + residual_storage::Storage2 + residual_unknowns::Storage3 + stiffness_storage::Storage1 +end + +# TODO this will not work for other than single H1 spaces +function SparseMatrixAssembler(dof::DofManager, type::Type{<:AbstractField}) + pattern = SparsityPattern(dof, type) + constraint_storage = zeros(length(dof)) + # constraint_storage = zeros(_dof_manager_vars(dof, type)) + constraint_storage[dof.H1_bc_dofs] .= 1. + # fill!(constraint_storage, ) + # residual_storage = zeros(length(dof)) + residual_storage = create_field(dof, H1Field) + residual_unknowns = create_unknowns(dof) + stiffness_storage = zeros(num_entries(pattern)) + return SparseMatrixAssembler( + dof, pattern, + constraint_storage, + residual_storage, residual_unknowns, + stiffness_storage + ) +end + +# TODO how best to do this? +# Should probably be a block array maybe? +# function SparseMatrixAssembler(vars...) +# dof = DofManager(vars...) +# return SparseMatrixAssembler(dof) +# end + +function Base.show(io::IO, asm::SparseMatrixAssembler) + println(io, "SparseMatrixAssembler") + println(io, " ", asm.dof) +end + +function _assemble_element!(asm::SparseMatrixAssembler, K_el::SMatrix, conn, el_id::Int, block_id::Int) + start_id = (block_id - 1) * asm.pattern.block_sizes[block_id] + + (el_id - 1) * asm.pattern.block_offsets[block_id] + 1 + end_id = start_id + asm.pattern.block_offsets[block_id] - 1 + ids = start_id:end_id + @views asm.stiffness_storage[ids] += K_el[:] + return nothing +end + +KA.get_backend(asm::SparseMatrixAssembler) = KA.get_backend(asm.dof) + +""" +$(TYPEDSIGNATURES) +""" +function SparseArrays.sparse(assembler::SparseMatrixAssembler) + # ids = pattern.unknown_dofs + pattern = assembler.pattern + storage = assembler.stiffness_storage + return @views sparse(pattern.Is, pattern.Js, storage) +end + +""" +$(TYPEDSIGNATURES) +""" +function SparseArrays.sparse!(assembler::SparseMatrixAssembler) + # ids = pattern.unknown_dofs + pattern = assembler.pattern + storage = assembler.stiffness_storage + return @views SparseArrays.sparse!( + pattern.Is, pattern.Js, storage[assembler.pattern.unknown_dofs], + length(pattern.klasttouch), length(pattern.klasttouch), +, pattern.klasttouch, + pattern.csrrowptr, pattern.csrcolval, pattern.csrnzval, + pattern.csccolptr, pattern.cscrowval, pattern.cscnzval + ) +end + +function SparseArrays.spdiagm(assembler::SparseMatrixAssembler) + return SparseArrays.spdiagm(assembler.constraint_storage) +end + +function _stiffness(assembler::SparseMatrixAssembler, ::KA.CPU) + return SparseArrays.sparse!(assembler) +end + +function stiffness(assembler::SparseMatrixAssembler) + return _stiffness(assembler, KA.get_backend(assembler)) +end + +# TODO Need to specialize below for different field types +function update_dofs!(assembler::SparseMatrixAssembler, dirichlet_bcs::T) where T <: AbstractArray{DirichletBC} + vars = assembler.dof.H1_vars + + if length(vars) != 1 + @assert false "multiple fspace not supported yet" + end + + # fspace = vars[1].fspace + + # make this more efficient + dirichlet_dofs = unique!(sort!(vcat(map(x -> x.bookkeeping.dofs, dirichlet_bcs)...))) + update_dofs!(assembler, dirichlet_dofs) + return nothing +end + +function update_dofs!(assembler::SparseMatrixAssembler, dirichlet_dofs::T) where T <: AbstractArray{<:Integer, 1} + dirichlet_dofs = copy(dirichlet_dofs) + unique!(sort!(dirichlet_dofs)) + update_dofs!(assembler.dof, dirichlet_dofs) + + # resize the resiual unkowns + resize!(assembler.residual_unknowns, num_unknowns(assembler.dof)) + + n_total_dofs = length(assembler.dof) - length(dirichlet_dofs) + + # TODO change to a good sizehint! + resize!(assembler.pattern.Is, 0) + resize!(assembler.pattern.Js, 0) + resize!(assembler.pattern.unknown_dofs, 0) + + ND, NN = num_dofs_per_node(assembler.dof), num_nodes(assembler.dof) + ids = reshape(1:length(assembler.dof), ND, NN) + + # TODO + vars = assembler.dof.H1_vars + fspace = vars[1].fspace + + n = 1 + # for fspace in fspaces + for conns in values(fspace.elem_conns) + dof_conns = @views reshape(ids[:, conns], ND * size(conns, 1), size(conns, 2)) + + # for e in 1:num_elements(fspace) + for e in 1:size(conns, 2) + # AK.foraxes(conns, 2) do e + # conn = dof_connectivity(fspace, e) + conn = @views dof_conns[:, e] + for temp in Iterators.product(conn, conn) + if insorted(temp[1], dirichlet_dofs) || insorted(temp[2], dirichlet_dofs) + # really do nothing here + else + push!(assembler.pattern.Is, temp[1] - count(x -> x < temp[1], dirichlet_dofs)) + push!(assembler.pattern.Js, temp[2] - count(x -> x < temp[2], dirichlet_dofs)) + push!(assembler.pattern.unknown_dofs, n) + end + n += 1 + end + end + end + + resize!(assembler.pattern.klasttouch, n_total_dofs) + resize!(assembler.pattern.csrrowptr, n_total_dofs + 1) + resize!(assembler.pattern.csrcolval, length(assembler.pattern.Is)) + resize!(assembler.pattern.csrnzval, length(assembler.pattern.Is)) + + return nothing +end + +function _zero_storage(asm::SparseMatrixAssembler, ::Val{:stiffness}) + fill!(asm.stiffness_storage, zero(eltype(asm.stiffness_storage))) +end + +function _zero_storage(asm::SparseMatrixAssembler, ::Val{:residual_and_stiffness}) + _zero_storage(asm, Val{:residual}()) + fill!(asm.stiffness_storage, zero(eltype(asm.stiffness_storage))) +end \ No newline at end of file diff --git a/src/assemblers/SparsityPattern.jl b/src/assemblers/SparsityPattern.jl new file mode 100644 index 0000000..fb74c9c --- /dev/null +++ b/src/assemblers/SparsityPattern.jl @@ -0,0 +1,108 @@ +struct SparsityPattern{ + I <: AbstractArray{Int, 1}, + R <: AbstractArray{Float64, 1}, +} + Is::I + Js::I + unknown_dofs::I + block_sizes::I + block_offsets::I + # cache arrays + klasttouch::I + csrrowptr::I + csrcolval::I + csrnzval::R + # additional cache arrays + csccolptr::I + cscrowval::I + cscnzval::R +end + +# TODO won't work for H(div) or H(curl) yet +function SparsityPattern(dof, type::Type{<:AbstractField}) + + # get number of dofs for creating cache arrays + + # TODO this line needs to be specialized for aribitrary fields + # it's hardcoded for H1 firght now. + ND, NN = num_dofs_per_node(dof), num_nodes(dof) + + n_total_dofs = NN * ND + vars = _dof_manager_vars(dof, type) + n_blocks = length(vars[1].fspace.ref_fes) + # n_blocks = length(dof.H1_vars[1].fspace.ref_fes) + + # first get total number of entries in a stupid manner + n_entries = 0 + block_sizes = Vector{Int64}(undef, n_blocks) + block_offsets = Vector{Int64}(undef, n_blocks) + + # for (n, conn) in enumerate(values(dof.H1_vars[1].fspace.elem_conns)) + for (n, conn) in enumerate(values(vars[1].fspace.elem_conns)) + ids = reshape(1:n_total_dofs, ND, NN) + # conn = reshape(ids[:, conn], ND * size(conn, 1), size(conn, 2)) + # display(block) + conn = reshape(ids[:, conn], ND * size(conn, 1), size(conn, 2)) + + # hacky for now + if eltype(conn) <: SVector + n_entries += length(conn[1])^2 * length(conn) + block_sizes[n] = length(conn) + block_offsets[n] = length(conn[1])^2 + else + n_entries += size(conn, 1)^2 * size(conn, 2) + block_sizes[n] = size(conn, 2) + block_offsets[n] = size(conn, 1)^2 + end + end + + # setup pre-allocated arrays based on number of entries found above + Is = Vector{Int64}(undef, n_entries) + Js = Vector{Int64}(undef, n_entries) + unknown_dofs = Vector{Int64}(undef, n_entries) + + # now loop over function spaces and elements + n = 1 + # for fspace in fspaces + # for block in valkeys(dof.vars[1].fspace.elem_conns) + # for conn in values(dof.H1_vars[1].fspace.elem_conns) + for conn in values(vars[1].fspace.elem_conns) + ids = reshape(1:n_total_dofs, ND, NN) + # conn = getproperty(dof.vars[1].fspace.elem_conns, block) + block_conn = reshape(ids[:, conn], ND * size(conn, 1), size(conn, 2)) + + # for e in 1:num_elements(fspace) + for e in axes(block_conn, 2) + # conn = dof_connectivity(fspace, e) + conn = @views block_conn[:, e] + for temp in Iterators.product(conn, conn) + Is[n] = temp[1] + Js[n] = temp[2] + unknown_dofs[n] = n + n += 1 + end + end + end + + # create caches + klasttouch = zeros(Int64, n_total_dofs) + csrrowptr = zeros(Int64, n_total_dofs + 1) + csrcolval = zeros(Int64, length(Is)) + csrnzval = zeros(Float64, length(Is)) + + csccolptr = Vector{Int64}(undef, 0) + cscrowval = Vector{Int64}(undef, 0) + cscnzval = Vector{Float64}(undef, 0) + + return SparsityPattern( + Is, Js, + unknown_dofs, + block_sizes, block_offsets, + # cache arrays + klasttouch, csrrowptr, csrcolval, csrnzval, + # additional cache arrays + csccolptr, cscrowval, cscnzval + ) +end + +num_entries(s::SparsityPattern) = length(s.Is) diff --git a/src/physics/Physics.jl b/src/physics/Physics.jl new file mode 100644 index 0000000..9c8463f --- /dev/null +++ b/src/physics/Physics.jl @@ -0,0 +1,6 @@ +abstract type AbstractPhysics end + +# better define this interface +function energy end +function residual end +function stiffness end diff --git a/test/poisson/TestPoisson2.jl b/test/poisson/TestPoisson2.jl index 43a647f..490abcf 100644 --- a/test/poisson/TestPoisson2.jl +++ b/test/poisson/TestPoisson2.jl @@ -1,5 +1,7 @@ +# using BenchmarkTools using Exodus using FiniteElementContainers +using Krylov using Parameters # methods for a simple Poisson problem @@ -7,14 +9,17 @@ f(X, _) = 2. * π^2 * sin(π * X[1]) * sin(π * X[2]) bc_func(_, _) = 0. -function residual(cell, u_el) +struct Poisson <: FiniteElementContainers.AbstractPhysics +end + +function FiniteElementContainers.residual(::Poisson, cell, u_el, args...) @unpack X_q, N, ∇N_X, JxW = cell ∇u_q = u_el * ∇N_X R_q = ∇u_q * ∇N_X' - N' * f(X_q, 0.0) return JxW * R_q[:] end -function tangent(cell, u_el) +function FiniteElementContainers.stiffness(::Poisson, cell, u_el, args...) @unpack X_q, N, ∇N_X, JxW = cell K_q = ∇N_X * ∇N_X' return JxW * K_q @@ -25,8 +30,10 @@ end function poisson_v2() mesh = UnstructuredMesh("./poisson/poisson.g") V = FunctionSpace(mesh, H1Field, Lagrange) + physics = Poisson() u = ScalarFunction(V, :u) - asm = SparseMatrixAssembler(u) + dof = DofManager(u) + asm = SparseMatrixAssembler(dof, H1Field) # setup and update bcs dbcs = DirichletBC[ @@ -39,28 +46,29 @@ function poisson_v2() # pre-setup some scratch arrays Uu = create_unknowns(asm) - Ubc = zeros(length(asm.dof.H1_bc_dofs)) + Ubc = create_bcs(asm, H1Field) U = create_field(asm, H1Field) - R = create_field(asm, H1Field) update_field!(U, asm, Uu, Ubc) + assemble!(asm, physics, U, :residual_and_stiffness) + K = stiffness(asm) - assemble!(R, asm, residual, U) - assemble!(asm, tangent, U) - - K = SparseArrays.sparse!(asm) + for n in 1:5 + Ru = residual(asm) + # ΔUu = -K \ Ru + ΔUu, stat = cg(-K, Ru) + update_field_unknowns!(U, asm.dof, ΔUu, +) + assemble!(asm, physics, U, :residual) - for n in 1:3 - ΔUu = -K \ R[asm.dof.H1_unknown_dofs] - U[asm.dof.H1_unknown_dofs] .=+ ΔUu + @show norm(ΔUu) norm(Ru) - @time assemble!(R, asm, residual, U) - - if norm(ΔUu) < 1e-12 || norm(R[asm.dof.H1_unknown_dofs]) < 1e-12 + if norm(ΔUu) < 1e-12 || norm(Ru) < 1e-12 break end end + @show maximum(U) + copy_mesh("./poisson/poisson.g", "./poisson/poisson.e") exo = ExodusDatabase("./poisson/poisson.e", "rw") write_names(exo, NodalVariable, ["u"]) @@ -72,4 +80,5 @@ function poisson_v2() end poisson_v2() -poisson_v2() \ No newline at end of file +# @time poisson_v2() +# @benchmark poisson_v2() diff --git a/test/poisson/TestPoissonCUDA.jl b/test/poisson/TestPoissonCUDA.jl index abff5b4..ae096b7 100644 --- a/test/poisson/TestPoissonCUDA.jl +++ b/test/poisson/TestPoissonCUDA.jl @@ -1,51 +1,100 @@ import KernelAbstractions as KA using Adapt +using BenchmarkTools using CUDA using Exodus using FiniteElementContainers +using Krylov using LinearAlgebra using Parameters using ReferenceFiniteElements using SparseArrays # methods for a simple Poisson problem -f(X, _) = 2. * π^2 * sin(2π * X[1]) * sin(4π * X[2]) +f(X, _) = 2. * π^2 * sin(2π * X[1]) * sin(2π * X[2]) +bc_func(_, _) = 0. -function residual(cell, u_el) +struct Poisson <: FiniteElementContainers.AbstractPhysics +end + +function FiniteElementContainers.residual(::Poisson, cell, u_el, args...) @unpack X_q, N, ∇N_X, JxW = cell ∇u_q = u_el * ∇N_X R_q = ∇u_q * ∇N_X' - N' * f(X_q, 0.0) return JxW * R_q[:] end -function tangent(cell, u_el) +function FiniteElementContainers.stiffness(::Poisson, cell, u_el, args...) @unpack X_q, N, ∇N_X, JxW = cell K_q = ∇N_X * ∇N_X' return JxW * K_q end -# do all setup on CPU -# the mesh for instance is not gpu compatable -mesh = UnstructuredMesh("./test/poisson/poisson.g") -V = FunctionSpace(mesh, H1, Lagrange) -u = ScalarFunction(V, :u) -dof = NewDofManager(u) -asm = SparseMatrixAssembler(dof) +function poisson_cuda() + # do all setup on CPU + # the mesh for instance is not gpu compatable + mesh = UnstructuredMesh("./test/poisson/poisson.g") + V = FunctionSpace(mesh, H1Field, Lagrange) + physics = Poisson() + u = ScalarFunction(V, :u) + dof = DofManager(u) + asm = SparseMatrixAssembler(dof, H1Field) + + dbcs = DirichletBC[ + DirichletBC(asm.dof, :u, :sset_1, bc_func), + DirichletBC(asm.dof, :u, :sset_2, bc_func), + DirichletBC(asm.dof, :u, :sset_3, bc_func), + DirichletBC(asm.dof, :u, :sset_4, bc_func), + ] + # TODO this one will be tough to do on the GPU + update_dofs!(asm, dbcs) + + # need to assemble once before moving to GPU + # TODO try to wrap this in the |> gpu call + U = create_field(asm, H1Field) + assemble!(asm, physics, U, :stiffness) + K = stiffness(asm) + + # device movement + asm_gpu = asm |> gpu -bc_nodes = sort!(unique!(vcat(values(mesh.nodeset_nodes)...))) + Uu = create_unknowns(asm_gpu) + Ubc = create_bcs(asm_gpu, H1Field) + U = create_field(asm_gpu, H1Field) -# TODO this one will be tought to do on the GPU -update_dofs!(asm, bc_nodes) + update_field!(U, asm_gpu, Uu, Ubc) + assemble!(asm_gpu, physics, U, :residual) + assemble!(asm_gpu, physics, U, :stiffness) -# TODO move some of these to after GPU movement -# to test kernels on GPU + # Ru = residual(asm_gpu) + K = stiffness(asm_gpu) -asm_gpu = Adapt.adapt_structure(CuArray, asm) -# bc_nodes_gpu = Adapt.adapt_structure(CuArray, bc_nodes) + for n in 1:3 + # ΔUu = -K \ Ru + Ru = residual(asm_gpu) + ΔUu, stats = cg(-K, Ru) + update_field_unknowns!(U, asm_gpu.dof, ΔUu, +) + assemble!(asm_gpu, physics, U, :residual) + + @show norm(ΔUu) norm(Ru) + + if norm(ΔUu) < 1e-12 || norm(Ru) < 1e-12 + break + end + end + + # update_field!(U, asm_gpu, Uu, Ubc) + U = U |> cpu + + copy_mesh("./test/poisson/poisson.g", "./test/poisson/poisson.e") + exo = ExodusDatabase("./test/poisson/poisson.e", "rw") + write_names(exo, NodalVariable, ["u"]) + write_time(exo, 1, 0.0) + write_values(exo, NodalVariable, 1, "u", U[1, :]) + close(exo) +end -Uu = create_unknowns(asm_gpu) -Ubc = KA.zeros(KA.get_backend(Uu), Float64, length(bc_nodes)) -U = create_field(asm_gpu, H1) -R = create_field(asm_gpu, H1) +# @time poisson_cuda() +# @time poisson_cuda() -update_field!(U, asm_gpu.dof, Uu, Ubc) +@benchmark poisson_cuda()