Skip to content

Commit dfdc74e

Browse files
authored
Merge pull request #80 from Cthonios/assemblers-gpu
Assemblers gpu
2 parents 6679f2d + b4fa54e commit dfdc74e

12 files changed

+828
-77
lines changed

Project.toml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ Exodus = "f57ae99e-f805-4780-bdca-96e224be1e5a"
2323

2424
[extensions]
2525
FiniteElementContainersAdaptExt = "Adapt"
26-
FiniteElementContainersCUDAExt = "CUDA"
26+
FiniteElementContainersCUDAExt = ["Adapt", "CUDA"]
2727
FiniteElementContainersExodusExt = "Exodus"
2828

2929
[compat]
@@ -36,6 +36,7 @@ DocStringExtensions = "0.9"
3636
Exodus = "0.13"
3737
IterativeSolvers = "0.9"
3838
JET = "0.9"
39+
Krylov = "0.9"
3940
KernelAbstractions = "0.9"
4041
LinearAlgebra = "1"
4142
Parameters = "0.12"
@@ -55,9 +56,10 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
5556
Exodus = "f57ae99e-f805-4780-bdca-96e224be1e5a"
5657
IterativeSolvers = "42fd0dbc-a981-5370-80f2-aaf504508153"
5758
JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
59+
Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7"
5860
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
5961
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
6062
TestSetExtensions = "98d24dd4-01ad-11ea-1b02-c9a08f80db04"
6163

6264
[targets]
63-
test = ["Aqua", "CUDA", "Exodus", "IterativeSolvers", "JET", "Parameters", "Test", "TestSetExtensions"]
65+
test = ["Aqua", "CUDA", "Exodus", "IterativeSolvers", "JET", "Krylov", "Parameters", "Test", "TestSetExtensions"]

ext/FiniteElementContainersAdaptExt.jl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,21 @@ module FiniteElementContainersAdaptExt
33
using Adapt
44
using FiniteElementContainers
55

6+
FiniteElementContainersAdaptExt.cpu(x) = Adapt.adapt_structure(Array, x)
7+
68
# Assemblers
79
function Adapt.adapt_structure(to, asm::SparseMatrixAssembler)
810
dof = Adapt.adapt_structure(to, asm.dof)
911
pattern = Adapt.adapt_structure(to, asm.pattern)
1012
constraint_storage = Adapt.adapt_structure(to, asm.constraint_storage)
1113
residual_storage = Adapt.adapt_structure(to, asm.residual_storage)
14+
residual_unknowns = Adapt.adapt_structure(to, asm.residual_unknowns)
1215
stiffness_storage = Adapt.adapt_structure(to, asm.stiffness_storage)
1316
return SparseMatrixAssembler(
1417
dof, pattern,
15-
constraint_storage, residual_storage, stiffness_storage
18+
constraint_storage,
19+
residual_storage, residual_unknowns,
20+
stiffness_storage
1621
)
1722
end
1823

ext/FiniteElementContainersCUDAExt.jl

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,19 @@
11
module FiniteElementContainersCUDAExt
22

3+
using Adapt
34
using CUDA
45
using FiniteElementContainers
56
using KernelAbstractions
67

8+
FiniteElementContainers.gpu(x) = Adapt.adapt_structure(CuArray, x)
9+
710
# this method need to have the assembler initialized first
811
# if the stored values in asm.pattern.cscnzval or zero
912
# CUDA will error out
1013
function CUDA.CUSPARSE.CuSparseMatrixCSC(asm::SparseMatrixAssembler)
1114
@assert typeof(get_backend(asm)) <: CUDABackend "Assembler is not on a CUDA device"
12-
@assert length(asm.pattern.cscnzval) > 0 "Need to assemble the assembler once"
13-
@assert all(x -> x != zero(eltype(asm.pattern.cscnzval)), asm.pattern.cscnzval) "Need to assemble the assembler once"
15+
@assert length(asm.pattern.cscnzval) > 0 "Need to assemble the assembler once with SparseArrays.sparse!(assembler)"
16+
@assert all(x -> x != zero(eltype(asm.pattern.cscnzval)), asm.pattern.cscnzval) "Need to assemble the assembler once with SparseArrays.sparse!(assembler)"
1417
n_dofs = FiniteElementContainers.num_unknowns(asm.dof)
1518
return CUDA.CUSPARSE.CuSparseMatrixCSC(
1619
asm.pattern.csccolptr,
@@ -20,6 +23,10 @@ function CUDA.CUSPARSE.CuSparseMatrixCSC(asm::SparseMatrixAssembler)
2023
)
2124
end
2225

26+
function FiniteElementContainers._stiffness(asm::SparseMatrixAssembler, ::Backend)
27+
return CUDA.CUSPARSE.CuSparseMatrixCSC(asm)
28+
end
29+
2330
# this one isn't quite right
2431
# function CUDA.CUSPARSE.CuSparseMatrixCSR(asm::SparseMatrixAssembler)
2532
# n_dofs = FiniteElementContainers.num_unknowns(asm.dof)

src/Assemblers.jl

Lines changed: 131 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,135 @@ function assemble!(
256256
return nothing
257257
end
258258

259+
# wrapper
260+
function assemble!(
261+
assembler::SparseMatrixAssembler,
262+
func::F,
263+
U::T,
264+
sym::Symbol
265+
) where {F, T}
266+
assemble!(assembler, func, U, Val{sym}())
267+
end
268+
269+
KA.@kernel function _assemble_residual_kernel!(asm, func, U, X, conns, ref_fe, block_id)
270+
E = KA.@index(Global)
271+
272+
# some helpers
273+
NDim = size(X, 1)
274+
ND, NN = num_dofs_per_node(asm.dof), num_nodes(asm.dof)
275+
NNPE = ReferenceFiniteElements.num_vertices(ref_fe)
276+
# NDim = ReferenceFiniteElements.num_dimensions(ref_fe)
277+
NxNDof = NNPE * ND
278+
# IDs = reshape(1:length(asm.dof), ND, size(U, 2))
279+
280+
# extract element level stuff
281+
u_el = @views SMatrix{ND, NNPE, Float64, NxNDof}(U[:, conns[:, E]])
282+
x_el = @views SMatrix{NDim, NNPE, Float64, NDim * NNPE}(X[:, conns[:, E]])
283+
# dof_conns = reshape(IDs[:, conns], ND * length(conns))
284+
285+
# assemble element level residual
286+
R_el = zeros(SVector{NxNDof, Float64})
287+
for q in 1:num_quadrature_points(ref_fe)
288+
interps = MappedInterpolants(ref_fe.cell_interps.vals[q], x_el)
289+
290+
# dynamic method invocation isn't working here.
291+
# R_q = func(interps, u_el)
292+
# R_el = R_el + R_q
293+
294+
X_q, N, ∇N_X, JxW = interps.X_q, interps.N, interps.∇N_X, interps.JxW
295+
∇u_q = u_el * ∇N_X
296+
f = 2. * π^2 * sin(2π * X_q[1]) * sin(4π * X_q[2])
297+
R_q = ∇u_q * ∇N_X' - N' * f
298+
R_el = R_el + JxW * R_q[:]
299+
end
300+
301+
# now assemble
302+
# TODO won't work for problems with more than one dof...
303+
# need to create dof_conns
304+
for i in axes(conns, 1)
305+
Atomix.@atomic asm.residual_storage.vals[conns[i]] += R_el[i]
306+
end
307+
end
308+
309+
KA.@kernel function _assemble_stiffness_kernel!(asm, func, U, X, conns, ref_fe, block_id)
310+
E = KA.@index(Global)
311+
312+
# some helpers
313+
NDim = size(X, 1)
314+
ND, NN = num_dofs_per_node(asm.dof), num_nodes(asm.dof)
315+
NNPE = ReferenceFiniteElements.num_vertices(ref_fe)
316+
# NDim = ReferenceFiniteElements.num_dimensions(ref_fe)
317+
NxNDof = NNPE * ND
318+
319+
# extract element level stuff
320+
u_el = @views SMatrix{ND, NNPE, Float64, NxNDof}(U[:, conns[:, E]])
321+
x_el = @views SMatrix{NDim, NNPE, Float64, NDim * NNPE}(X[:, conns[:, E]])
322+
323+
# assemble element level residual
324+
K_el = zeros(SMatrix{NxNDof, NxNDof, Float64, NxNDof * NxNDof})
325+
for q in 1:num_quadrature_points(ref_fe)
326+
interps = MappedInterpolants(ref_fe.cell_interps.vals[q], x_el)
327+
328+
# dynamic method invocation isn't working here.
329+
# R_q = func(interps, u_el)
330+
# R_el = R_el + R_q
331+
332+
X_q, N, ∇N_X, JxW = interps.X_q, interps.N, interps.∇N_X, interps.JxW
333+
K_q = ∇N_X * ∇N_X'
334+
K_el = K_el + JxW * K_q
335+
end
336+
337+
# now assemble
338+
# TODO won't work for problems with more than one dof...
339+
# need to create dof_conns
340+
# start_id = (block_id - 1) * asm.pattern.block_sizes[block_id] +
341+
# (el_id - 1) * asm.pattern.block_offsets[block_id] + 1
342+
# end_id = start_id + asm.pattern.block_offsets[block_id] - 1
343+
# ids = start_id:end_id
344+
# for id in ids
345+
# Atomix.@atomic asm.stiffness_storage[id] += K_el[:]
346+
# end
347+
end
348+
349+
# newer method try to dispatch based on field type
350+
function assemble!(
351+
assembler::SparseMatrixAssembler,
352+
residual_func::F,
353+
U::T,
354+
::Val{:residual}
355+
) where {F <: Function, T <: H1Field}
356+
# TODO hack for now, only grabbing first var fspace
357+
# fspace = getproperty(assembler.dof.H1_vars, :H1_var_1)
358+
fspace = assembler.dof.H1_vars[1].fspace
359+
for (block_id, conns) in enumerate(values(fspace.elem_conns))
360+
ref_fe = values(fspace.ref_fes)[block_id]
361+
# backend = KA.get_backend(assembler)
362+
# TODO eventually dispatch on CPU vs. GPU
363+
kernel! = _assemble_residual_kernel!(KA.get_backend(assembler))
364+
kernel!(assembler, residual_func, U, fspace.coords, conns, ref_fe, block_id, ndrange = size(conns, 2))
365+
end
366+
end
367+
368+
function assemble!(
369+
assembler::SparseMatrixAssembler,
370+
tangent_func::F,
371+
U::T,
372+
::Val{:stiffness}
373+
) where {F <: Function, T <: H1Field}
374+
# TODO hack for now, only grabbing first var fspace
375+
# fspace = getproperty(assembler.dof.H1_vars, :H1_var_1)
376+
fspace = assembler.dof.H1_vars[1].fspace
377+
for (block_id, conns) in enumerate(values(fspace.elem_conns))
378+
ref_fe = values(fspace.ref_fes)[block_id]
379+
# backend = KA.get_backend(assembler)
380+
# TODO eventually dispatch on CPU vs. GPU
381+
382+
# TODO going to need to query the start/end ids for the block assembler
383+
kernel! = _assemble_stiffness_kernel!(KA.get_backend(assembler))
384+
kernel!(assembler, tangent_func, U, fspace.coords, conns, ref_fe, block_id, ndrange = size(conns, 2))
385+
end
386+
end
387+
259388
# TODO hardcoded for H1
260389
function assemble!(
261390
R,
@@ -265,6 +394,7 @@ function assemble!(
265394
) where {F <: Function, T <: AbstractArray}
266395

267396
R .= zero(eltype(R))
397+
# fill(R.vals, zero(eltype(R)))
268398

269399
vars = assembler.dof.H1_vars
270400

@@ -274,42 +404,9 @@ function assemble!(
274404

275405
fspace = vars[1].fspace
276406

277-
# NDim = size(fspace.coords, 1)
278-
# ND, NN = num_dofs_per_node(assembler.dof), num_nodes(assembler.dof)
279-
# ids = reshape(1:length(assembler.dof), ND, NN)
280-
281407
for (block_id, conns) in enumerate(values(fspace.elem_conns))
282408
ref_fe = values(fspace.ref_fes)[block_id]
283-
284409
assemble!(R, assembler, residual_func, U, fspace.coords, conns, ref_fe)
285-
# NNPE = ReferenceFiniteElements.num_vertices(ref_fe)
286-
# NxNDof = NNPE * ND
287-
# dof_conns = @views reshape(ids[:, conns], ND * size(conns, 1), size(conns, 2))
288-
289-
# for e in 1:size(conns, 2)
290-
# AK.foraxes(conns, 2) do e
291-
# u_el = @views SMatrix{ND, NNPE, Float64, NxNDof}(U[dof_conns[:, e]])
292-
# x_el = @views SMatrix{NDim, NNPE, Float64, NDim * NNPE}(fspace.coords[:, conns[:, e]])
293-
# # K_el = zeros(SMatrix{NxNDof, NxNDof, Float64, NxNDof * NxNDof})
294-
# # R_el = zeros(SMatrix{ND, NNPE, Float64, NxNDof})
295-
# R_el = zeros(SVector{NxNDof, Float64})
296-
297-
# for q in 1:num_quadrature_points(ref_fe)
298-
# N = ReferenceFiniteElements.shape_function_value(ref_fe, q)
299-
# ∇N_ξ = ReferenceFiniteElements.shape_function_gradient(ref_fe, q)
300-
# ∇N_X = map_shape_function_gradients(x_el, ∇N_ξ)
301-
# JxW = volume(x_el, ∇N_ξ) * quadrature_weight(ref_fe, q)
302-
# x_q = x_el * N
303-
# interps = Interpolants(x_q, N, ∇N_X, JxW)
304-
305-
# R_q = residual_func(interps, u_el)
306-
# R_el = R_el + JxW * R_q
307-
# end
308-
309-
# # assemble!(assembler, R_el, e, block_id)
310-
# # Note this method is in old stuff TODO
311-
# @views assemble!(R, R_el, dof_conns[:, e])
312-
# end
313410
end
314411
end
315412

@@ -396,6 +493,7 @@ function assemble!(
396493
end
397494
end
398495

496+
create_bcs(asm::SparseMatrixAssembler, type) = create_bcs(asm.dof, type)
399497
create_field(asm::SparseMatrixAssembler, type) = create_field(asm.dof, type)
400498
create_unknowns(asm::SparseMatrixAssembler) = create_unknowns(asm.dof)
401499

src/DofManagers.jl

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,10 @@ function _dof_manager_sym_name(u::VectorFunction)
116116
return Symbol(split(String(names(u)[1]), ['_'])[1])
117117
end
118118

119+
function _dof_manager_vars(dof::DofManager, ::Type{<:H1Field})
120+
return dof.H1_vars
121+
end
122+
119123
function _filter_field_type(vars, T)
120124
vars = filter(x -> isa(x.fspace.coords, T), vars)
121125
syms = map(_dof_manager_sym_name, vars)
@@ -167,6 +171,14 @@ Base.length(dof::DofManager) = length(dof.H1_bc_dofs) + length(dof.H1_unknown_do
167171

168172
KA.get_backend(dof::DofManager) = KA.get_backend(dof.H1_unknown_dofs)
169173

174+
"""
175+
$(TYPEDSIGNATURES)
176+
"""
177+
function create_bcs(dof::DofManager, ::Type{H1Field})
178+
backend = KA.get_backend(dof.H1_bc_dofs)
179+
return KA.zeros(backend, Float64, length(dof.H1_bc_dofs))
180+
end
181+
170182
"""
171183
$(TYPEDSIGNATURES)
172184
"""
@@ -361,6 +373,33 @@ function update_field_unknowns!(U::H1Field, dof::DofManager, Uu::T) where T <: A
361373
return nothing
362374
end
363375

376+
KA.@kernel function _update_field_unknowns_kernel_plus!(U::H1Field, dof::DofManager, Uu::T) where T <: AbstractArray{<:Number, 1}
377+
N = KA.@index(Global)
378+
@inbounds U[dof.H1_unknown_dofs[N]] += Uu[N]
379+
end
380+
381+
function _update_field_unknowns!(U::H1Field, dof::DofManager, Uu::T, ::typeof(+), backend::KA.Backend) where T <: AbstractArray{<:Number, 1}
382+
kernel! = _update_field_unknowns_kernel_plus!(backend)
383+
kernel!(U, dof, Uu, ndrange = length(Uu))
384+
return nothing
385+
end
386+
387+
function _update_field_unknowns!(U::H1Field, dof::DofManager, Uu::T, ::typeof(+), ::KA.CPU) where T <: AbstractArray{<:Number, 1}
388+
for (n, d) in enumerate(dof.H1_unknown_dofs)
389+
U[d] += Uu[n]
390+
end
391+
return nothing
392+
end
393+
394+
"""
395+
$(TYPEDSIGNATURES)
396+
Does a simple addition on CPUs. On GPUs it uses a ```KernelAbstractions``` kernel
397+
"""
398+
function update_field_unknowns!(U::H1Field, dof::DofManager, Uu::T, ::typeof(+)) where T <: AbstractArray{<:Number, 1}
399+
_update_field_unknowns!(U, dof, Uu, +, KA.get_backend(dof))
400+
return nothing
401+
end
402+
364403
"""
365404
$(TYPEDSIGNATURES)
366405
"""

src/FiniteElementContainers.jl

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
11
module FiniteElementContainers
22

3+
# general
4+
export cpu
5+
export gpu
6+
37
# Assemblers
48
export SparseMatrixAssembler
59
export assemble!
10+
export residual
11+
export stiffness
612

713
# BCs
814
export DirichletBC
@@ -41,6 +47,7 @@ export connectivity
4147

4248
# DofManager
4349
export DofManager
50+
export create_bcs
4451
export create_field
4552
export create_unknowns
4653
export update_dofs!
@@ -87,6 +94,9 @@ using Tensors
8794

8895
abstract type FEMContainer end
8996

97+
function cpu end
98+
function gpu end
99+
90100
# basic stuff
91101
include("fields/Fields.jl")
92102
include("Meshes.jl")
@@ -100,6 +110,9 @@ include("Functions.jl")
100110
include("DofManagers.jl")
101111
include("bcs/BoundaryConditions.jl")
102112
include("formulations/Formulations.jl")
103-
include("Assemblers.jl")
113+
# include("Assemblers.jl")
114+
include("assemblers/Assemblers.jl")
115+
116+
include("physics/Physics.jl")
104117

105118
end # module

0 commit comments

Comments
 (0)