Skip to content

Commit 9dc2ffd

Browse files
Merge pull request #557 from mxpoch/main
Updating LoopVectorization for 1.12
2 parents 945c2f7 + db7af8c commit 9dc2ffd

File tree

9 files changed

+93
-81
lines changed

9 files changed

+93
-81
lines changed

Project.toml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,12 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
2929
[weakdeps]
3030
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
3131
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
32+
NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
3233
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
3334

3435
[extensions]
3536
ForwardDiffExt = ["ChainRulesCore", "ForwardDiff"]
37+
ForwardDiffNNlibExt = ["ForwardDiff", "NNlib"]
3638
SpecialFunctionsExt = "SpecialFunctions"
3739

3840
[compat]
@@ -46,6 +48,7 @@ HostCPUFeatures = "0.1.10"
4648
IfElse = "0.1"
4749
LayoutPointers = "0.1.11"
4850
LinearAlgebra = "1"
51+
NNlib = "0.9.31"
4952
OffsetArrays = "1.4.1"
5053
PolyesterWeave = "0.1.10, 0.2"
5154
PrecompileTools = "1"
@@ -56,5 +59,9 @@ Static = "0.8.4, 1"
5659
StaticArrayInterface = "1"
5760
ThreadingUtilities = "0.5"
5861
UnPack = "1"
59-
VectorizationBase = "0.21.67"
60-
julia = "1.6"
62+
VectorizationBase = "0.21.72"
63+
julia = "1.10"
64+
65+
[extras]
66+
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
67+
NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"

docs/make.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ makedocs(;
3131
],
3232
# repo="https://github.com/JuliaSIMD/LoopVectorization.jl/blob/{commit}{path}#L{line}",
3333
sitename = "LoopVectorization.jl",
34-
authors = "Chris Elrod"
34+
authors = "Chris Elrod",
35+
checkdocs=:exports,
3536
# assets=[],
3637
)
3738

docs/src/api.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# API reference
22

3+
```@docs
4+
LoopVectorization
5+
```
6+
37
## Macros
48

59
```@docs
@@ -12,6 +16,8 @@
1216
```@docs
1317
vmap
1418
vmap!
19+
vmapt
20+
vmapt!
1521
vmapnt
1622
vmapnt!
1723
vmapntt
@@ -27,7 +33,12 @@ LoopVectorization.vfilter!
2733

2834
## `reduce`-like constructs
2935
```@docs
36+
vsum
3037
vreduce
3138
vmapreduce
3239
```
3340

41+
## Operators
42+
```@docs
43+
44+
```

docs/src/index.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,3 @@ Pages = [
3030
]
3131
Depth = 1
3232
```
33-
34-

ext/ForwardDiffExt.jl

Lines changed: 1 addition & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
module ForwardDiffExt
22
import ForwardDiff, ChainRulesCore
33
using LoopVectorization, VectorizationBase, SLEEFPirates, ForwardDiff
4+
using SLEEFPirates: tanh_fast, sigmoid_fast
45

56
import IfElse: ifelse
67
using VectorizationBase: AbstractSIMD, AbstractMask, zero_offsets
78

89
using LoopVectorization:
910
AbstractSIMD,
1011
AbstractStridedPointer,
11-
relu,
1212
vmap,
1313
VectorizationBase,
1414
vmapt,
@@ -140,22 +140,6 @@ end
140140
)
141141
end
142142
end
143-
@generated function VectorizationBase.relu(
144-
x::ForwardDiff.Dual{T,S,N}
145-
) where {T,S,N}
146-
quote
147-
$(Expr(:meta, :inline))
148-
v = x.value
149-
z = zero(v)
150-
cmp = v < z
151-
r = ifelse(cmp, z, v)
152-
p = x.partials
153-
ForwardDiff.Dual{T}(
154-
r,
155-
ForwardDiff.Partials(Base.Cartesian.@ntuple $N n -> ifelse(cmp, z, p[n]))
156-
)
157-
end
158-
end
159143

160144
@generated function _ifelse(
161145
m::Union{AbstractMask,VecUnroll{<:Any,<:Any,Bit,<:AbstractMask}},
@@ -284,15 +268,6 @@ function ChainRulesCore.rrule(::typeof(sigmoid_fast), x)
284268
end
285269
s, ∂
286270
end
287-
function ChainRulesCore.rrule(::typeof(relu), v)
288-
z = zero(v)
289-
cmp = v < z
290-
r = ifelse(cmp, z, v)
291-
∂ = let cmp = cmp
292-
y -> (ChainRulesZero(), ifelse(cmp, zero(y), y))
293-
end
294-
r, ∂
295-
end
296271

297272
function ∂vmap_singlethread!(
298273
f::F,

ext/ForwardDiffNNlibExt.jl

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
module ForwardDiffNNlibExt
import ForwardDiff
using LoopVectorization, VectorizationBase, SLEEFPirates, ForwardDiff, NNlib

# `NNlib.relu` for a `ForwardDiff.Dual` whose value type is a
# `LoopVectorization.AbstractSIMD`.
#
# Wherever the value `v` compares below zero, both the value and every partial
# are replaced by `zero(v)`; elsewhere they are passed through unchanged.
#
# The method is `@generated` so that the partial count `N` can be spliced into
# `Base.Cartesian.@ntuple` as a literal.
#
# Only `T` (the tag) and `N` (the number of partials) are declared as type
# parameters: the value type is constrained directly in the signature, so no
# additional type variable is needed for it.
@generated function NNlib.relu(
  x::ForwardDiff.Dual{T,<:LoopVectorization.AbstractSIMD,N}
) where {T,N}
  quote
    $(Expr(:meta, :inline))
    v = x.value
    z = zero(v)
    # `cmp` selects where the input is negative.
    cmp = v < z
    r = ifelse(cmp, z, v)
    p = x.partials
    ForwardDiff.Dual{T}(
      r,
      ForwardDiff.Partials(Base.Cartesian.@ntuple $N n -> ifelse(cmp, z, p[n]))
    )
  end
end

# `NNlib.leakyrelu` for a `ForwardDiff.Dual` whose value type is a
# `LoopVectorization.AbstractSIMD`.
#
# `a` is the slope applied where the value is negative; it defaults to `0.01`.
# Wherever the value `v` compares below zero, the value and every partial are
# multiplied by the slope; elsewhere they are passed through unchanged.
#
# The method is `@generated` so that the partial count `N` can be spliced into
# `Base.Cartesian.@ntuple` as a literal.
@generated function NNlib.leakyrelu(
  x::ForwardDiff.Dual{T,<:LoopVectorization.AbstractSIMD,N},
  a = 0.01
) where {T,N}
  quote
    $(Expr(:meta, :inline))
    v = x.value
    z = zero(v)

    # Convert the slope to the type of the value before multiplying.
    α = convert(typeof(v), a)
    # `cmp` selects where the input is negative.
    cmp = v < z
    r = ifelse(cmp, α * v, v)
    p = x.partials
    ForwardDiff.Dual{T}(
      r,
      ForwardDiff.Partials(Base.Cartesian.@ntuple $N n -> ifelse(cmp, α * p[n], p[n]))
    )
  end
end

end

src/LoopVectorization.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ if isdefined(Base, :Experimental) &&
2525
@eval Base.Experimental.@max_methods 1
2626
end
2727
export LowDimArray,
28-
static, stridedpointer, *ˡ, _turbo_!, tanh_fast, sigmoid_fast
28+
static, stridedpointer, *ˡ, tanh_fast, sigmoid_fast
2929

3030
using ArrayInterface: UpTri, LoTri
3131
using Static: StaticInt, gt, static, Zero, One, reduce_tup

src/constructors.jl

Lines changed: 22 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -61,53 +61,41 @@ function substitute_broadcast(
6161
configarg = (inline, u₁, u₂, v, true, threads, warncheckarg, safe)
6262
unroll_param_tup =
6363
Expr(:call, lv(:avx_config_val), :(Val{$configarg}()), staticexpr(0))
64+
6465
for n ∈ 1:nargs
6566
_ciₙ = ci[n]
66-
if _ciₙ isa Symbol
67-
syms[n] = _ciₙ::Symbol
68-
else
69-
syms[n] = Symbol('%', n)
70-
#ciₙ::Expr = _ciₙ::Expr
71-
if _ciₙ isa Expr
72-
ciₙ = _ciₙ
73-
elseif _ciₙ isa GlobalRef
74-
ciₙ = Expr(:globalref, _ciₙ.mod, _ciₙ.name)
67+
syms[n] = Symbol('%', n)
68+
69+
if _ciₙ isa Core.SSAValue
70+
push!(lb.args, Expr(:(=), syms[n], syms[_ciₙ.id]))
71+
72+
elseif _ciₙ isa GlobalRef
73+
if _ciₙ.mod === Base || _ciₙ.mod === Core
74+
push!(lb.args, Expr(:(=), syms[n], lv(_ciₙ.name)))
7575
else
76-
error("Unexpected type in ci: $(typeof(_ciₙ))")
76+
push!(lb.args, Expr(:(=), syms[n], _ciₙ.name))
7777
end
78-
ciₙargs = ciₙ.args
79-
f = first(ciₙargs)
80-
if ciₙ.head === :(=)
81-
push!(lb.args, Expr(:(=), f, syms[((ciₙargs[2])::Core.SSAValue).id]))
82-
elseif isglobalref(f, Base, :materialize!)
83-
add_ci_call!(
84-
lb,
85-
lv(:vmaterialize!),
86-
ciₙargs,
87-
syms,
88-
n,
89-
unroll_param_tup,
90-
mod
91-
)
78+
79+
elseif _ciₙ isa Expr && _ciₙ.head === :call
80+
f = first(_ciₙ.args)
81+
if isglobalref(f, Base, :materialize!)
82+
add_ci_call!(lb, lv(:vmaterialize!), _ciₙ.args, syms, n, unroll_param_tup, mod)
9283
elseif isglobalref(f, Base, :materialize)
93-
add_ci_call!(
94-
lb,
95-
lv(:vmaterialize),
96-
ciₙargs,
97-
syms,
98-
n,
99-
unroll_param_tup,
100-
mod
101-
)
84+
add_ci_call!(lb, lv(:vmaterialize), _ciₙ.args, syms, n, unroll_param_tup, mod)
10285
else
103-
add_ci_call!(lb, f, ciₙargs, syms, n)
86+
add_ci_call!(lb, f, _ciₙ.args, syms, n)
10487
end
88+
89+
else
90+
push!(lb.args, Expr(:(=), syms[n], _ciₙ))
10591
end
10692
end
93+
10794
ret::Expr = pop!(lb.args)::Expr
10895
if Meta.isexpr(ret, :(=), 2)
10996
ret = (ret.args[2])::Expr
11097
end
98+
11199
esc(Expr(:let, lb, Expr(:block, ret)))
112100
end
113101

test/forwarddiffext.jl

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,6 @@ function tovec(x::ForwardDiff.Dual{T,V,N}) where {T,V,N}
1616
return ret
1717
end
1818

19-
if LoopVectorization.ifelse !== Base.ifelse
20-
@inline function NNlib.leakyrelu(
21-
x::LoopVectorization.AbstractSIMD,
22-
a = NNlib.oftf(x, NNlib.leakyrelu_a),
23-
)
24-
LoopVectorization.ifelse(x > zero(x), float(x), NNlib.oftf(x, a * x)) # max(a*x, x) is 3x slower
25-
end
26-
@inline function NNlib.leakyrelu(
27-
x::ForwardDiff.Dual{<:Any,<:LoopVectorization.AbstractSIMD},
28-
a = NNlib.oftf(x, NNlib.leakyrelu_a),
29-
)
30-
LoopVectorization.ifelse(x > zero(x), float(x), NNlib.oftf(x, a * x)) # max(a*x, x) is 3x slower
31-
end
32-
end
33-
3419
vx0 = randnvec()
3520
vx1 = randnvec()
3621
vx2 = randnvec()
@@ -50,3 +35,8 @@ vud = ForwardDiff.Dual(vu0, vu1, vu2)
5035
reinterpret(Float64, NNlib.leakyrelu.(tovec(vd0)))
5136
@test reinterpret(Float64, tovec(NNlib.leakyrelu(vud))) ≈
5237
reinterpret(Float64, NNlib.leakyrelu.(tovec(vud)))
38+
39+
@test reinterpret(Float64, tovec(NNlib.relu(vd0))) ≈
40+
reinterpret(Float64, NNlib.relu.(tovec(vd0)))
41+
@test reinterpret(Float64, tovec(NNlib.relu(vud))) ≈
42+
reinterpret(Float64, NNlib.relu.(tovec(vud)))

0 commit comments

Comments
 (0)