11using LoopVectorization, LinearAlgebra
22BLAS. set_num_threads (1 )
33
4- function jgemm! (C, A, B )
5- C .= 0
6- M, N = size (C ); K = size (B ,1 )
4+ function jgemm! (𝐂, 𝐀, 𝐁 )
5+ 𝐂 .= 0
6+ M, N = size (𝐂 ); K = size (𝐁 ,1 )
77 @inbounds for n ∈ 1 : N, k ∈ 1 : K
88 @simd ivdep for m ∈ 1 : M
9- C [m,n] += A [m,k] * B [k,n]
9+ 𝐂 [m,n] += 𝐀 [m,k] * 𝐁 [k,n]
1010 end
1111 end
1212end
13- @inline function jgemm! (C, Aᵀ :: Adjoint , B )
14- A = parent (Aᵀ )
15- @inbounds for n ∈ 1 : size (C ,2 ), m ∈ 1 : size (C ,1 )
16- Cₘₙ = zero (eltype (C ))
17- @simd ivdep for k ∈ 1 : size (A ,1 )
18- Cₘₙ += A [k,m] * B [k,n]
13+ @inline function jgemm! (𝐂, 𝐀ᵀ :: Adjoint , 𝐁 )
14+ 𝐀 = parent (𝐀ᵀ )
15+ @inbounds for n ∈ 1 : size (𝐂 ,2 ), m ∈ 1 : size (𝐂 ,1 )
16+ 𝐂ₘₙ = zero (eltype (𝐂 ))
17+ @simd ivdep for k ∈ 1 : size (𝐀 ,1 )
18+ 𝐂ₘₙ += 𝐀 [k,m] * 𝐁 [k,n]
1919 end
20- C [m,n] = Cₘₙ
20+ 𝐂 [m,n] = 𝐂ₘₙ
2121 end
2222end
23- @inline function jgemm! (C, A, Bᵀ :: Adjoint )
24- C .= 0
25- B = parent (Bᵀ )
26- M, N = size (C ); K = size (B ,1 )
23+ @inline function jgemm! (𝐂, 𝐀, 𝐁ᵀ :: Adjoint )
24+ 𝐂 .= 0
25+ 𝐁 = parent (𝐁ᵀ )
26+ M, N = size (𝐂 ); K = size (𝐁 ,1 )
2727 @inbounds for k ∈ 1 : K, n ∈ 1 : N
2828 @simd ivdep for m ∈ 1 : M
29- C [m,n] += A [m,k] * B [n,k]
29+ 𝐂 [m,n] += 𝐀 [m,k] * 𝐁 [n,k]
3030 end
3131 end
3232end
33- @inline function gemmavx! (C, A, B )
34- @avx for i ∈ 1 : size (A ,1 ), j ∈ 1 : size (B ,2 )
35- Cᵢⱼ = zero (eltype (C ))
36- for k ∈ 1 : size (A ,2 )
37- Cᵢⱼ += A[i ,k] * B [k,j ]
33+ @inline function gemmavx! (𝐂, 𝐀, 𝐁 )
34+ @avx for m ∈ 1 : size (𝐀 ,1 ), n ∈ 1 : size (𝐁 ,2 )
35+ 𝐂ₘₙ = zero (eltype (𝐂 ))
36+ for k ∈ 1 : size (𝐀 ,2 )
37+ 𝐂ₘₙ += 𝐀[m ,k] * 𝐁 [k,n ]
3838 end
39- C[i,j ] = Cᵢⱼ
39+ 𝐂[m,n ] = 𝐂ₘₙ
4040 end
4141end
4242function jdot (a, b)
43- s = 0.0
43+ s = zero ( eltype (a))
4444 @inbounds @simd ivdep for i ∈ eachindex (a, b)
4545 s += a[i] * b[i]
4646 end
4747 s
4848end
4949function jdotavx (a, b)
50- s = 0.0
50+ s = zero ( eltype (a))
5151 @avx for i ∈ eachindex (a, b)
5252 s += a[i] * b[i]
5353 end
5454 s
5555end
5656function jselfdot (a)
57- s = 0.0
57+ s = zero ( eltype (a))
5858 @inbounds @simd ivdep for i ∈ eachindex (a)
5959 s += a[i] * a[i]
6060 end
6161 s
6262end
6363function jselfdotavx (a)
64- s = 0.0
64+ s = zero ( eltype (a))
6565 @avx for i ∈ eachindex (a)
6666 s += a[i] * a[i]
6767 end
@@ -96,71 +96,71 @@ function jvexpavx!(b, a)
9696 end
9797end
9898function jsvexp (a)
99- s = 0.0
99+ s = zero ( eltype (a))
100100 @inbounds for i ∈ eachindex (a)
101101 s += exp (a[i])
102102 end
103103 s
104104end
105105function jsvexpavx (a)
106- s = 0.0
106+ s = zero ( eltype (a))
107107 @avx for i ∈ eachindex (a)
108108 s += exp (a[i])
109109 end
110110 s
111111end
112- function jgemv! (y, A , x)
113- y .= 0.0
112+ function jgemv! (y, 𝐀 , x)
113+ y .= zero ( eltype (y))
114114 @inbounds for j ∈ eachindex (x)
115115 @simd ivdep for i ∈ eachindex (y)
116- y[i] += A [i,j] * x[j]
116+ y[i] += 𝐀 [i,j] * x[j]
117117 end
118118 end
119119end
120- @inline function jgemv! (y, Aᵀ :: Adjoint , x )
121- A = parent (Aᵀ )
122- @inbounds for i ∈ eachindex (y )
123- yᵢ = 0.0
124- @simd ivdep for j ∈ eachindex (x )
125- yᵢ += A [j,i] * x [j]
120+ @inline function jgemv! (𝐲, 𝐀ᵀ :: Adjoint , 𝐱 )
121+ 𝐀 = parent (𝐀ᵀ )
122+ @inbounds for i ∈ eachindex (𝐲 )
123+ 𝐲ᵢ = zero ( eltype (𝐲))
124+ @simd ivdep for j ∈ eachindex (𝐱 )
125+ 𝐲ᵢ += 𝐀 [j,i] * 𝐱 [j]
126126 end
127- y [i] = yᵢ
127+ 𝐲 [i] = 𝐲ᵢ
128128 end
129129end
130- @inline function jgemvavx! (y, A, x )
131- @avx for i ∈ eachindex (y )
132- yᵢ = 0.0
133- for j ∈ eachindex (x )
134- yᵢ += A [i,j] * x [j]
130+ @inline function jgemvavx! (𝐲, 𝐀, 𝐱 )
131+ @avx for i ∈ eachindex (𝐲 )
132+ 𝐲ᵢ = zero ( eltype (𝐲))
133+ for j ∈ eachindex (𝐱 )
134+ 𝐲ᵢ += 𝐀 [i,j] * 𝐱 [j]
135135 end
136- y [i] = yᵢ
136+ 𝐲 [i] = 𝐲ᵢ
137137 end
138138end
139- function jvar! (s ², A , x̄)
140- @. s² = 0
141- @inbounds for i ∈ 1 : size (A ,2 )
142- @simd for j ∈ eachindex (s ²)
143- δ = A [j,i] - x̄[j]
144- s ²[j] += δ* δ
139+ function jvar! (𝐬 ², 𝐀 , x̄)
140+ @. s² = zero ( eltype (𝐬²))
141+ @inbounds for i ∈ 1 : size (𝐀 ,2 )
142+ @simd for j ∈ eachindex (𝐬 ²)
143+ δ = 𝐀 [j,i] - x̄[j]
144+ 𝐬 ²[j] += δ* δ
145145 end
146146 end
147147end
148- function jvaravx! (s ², A , x̄)
149- @avx for j ∈ eachindex (s ²)
150- s ²ⱼ = 0.0
148+ function jvaravx! (𝐬 ², 𝐀 , x̄)
149+ @avx for j ∈ eachindex (𝐬 ²)
150+ 𝐬 ²ⱼ = zero ( eltype (𝐬²))
151151 x̄ⱼ = x̄[j]
152- for i ∈ 1 : size (A ,2 )
153- δ = A [j,i] - x̄ⱼ
154- s ²ⱼ += δ* δ
152+ for i ∈ 1 : size (𝐀 ,2 )
153+ δ = 𝐀 [j,i] - x̄ⱼ
154+ 𝐬 ²ⱼ += δ* δ
155155 end
156- s ²[j] = s ²ⱼ
156+ 𝐬 ²[j] = 𝐬 ²ⱼ
157157 end
158158end
159159japlucBc! (d, a, B, c) = @. d = a + B * c' ;
160160japlucBcavx! (d, a, B, c) = @avx @. d = a + B * c' ;
161161
162162function jOLSlp (y, X, β)
163- lp = 0.0
163+ lp = zero ( eltype (y))
164164 @inbounds @fastmath for i ∈ eachindex (y)
165165 δ = y[i]
166166 @simd for j ∈ eachindex (β)
@@ -171,7 +171,7 @@ function jOLSlp(y, X, β)
171171 lp
172172end
173173function jOLSlp_avx (y, X, β)
174- lp = 0.0
174+ lp = zero ( eltype (y))
175175 @avx for i ∈ eachindex (y)
176176 δ = y[i]
177177 for j ∈ eachindex (β)
0 commit comments