
Commit 7f4e47f

Merge branch 'master' of https://github.com/unslothai/llama.cpp
2 parents: 9b1fae8 + a994ffa

8 files changed: 34 additions, 3 deletions

ggml/src/ggml-cann/ggml-cann.cpp
Lines changed: 1 addition & 0 deletions

@@ -2090,6 +2090,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
             {
                 // TODO: add support
                 // ref: https://github.com/ggml-org/llama.cpp/pull/14274
+#pragma message("TODO: implement F32, F16, BF16, Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, IQ4_NL support (https://github.com/ggml-org/llama.cpp/pull/14661)")
                 return false;
             } break;
         case GGML_OP_CPY: {
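
The #pragma message directive used here surfaces the TODO at build time: unlike a plain // TODO comment, the compiler (GCC, Clang, and MSVC all support this directive) prints the string as a diagnostic every time the translation unit is compiled. A minimal sketch of the pattern, with a hypothetical function name chosen just for illustration:

#include <cstdio>

// Printed as a compiler note on every build, so the missing feature
// cannot be silently forgotten the way a source comment can.
#pragma message("TODO: implement quantized SET_ROWS support")

// Hypothetical stand-in for a backend supports_op check: until the kernels
// exist, report no support so ggml can schedule the op on another backend.
static bool backend_supports_set_rows(void) {
    return false;
}

int main(void) {
    std::printf("supported: %d\n", backend_supports_set_rows());
}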

ggml/src/ggml-cuda/ggml-cuda.cu
Lines changed: 6 additions & 1 deletion

@@ -2303,6 +2303,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
         case GGML_UNARY_OP_EXP:
             ggml_cuda_op_exp(ctx, dst);
             break;
+        case GGML_UNARY_OP_ELU:
+            ggml_cuda_op_elu(ctx, dst);
+            break;
         default:
             return false;
     }
@@ -3116,6 +3119,7 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
         case GGML_UNARY_OP_GELU_QUICK:
         case GGML_UNARY_OP_TANH:
         case GGML_UNARY_OP_EXP:
+        case GGML_UNARY_OP_ELU:
             return ggml_is_contiguous(op->src[0]);
         default:
             return false;
@@ -3222,7 +3226,8 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
             } break;
         case GGML_OP_SET_ROWS:
             {
-                return (op->type == GGML_TYPE_F32 || op->type == GGML_TYPE_F16) &&
+#pragma message("TODO: implement Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, IQ4_NL support (https://github.com/ggml-org/llama.cpp/pull/14661)")
+                return (op->type == GGML_TYPE_F32 || op->type == GGML_TYPE_F16 || op->type == GGML_TYPE_BF16) &&
                        op->src[0]->type == GGML_TYPE_F32 &&
                        op->src[1]->type == GGML_TYPE_I64;
             } break;
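
For context, GGML_OP_SET_ROWS writes rows of src0 into dst at the row positions listed in the I64 index tensor src1; the predicate above only advertises the destination types the CUDA kernels can currently produce (F32, F16, and now BF16). A minimal host-side sketch of the row-scatter semantics, simplified to 2D and using hypothetical names:

#include <cstdint>
#include <cstring>

// Simplified SET_ROWS reference: dst[rows[i]] = src[i] for each source row.
// src holds n_src rows of n_cols floats; rows holds n_src destination indices.
static void set_rows_ref(float * dst, const float * src,
                         const int64_t * rows, int n_src, int n_cols) {
    for (int i = 0; i < n_src; ++i) {
        std::memcpy(dst + rows[i] * n_cols, src + i * n_cols,
                    n_cols * sizeof(float));
    }
}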

ggml/src/ggml-cuda/set-rows.cu
Lines changed: 15 additions & 0 deletions

@@ -10,6 +10,11 @@ __device__ __forceinline__ void set_rows_1<float, half>(const float * src_f, hal
     *dst_h = __float2half(*src_f);
 }
 
+template<>
+__device__ __forceinline__ void set_rows_1<float, nv_bfloat16>(const float * src_f, nv_bfloat16 * dst_b) {
+    *dst_b = *src_f;
+}
+
 template<>
 __device__ __forceinline__ void set_rows_1<float, float>(const float * src_f, float * dst_f) {
     *dst_f = *src_f;
@@ -124,6 +129,16 @@ void ggml_cuda_op_set_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
             nb1, nb2, nb3,
             stream
         );
+    } else if (dst->type == GGML_TYPE_BF16) {
+        set_rows_cuda(
+            src0_d, src1_d, (nv_bfloat16*)dst->data,
+            ne00, ne01, ne02, ne03,
+            ne10, ne11, ne12, ne13,
+            nb01, nb02, nb03,
+            nb10, nb11, nb12,
+            nb1, nb2, nb3,
+            stream
+        );
     } else {
         GGML_ABORT("unsupported type");
     }
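
The new nv_bfloat16 specialization can be a plain assignment because <cuda_bf16.h> provides a converting constructor from float. Conceptually, bfloat16 keeps float32's sign and 8 exponent bits and truncates the mantissa to 7 bits, so the conversion amounts to taking the top 16 bits of the IEEE-754 word with rounding. A host-side illustration of that idea (a sketch only, not the CUDA intrinsic; NaN handling omitted):

#include <cstdint>
#include <cstring>

// Illustrative float32 -> bfloat16: keep the high 16 bits of the IEEE-754
// representation, rounding the discarded low mantissa bits to nearest-even.
static uint16_t f32_to_bf16(float f) {
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    const uint32_t rounding = 0x7fff + ((bits >> 16) & 1); // nearest-even tie-break
    return (uint16_t)((bits + rounding) >> 16);
}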

ggml/src/ggml-cuda/unary.cu
Lines changed: 7 additions & 0 deletions

@@ -83,6 +83,10 @@ static __device__ __forceinline__ float op_log(float x) {
     return logf(x);
 }
 
+static __device__ __forceinline__ float op_elu(float x) {
+    return (x > 0.f) ? x : expm1f(x);
+}
+
 template <float (*op)(float), typename T>
 static __global__ void unary_op_kernel(const T * x, T * dst, const int k) {
     const int i = blockDim.x*blockIdx.x + threadIdx.x;
@@ -196,6 +200,9 @@ void ggml_cuda_op_log(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
     ggml_cuda_op_unary<op_log>(ctx, dst);
 }
 
+void ggml_cuda_op_elu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
+    ggml_cuda_op_unary<op_elu>(ctx, dst);
+}
 /* gated ops */
 
 template <float (*op)(float), typename T>
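
op_elu implements ELU with alpha = 1: f(x) = x for x > 0 and e^x - 1 otherwise, so the function is continuous at zero and saturates toward -1 for large negative inputs. Using expm1f(x) instead of expf(x) - 1.0f avoids the catastrophic cancellation the latter suffers for x near zero. A host-side reference that mirrors the device function:

#include <cassert>
#include <cmath>

// Reference ELU (alpha = 1), matching the device-side op_elu above.
static float elu_ref(float x) {
    return (x > 0.f) ? x : std::expm1(x); // expm1(x) = e^x - 1, accurate near 0
}

int main() {
    assert(elu_ref(2.0f)  == 2.0f);   // identity on the positive side
    assert(elu_ref(0.0f)  == 0.0f);   // continuous at zero
    assert(elu_ref(-100.f) >= -1.0f); // saturates toward -1 for very negative x
    return 0;
}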

ggml/src/ggml-cuda/unary.cuh
Lines changed: 2 additions & 0 deletions

@@ -59,6 +59,8 @@ void ggml_cuda_op_cos(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
 
 void ggml_cuda_op_log(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
 
+void ggml_cuda_op_elu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+
 void ggml_cuda_op_reglu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
 
 void ggml_cuda_op_geglu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);

ggml/src/ggml-opencl/ggml-opencl.cpp
Lines changed: 1 addition & 0 deletions

@@ -2280,6 +2280,7 @@ static bool ggml_opencl_supports_op(ggml_backend_dev_t dev, const struct ggml_te
             {
                 // TODO: add support
                 // ref: https://github.com/ggml-org/llama.cpp/pull/14274
+#pragma message("TODO: implement BF16, Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, IQ4_NL support (https://github.com/ggml-org/llama.cpp/pull/14661)")
                 if (op->src[0]->type != GGML_TYPE_F32) {
                     return false;
                 }

ggml/src/ggml-sycl/ggml-sycl.cpp
Lines changed: 1 addition & 0 deletions

@@ -4303,6 +4303,7 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
             {
                 // TODO: add support
                 // ref: https://github.com/ggml-org/llama.cpp/pull/14274
+#pragma message("TODO: implement BF16, Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, IQ4_NL support (https://github.com/ggml-org/llama.cpp/pull/14661)")
                 return (op->type == GGML_TYPE_F32 || (op->type == GGML_TYPE_F16 && op->src[0]->type == GGML_TYPE_F32 && op->src[1]->type == GGML_TYPE_I64));
             } break;
         case GGML_OP_CPY:

src/llama-quant.cpp
Lines changed: 1 addition & 2 deletions

@@ -884,8 +884,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
                 if (std::regex pattern(tname); std::regex_search(tensor_name, pattern)) {
                     if (qtype != new_type) {
                         LLAMA_LOG_DEBUG("(overriding %s) ", ggml_type_name(new_type));
-                        new_type = qtype;
-                        break; // if two or more types are specified for the tensor, first match wins
+                        new_type = qtype; // if two or more types are specified for the same tensor, the last match wins
                     }
                 }
             }
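
This changes the semantics of per-tensor quantization type overrides: the old break exited the loop after the first regex that matched a tensor name (first match wins), while the new code keeps scanning, so later patterns overwrite earlier ones and the last match wins. A standalone sketch of the new behavior, with hypothetical patterns and type names:

#include <cstdio>
#include <regex>
#include <string>
#include <utility>
#include <vector>

int main() {
    // Hypothetical overrides: regex on the tensor name -> quantization type.
    const std::vector<std::pair<std::string, std::string>> overrides = {
        { "attn_.*", "Q4_0" },   // broad pattern, matches first
        { "attn_q",  "Q8_0" },   // narrower pattern, matches later
    };
    const std::string tensor_name = "attn_q.weight";

    std::string new_type = "Q4_K"; // default before any override
    for (const auto & [tname, qtype] : overrides) {
        if (std::regex pattern(tname); std::regex_search(tensor_name, pattern)) {
            new_type = qtype; // no break: a later match overwrites an earlier one
        }
    }
    std::printf("%s -> %s\n", tensor_name.c_str(), new_type.c_str()); // prints Q8_0
}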
