
Commit f51e944

talk-llama : sync llama.cpp
1 parent 3b647ee commit f51e944

File tree: 6 files changed (+1985, -809 lines)


examples/talk-llama/llama-impl.h

Lines changed: 21 additions & 0 deletions
Lines changed: 21 additions & 0 deletions

@@ -24,3 +24,24 @@ void llama_log_callback_default(ggml_log_level level, const char * text, void *
 #define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
 #define LLAMA_LOG_WARN(...) llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
 #define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
+
+//
+// helpers
+//
+
+static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
+    if (search.empty()) {
+        return;
+    }
+    std::string builder;
+    builder.reserve(s.length());
+    size_t pos = 0;
+    size_t last_pos = 0;
+    while ((pos = s.find(search, last_pos)) != std::string::npos) {
+        builder.append(s, last_pos, pos - last_pos);
+        builder.append(replace);
+        last_pos = pos + search.length();
+    }
+    builder.append(s, last_pos, std::string::npos);
+    s = std::move(builder);
+}
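Compared with the replace_all it supersedes in llama-vocab.cpp (removed below), this version guards against an empty search string, which would make the old loop spin forever, and reserves the output buffer up front instead of growing it with repeated operator+=. A small self-contained sketch of its behavior; main() here is illustrative, not part of the commit:

#include <cassert>
#include <string>

// body exactly as in the diff above
static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
    if (search.empty()) {
        return;
    }
    std::string builder;
    builder.reserve(s.length());
    size_t pos = 0;
    size_t last_pos = 0;
    while ((pos = s.find(search, last_pos)) != std::string::npos) {
        builder.append(s, last_pos, pos - last_pos);
        builder.append(replace);
        last_pos = pos + search.length();
    }
    builder.append(s, last_pos, std::string::npos);
    s = std::move(builder);
}

int main() {
    std::string s = "the cat sat on the mat";
    replace_all(s, "the", "a");
    assert(s == "a cat sat on a mat");

    std::string t = "aaa";
    replace_all(t, "aa", "b"); // matches are non-overlapping, scanned left to right
    assert(t == "ba");

    replace_all(t, "", "x");   // empty search: no-op instead of an infinite loop
    assert(t == "ba");
    return 0;
}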

examples/talk-llama/llama-sampling.cpp

Lines changed: 2 additions & 2 deletions
@@ -85,14 +85,14 @@ void llama_sample_top_k_impl(struct llama_sampling * smpl, llama_token_data_arra
     constexpr float bucket_low = -10.0f;
     constexpr float bucket_high = 10.0f;
     constexpr float bucket_scale = nbuckets/(bucket_high - bucket_low);
-    constexpr float bucker_inter = -bucket_low * bucket_scale;
+    constexpr float bucket_inter = -bucket_low * bucket_scale;

     std::vector<int> bucket_idx(candidates->size);
     std::vector<int> histo(nbuckets, 0);

     for (int i = 0; i < (int)candidates->size; ++i) {
         const float val = candidates->data[i].logit;
-        int ib = int(bucket_scale * val + bucker_inter); //nbuckets * (val - bucket_low) / (bucket_high - bucket_low);
+        int ib = int(bucket_scale * val + bucket_inter); //nbuckets * (val - bucket_low) / (bucket_high - bucket_low);
         ib = std::max(0, std::min(nbuckets-1, ib));
         bucket_idx[i] = ib;
         ++histo[ib];
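The fix is purely a typo rename (bucker_inter to bucket_inter); the arithmetic is unchanged. Expanding the two constants shows what the expression computes:

    ib = bucket_scale * val + bucket_inter
       = nbuckets * (val - bucket_low) / (bucket_high - bucket_low)

i.e. a linear map from the logit range [-10, 10] onto bucket indices [0, nbuckets). The value of nbuckets is defined above this hunk and not shown here; assuming nbuckets = 128 purely for illustration, a logit of 2.5 maps to int(128 * (2.5 + 10) / 20) = 80, and the std::max/std::min on the following line clamps logits falling outside [-10, 10].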

examples/talk-llama/llama-vocab.cpp

Lines changed: 22 additions & 19 deletions
@@ -16,20 +16,6 @@
 // helpers
 //

-static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
-    std::string result;
-    for (size_t pos = 0; ; pos += search.length()) {
-        auto new_pos = s.find(search, pos);
-        if (new_pos == std::string::npos) {
-            result += s.substr(pos, s.size() - pos);
-            break;
-        }
-        result += s.substr(pos, new_pos - pos) + replace;
-        pos = new_pos;
-    }
-    s = std::move(result);
-}
-
 LLAMA_ATTRIBUTE_FORMAT(1, 2)
 static std::string format(const char * fmt, ...) {
     va_list ap;

@@ -335,6 +321,21 @@ struct llm_tokenizer_spm {

 // TODO: there are a lot of common parts between spm and bpe tokenizers, should be refactored and reused

+template<typename T, typename Container = std::vector<T>, typename Compare = std::less<typename Container::value_type>>
+class llama_priority_queue : public std::priority_queue<T, Container, Compare> {
+public:
+    using std::priority_queue<T, Container, Compare>::priority_queue;
+
+    T pop_move() {
+        T item = std::move(this->c.front());
+        std::pop_heap(this->c.begin(), this->c.end(), this->comp);
+        this->c.pop_back();
+        return item;
+    }
+
+    void pop() = delete;
+};
+
 struct llm_bigram_bpe {
     struct comparator {
         bool operator()(const llm_bigram_bpe & l, const llm_bigram_bpe & r) const {

@@ -343,7 +344,7 @@ struct llm_bigram_bpe {
     };

     using queue_storage = std::vector<llm_bigram_bpe>;
-    using queue = std::priority_queue<llm_bigram_bpe, queue_storage, comparator>;
+    using queue = llama_priority_queue<llm_bigram_bpe, queue_storage, comparator>;
     llm_symbol::index left;
     llm_symbol::index right;
     std::string text;

@@ -402,6 +403,7 @@ struct llm_tokenizer_bpe {
             case LLAMA_VOCAB_PRE_TYPE_COMMAND_R:
             case LLAMA_VOCAB_PRE_TYPE_SMOLLM:
             case LLAMA_VOCAB_PRE_TYPE_CODESHELL:
+            case LLAMA_VOCAB_PRE_TYPE_EXAONE:
                 regex_exprs = {
                     "\\p{N}",
                     "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",

@@ -424,6 +426,8 @@ struct llm_tokenizer_bpe {
                 };
                 break;
             case LLAMA_VOCAB_PRE_TYPE_PORO:
+            case LLAMA_VOCAB_PRE_TYPE_BLOOM:
+            case LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH:
                 regex_exprs = {
                     " ?[^(\\s|.,!?…。,、।۔،)]+",
                 };

@@ -531,8 +535,7 @@ struct llm_tokenizer_bpe {

         // build token(s)
         while (!work_queue.empty()) {
-            auto bigram = work_queue.top();
-            work_queue.pop();
+            auto bigram = work_queue.pop_move();

             auto & left_symbol = symbols[bigram.left];
             auto & right_symbol = symbols[bigram.right];

@@ -1480,11 +1483,11 @@ llama_token llama_token_pad_impl(const struct llama_vocab & vocab) {
     return vocab.special_pad_id;
 }

-int32_t llama_add_bos_token_impl(const struct llama_vocab & vocab) {
+bool llama_add_bos_token_impl(const struct llama_vocab & vocab) {
     return vocab.tokenizer_add_bos;
 }

-int32_t llama_add_eos_token_impl(const struct llama_vocab & vocab) {
+bool llama_add_eos_token_impl(const struct llama_vocab & vocab) {
     return vocab.tokenizer_add_eos;
 }
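std::priority_queue only exposes its largest element through top(), which returns a const reference, so the previous top()-then-pop() pattern in the BPE tokenizer copied every llm_bigram_bpe (including its std::string text) out of the queue. llama_priority_queue reaches into the protected container c, moves the front element out, restores the heap with std::pop_heap, and deletes pop() so the moved-from element can never be observed through the base interface. A self-contained sketch of the same idiom; the class body is copied from the diff above, while main() is illustrative only:

#include <algorithm>
#include <iostream>
#include <queue>
#include <string>
#include <vector>

template<typename T, typename Container = std::vector<T>, typename Compare = std::less<typename Container::value_type>>
class llama_priority_queue : public std::priority_queue<T, Container, Compare> {
public:
    using std::priority_queue<T, Container, Compare>::priority_queue;

    // move the highest-priority element out instead of copying it via top()
    T pop_move() {
        T item = std::move(this->c.front());
        std::pop_heap(this->c.begin(), this->c.end(), this->comp);
        this->c.pop_back();
        return item;
    }

    // plain pop() would leave a moved-from element reachable through top(); forbid it
    void pop() = delete;
};

int main() {
    llama_priority_queue<std::string> q;
    q.push("bigram-a");
    q.push("bigram-c");
    q.push("bigram-b");
    while (!q.empty()) {
        std::string s = q.pop_move(); // the string is moved, not copied
        std::cout << s << '\n';       // prints bigram-c, bigram-b, bigram-a
    }
    return 0;
}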

examples/talk-llama/llama-vocab.h

Lines changed: 2 additions & 2 deletions
@@ -95,8 +95,8 @@ llama_token llama_token_sep_impl(const struct llama_vocab & vocab);
 llama_token llama_token_nl_impl (const struct llama_vocab & vocab);
 llama_token llama_token_pad_impl(const struct llama_vocab & vocab);

-int32_t llama_add_bos_token_impl(const struct llama_vocab & vocab);
-int32_t llama_add_eos_token_impl(const struct llama_vocab & vocab);
+bool llama_add_bos_token_impl(const struct llama_vocab & vocab);
+bool llama_add_eos_token_impl(const struct llama_vocab & vocab);

 llama_token llama_token_prefix_impl(const struct llama_vocab & vocab);
 llama_token llama_token_middle_impl(const struct llama_vocab & vocab);
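The header change mirrors the definitions in llama-vocab.cpp above: both helpers simply return the vocab's tokenizer_add_bos / tokenizer_add_eos flags, so bool states plainly that callers get a yes/no answer rather than an int32_t they might be tempted to compare against sentinel values. A hypothetical call site; the vocab object and surrounding code are assumed, not part of the diff:

// hypothetical caller; `vocab` is assumed to be a populated llama_vocab
if (llama_add_bos_token_impl(vocab)) {
    // prepend the BOS token before feeding tokens to the model
}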
