Commit 35ce3f6

Merge branch 'master' into Nexes_CQ_10

2 parents: 6480054 + 8c475b9
16 files changed (+78, -22 lines)

.github/workflows/build.yml

Lines changed: 5 additions & 0 deletions
@@ -19,6 +19,11 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
   cancel-in-progress: true
 
+# Fine-grant permission
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+  contents: write # for creating release
+
 env:
   BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
   GGML_NLOOP: 3

.github/workflows/close-issue.yml

Lines changed: 5 additions & 0 deletions
@@ -3,6 +3,11 @@ on:
   schedule:
     - cron: "42 0 * * *"
 
+# Fine-grant permission
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+  issues: write
+
 jobs:
   close-issues:
     runs-on: ubuntu-latest

.github/workflows/nix-ci-aarch64.yml

Lines changed: 7 additions & 0 deletions
@@ -21,6 +21,13 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
   cancel-in-progress: true
 
+# Fine-grant permission
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+  # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
+  id-token: write
+  contents: read
+
 jobs:
   nix-build-aarch64:
     runs-on: ubuntu-latest

.github/workflows/nix-ci.yml

Lines changed: 7 additions & 0 deletions
@@ -12,6 +12,13 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
   cancel-in-progress: true
 
+# Fine-grant permission
+# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
+permissions:
+  # https://github.com/DeterminateSystems/nix-installer-action?tab=readme-ov-file#with-flakehub
+  id-token: write
+  contents: read
+
 jobs:
   nix-eval:
     strategy:

README.md

Lines changed: 1 addition & 0 deletions
@@ -169,6 +169,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:
 - [AIKit](https://github.com/sozercan/aikit) (MIT)
 - [LARS - The LLM & Advanced Referencing Solution](https://github.com/abgulati/LARS) (AGPL)
 - [LLMUnity](https://github.com/undreamai/LLMUnity) (MIT)
+- [Llama Assistant](https://github.com/vietanhdev/llama-assistant) (GPL)
 
 *(to have a project listed here, it should clearly state that it depends on `llama.cpp`)*

ci/run.sh

Lines changed: 4 additions & 3 deletions
@@ -1,4 +1,4 @@
-#/bin/bash
+#!/bin/bash
 #
 # sample usage:
 #

@@ -751,7 +751,8 @@ function gg_run_rerank_tiny {
 
     model_f16="${path_models}/ggml-model-f16.gguf"
 
-    (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?</s><s>hi\nwhat is panda?</s><s>it's a bear\nwhat is panda?</s><s>The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
+    # for this model, the SEP token is "</s>"
+    (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?</s></s>hi\nwhat is panda?</s></s>it's a bear\nwhat is panda?</s></s>The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
 
     # sample output
     # rerank score 0: 0.029

@@ -774,7 +775,7 @@ function gg_run_rerank_tiny {
 
     check_score "rerank score 0" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 0")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
     check_score "rerank score 1" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 1")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
-    check_score "rerank score 2" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 2")" "0.10" "0.15" | tee -a $OUT/${ci}-rk-f16.log
+    check_score "rerank score 2" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 2")" "0.10" "0.30" | tee -a $OUT/${ci}-rk-f16.log
 
     set +e
 }
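
The separator change works because this reranker's tokenizer uses `</s>` for both its EOS and SEP tokens. As a sanity check, the sketch below (illustrative, not part of this commit) prints a model's special-token pieces using the `llama.h` API as it stands at this revision; the `piece` helper and the standalone `main` are assumptions for demonstration:

```cpp
#include <cstdio>
#include <string>

#include "llama.h"

// Hypothetical helper: render one special token as text, or "<none>"
// if the model does not define it (LLAMA_TOKEN_NULL).
static std::string piece(const llama_model * model, llama_token tok) {
    if (tok == LLAMA_TOKEN_NULL) {
        return "<none>";
    }
    char buf[64];
    const int n = llama_token_to_piece(model, tok, buf, sizeof(buf), 0, true);
    return n > 0 ? std::string(buf, n) : "<error>";
}

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s model.gguf\n", argv[0]);
        return 1;
    }
    llama_backend_init();
    llama_model * model = llama_load_model_from_file(argv[1], llama_model_default_params());
    if (model == nullptr) {
        return 1;
    }
    // For the rerank model exercised by this CI job, SEP should print "</s>".
    printf("BOS: %s\n", piece(model, llama_token_bos(model)).c_str());
    printf("EOS: %s\n", piece(model, llama_token_eos(model)).c_str());
    printf("SEP: %s\n", piece(model, llama_token_sep(model)).c_str());
    llama_free_model(model);
    llama_backend_free();
    return 0;
}
```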

common/arg.cpp

Lines changed: 1 addition & 1 deletion
@@ -911,7 +911,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
     ).set_sparam());
     add_opt(llama_arg(
         {"-s", "--seed"}, "SEED",
-        format("RNG seed (default: %u, use random seed for %u)", params.sparams.seed, LLAMA_DEFAULT_SEED),
+        format("RNG seed (default: %d, use random seed for %d)", params.sparams.seed, LLAMA_DEFAULT_SEED),
         [](gpt_params & params, const std::string & value) {
             params.sparams.seed = std::stoul(value);
         }
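
The `%u` → `%d` change matters because `LLAMA_DEFAULT_SEED` is `0xFFFFFFFF` in `llama.h`: printed unsigned it appears as `4294967295`, while the CLI documents the random-seed sentinel as `-1`. A minimal standalone sketch of the formatting difference (not part of the commit):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
    const uint32_t seed = 0xFFFFFFFF; // LLAMA_DEFAULT_SEED in llama.h

    printf("%%u: %u\n", seed);           // prints 4294967295
    printf("%%d: %d\n", (int32_t) seed); // prints -1, matching the documented sentinel
    return 0;
}
```

The `examples/server/README.md` hunk below records the same change in the generated help text.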

common/common.cpp

Lines changed: 29 additions & 1 deletion
@@ -838,6 +838,31 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
         return iparams;
     }
 
+    if (params.reranking) {
+        bool ok = true;
+
+        if (llama_token_bos(model) == LLAMA_TOKEN_NULL) {
+            LOG_WRN("%s: warning: model does not have a BOS token, reranking will not work\n", __func__);
+            ok = false;
+        }
+
+        if (llama_token_eos(model) == LLAMA_TOKEN_NULL) {
+            LOG_WRN("%s: warning: model does not have an EOS token, reranking will not work\n", __func__);
+            ok = false;
+        }
+
+        if (llama_token_sep(model) == LLAMA_TOKEN_NULL) {
+            LOG_WRN("%s: warning: model does not have a SEP token, reranking will not work\n", __func__);
+            ok = false;
+        }
+
+        if (!ok) {
+            llama_free_model(model);
+
+            return iparams;
+        }
+    }
+
     auto cparams = llama_context_params_from_gpt_params(params);
 
     llama_context * lctx = llama_new_context_with_model(model, cparams);

@@ -855,6 +880,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
         if (cvec.n_embd == -1) {
             llama_free(lctx);
             llama_free_model(model);
+
             return iparams;
         }
 

@@ -867,6 +893,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
         if (err) {
             llama_free(lctx);
             llama_free_model(model);
+
             return iparams;
         }
     }

@@ -889,7 +916,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
         llama_lora_adapters_apply(lctx, iparams.lora_adapters);
     }
 
-    if (params.sparams.ignore_eos && llama_token_eos(model) == -1) {
+    if (params.sparams.ignore_eos && llama_token_eos(model) == LLAMA_TOKEN_NULL) {
         LOG_WRN("%s: warning: model does not have an EOS token, ignoring --ignore-eos\n", __func__);
         params.sparams.ignore_eos = false;
     }

@@ -930,6 +957,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
 
     iparams.model = model;
     iparams.context = lctx;
+
     return iparams;
 }
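
With this guard, a model missing any of the BOS/EOS/SEP special tokens is rejected up front instead of producing broken rerank prompts later. A minimal caller sketch, assuming the `gpt_params` and `llama_init_result` definitions in `common.h` at this commit (where both result pointers default to null); `init_reranker` is a hypothetical name:

```cpp
#include <cstdio>

#include "common.h"

// Hypothetical caller: request reranking and rely on the new guard in
// llama_init_from_gpt_params to reject models without BOS/EOS/SEP tokens.
bool init_reranker(gpt_params & params) {
    params.reranking = true;

    llama_init_result res = llama_init_from_gpt_params(params);
    if (res.model == nullptr || res.context == nullptr) {
        // The guard has already logged which special token is missing.
        fprintf(stderr, "model is not usable for reranking\n");
        return false;
    }

    // ... run rerank requests with res.model / res.context ...
    return true;
}
```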

examples/server/README.md

Lines changed: 1 addition & 1 deletion
@@ -100,7 +100,7 @@ The project is under active development, and we are [looking for feedback and co
 | Argument | Explanation |
 | -------- | ----------- |
 | `--samplers SAMPLERS` | samplers that will be used for generation in the order, separated by ';'<br/>(default: top_k;tfs_z;typ_p;top_p;min_p;temperature) |
-| `-s, --seed SEED` | RNG seed (default: 4294967295, use random seed for 4294967295) |
+| `-s, --seed SEED` | RNG seed (default: -1, use random seed for -1) |
 | `--sampling-seq SEQUENCE` | simplified sequence for samplers that will be used (default: kfypmt) |
 | `--ignore-eos` | ignore end of stream token and continue generating (implies --logit-bias EOS-inf) |
 | `--penalize-nl` | penalize newline tokens (default: false) |

examples/server/server.cpp

Lines changed: 2 additions & 2 deletions
@@ -2027,15 +2027,15 @@ struct server_context {
                         continue;
                     }
 
-                    // prompt: <s>query</s><s>doc</s>
+                    // prompt: [BOS]query[EOS][SEP]doc[EOS]
                     prompt_tokens.clear();
                     prompt_tokens.push_back(llama_token_bos(model));
                     {
                         const auto part = tokenize(slot.prompt[0], false);
                         prompt_tokens.insert(prompt_tokens.end(), part.begin(), part.end());
                     }
                     prompt_tokens.push_back(llama_token_eos(model));
-                    prompt_tokens.push_back(llama_token_bos(model));
+                    prompt_tokens.push_back(llama_token_sep(model));
                     {
                         const auto part = tokenize(slot.prompt[1], false);
                         prompt_tokens.insert(prompt_tokens.end(), part.begin(), part.end());
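
For reference, the corrected template can be written as a standalone helper. This is a sketch built on the `llama_token_bos`/`llama_token_eos`/`llama_token_sep` accessors the server code already calls; `build_rerank_prompt` and its pre-tokenized inputs are illustrative, standing in for the server's `tokenize` helper and slot state:

```cpp
#include <vector>

#include "llama.h"

// Sketch: assemble the rerank prompt [BOS]query[EOS][SEP]doc[EOS].
// Before this fix, the separator position held a second BOS token.
static std::vector<llama_token> build_rerank_prompt(
        const llama_model * model,
        const std::vector<llama_token> & query,
        const std::vector<llama_token> & doc) {
    std::vector<llama_token> out;

    out.push_back(llama_token_bos(model));              // [BOS]
    out.insert(out.end(), query.begin(), query.end());  // query
    out.push_back(llama_token_eos(model));              // [EOS]
    out.push_back(llama_token_sep(model));              // [SEP]
    out.insert(out.end(), doc.begin(), doc.end());      // doc
    out.push_back(llama_token_eos(model));              // [EOS]

    return out;
}
```

For a SentencePiece-style reranker such as the one exercised in `ci/run.sh`, EOS and SEP both render as `</s>`, which is why the CI prompt above uses `</s></s>` as its separator.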
