tests/operators/test_speculate_get_token_penalty_multi_scores.py (new file, 251 additions, 0 deletions)
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
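
"""Unit test for the speculate_get_token_penalty_multi_scores GPU op.

The op post-processes speculative-decoding logits in place: it masks EOS
tokens for sequences below their minimum length, applies repetition,
frequency, and presence penalties with temperature scaling, and bans bad
words. The test checks the op against the pure-Paddle reference
implementation defined below.
"""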

import unittest

import numpy as np
import paddle

from fastdeploy.model_executor.ops.gpu import speculate_get_token_penalty_multi_scores


def min_length_logits_process(
logits,
cur_len,
min_len,
eos_token_id,
output_padding_offset,
output_cum_offsets,
token_num,
bs,
length,
end_length,
max_seq_len,
):
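    """Suppress EOS logits for sequences still below their minimum length.

    Each output token is mapped back to its batch index bi via the padding
    offset. If the tokens decoded so far (cur_len[bi] plus the token's
    position inside the current speculative draft) fall short of min_len[bi],
    every EOS logit for that token is forced to -1e10 so decoding cannot
    stop early.
    """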
for token_idx in range(token_num):
        bi = (token_idx + output_padding_offset[token_idx]) // max_seq_len
if bi >= bs:
continue
query_start_token_idx = bi * max_seq_len - output_cum_offsets[bi]

if cur_len[bi] < 0:
continue
if cur_len[bi] + (token_idx - query_start_token_idx) < min_len[bi]:
for i in range(end_length):
logits[token_idx][eos_token_id[i]] = -1e10


def update_repeat_times(
pre_ids, cur_len, repeat_times, output_padding_offset, token_num, bs, length, length_id, max_seq_len
):
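    """Count how often each vocabulary id already appears in pre_ids.

    repeat_times holds one row per output token; each row accumulates counts
    from the corresponding sequence's pre_ids, stopping at the first negative
    id, which marks the end of the generated prefix.
    """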
for token_idx in range(token_num):
        bi = (token_idx + output_padding_offset[token_idx]) // max_seq_len
if bi >= bs:
continue
if cur_len[bi] < 0:
continue

pre_ids_now = pre_ids[bi]
repeat_times_now = repeat_times[token_idx]

for i in range(length_id):
            token_id = pre_ids_now[i]
            if token_id < 0:
                break
            repeat_times_now[token_id] = repeat_times_now[token_id] + 1


def update_value_by_repeat_times(
repeat_times,
penalty_scores,
frequency_score,
presence_score,
temperatures,
logits,
output_padding_offset,
token_num,
bs,
length,
max_seq_len,
):
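    """Apply repetition, frequency, and presence penalties plus temperature.

    For every id that already occurred (times != 0) the logit is scaled by
    the repetition penalty alpha (multiplied when negative, divided
    otherwise) and reduced by times * beta + gamma. All logits, penalized or
    not, are then divided by the per-sequence temperature.
    """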
for token_idx in range(token_num):
        bi = (token_idx + output_padding_offset[token_idx]) // max_seq_len
if bi >= bs:
continue
logits_now = logits[token_idx]
repeat_times_now = repeat_times[token_idx]
alpha = penalty_scores[bi]
beta = frequency_score[bi]
gamma = presence_score[bi]
for i in range(length):
times = repeat_times_now[i]
logit_now = logits_now[i]
if times != 0:
logit_now = logit_now * alpha if logit_now < 0 else logit_now / alpha
logit_now = logit_now - times * beta - gamma

logits_now[i] = logit_now / temperatures[bi]


def ban_bad_words(logits, bad_words_list, output_padding_offset, token_num, bs, length, bad_words_length, max_seq_len):
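    """Force the logits of banned token ids to -1e10, skipping ids outside the vocabulary."""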
for token_idx in range(token_num):
        bi = (token_idx + output_padding_offset[token_idx]) // max_seq_len
if bi >= bs:
continue
logits_now = logits[token_idx]
for i in range(bad_words_length):
bad_words_token_id = bad_words_list[i]
if bad_words_token_id >= length or bad_words_token_id < 0:
continue
logits_now[bad_words_token_id] = -1e10


def speculate_get_token_penalty_multi_scores_ref(
pre_ids,
logits,
penalty_scores,
frequency_score,
presence_score,
temperatures,
bad_tokens,
cur_len,
min_len,
eos_token_id,
seq_lens_this_time,
output_padding_offset,
output_cum_offsets,
max_seq_len,
):
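    """Pure-Paddle reference for the speculate_get_token_penalty_multi_scores op.

    Mirrors the CUDA kernel step by step: minimum-length EOS masking, repeat
    count accumulation, penalty and temperature application, and bad-word
    banning. Like the op under test, it modifies logits in place.
    """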
shape = logits.shape
repeat_times = paddle.full(shape, 0, dtype=paddle.int32)
bs = seq_lens_this_time.shape[0]
token_num = shape[0]
length = shape[1]
length_id = pre_ids.shape[1]
length_bad_words = bad_tokens.shape[1]

end_length = eos_token_id.shape[0]

min_length_logits_process(
logits,
cur_len,
min_len,
eos_token_id,
output_padding_offset,
output_cum_offsets,
token_num,
bs,
length,
end_length,
max_seq_len,
)

update_repeat_times(
pre_ids, cur_len, repeat_times, output_padding_offset, token_num, bs, length, length_id, max_seq_len
)

update_value_by_repeat_times(
repeat_times,
penalty_scores,
frequency_score,
presence_score,
temperatures,
logits,
output_padding_offset,
token_num,
bs,
length,
max_seq_len,
)

ban_bad_words(logits, bad_tokens, output_padding_offset, token_num, bs, length, length_bad_words, max_seq_len)


class TestSpeculateGetTokenPenaltyMultiScores(unittest.TestCase):
def test_speculate_get_token_penalty_multi_scores(self):
paddle.seed(2023)
np.random.seed(2023)

bs = 64
        max_seq_len = 1024
data_type = "float32"

# prepare output_padding_offset and output_cum_offsets
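        # Layout (hypothetical numbers for illustration): with max_seq_len = 4
        # and two sequences of one token each, token 0 sits at padding offset 0
        # and token 1 at offset 3, so (1 + 3) // 4 recovers batch index 1.
        # output_cum_offsets[bi] is the total padding inserted before sequence bi.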
tokens = [1] * bs
token_num = np.sum(tokens)
output_padding_offset = []
output_cum_offsets = [0]
opo_offset = 0
for bid in range(bs):
ts = tokens[bid]
for i in range(ts):
output_padding_offset.append(opo_offset)
opo_offset += max_seq_len - ts
output_cum_offsets.append(opo_offset)
output_cum_offsets = output_cum_offsets[:-1]
output_padding_offset = paddle.to_tensor(output_padding_offset, "int32")
output_cum_offsets = paddle.to_tensor(output_cum_offsets, "int32")

# prepare pre_ids and logits
pre_ids_len = 122
logits_len = 110
pre_ids = np.random.randint(1, logits_len, size=(bs, pre_ids_len))
negative_start = np.random.randint(1, pre_ids_len + 1, size=(bs))
        for i in range(bs):
            pre_ids[i, negative_start[i] :] = -1
pre_ids = paddle.to_tensor(pre_ids).astype("int64")
logits = paddle.zeros([token_num, logits_len]).astype(data_type)
# prepare other params
penalty_scores = paddle.to_tensor(np.random.random([bs])).astype(data_type)
frequency_scores = paddle.to_tensor(np.random.random([bs])).astype(data_type)
presence_scores = paddle.to_tensor(np.random.random([bs])).astype(data_type)
temperatures = paddle.to_tensor(np.random.random([bs])).astype("float32")
bad_tokens = paddle.to_tensor(np.random.randint(1, 2, size=([bs, 1]))).astype("int64")
cur_len = paddle.to_tensor(np.random.randint(1, 50, size=(bs))).astype("int64")
min_len = paddle.to_tensor(np.random.randint(1, 50, size=(bs))).astype("int64")
eos_token_id = paddle.to_tensor(np.random.randint(1, 64, size=(bs))).astype("int64")
        # only the shape (batch size) of seq_len_this_time is used by the op
        seq_len_this_time = paddle.to_tensor(np.random.randint(0, 1, size=(bs)), "int32")

inputs = (
pre_ids,
logits,
penalty_scores,
frequency_scores,
presence_scores,
temperatures,
bad_tokens,
cur_len,
min_len,
eos_token_id,
seq_len_this_time,
output_padding_offset,
output_cum_offsets,
max_seq_len,
)
        # Both the op and the reference modify logits in place rather than
        # returning it, so run each on its own copy of the inputs.
inputs_clone = [x.clone() if isinstance(x, paddle.Tensor) else x for x in inputs]
speculate_get_token_penalty_multi_scores(*inputs)
speculate_get_token_penalty_multi_scores_ref(*inputs_clone)

np.testing.assert_allclose(inputs[1].numpy(), inputs_clone[1].numpy(), atol=1e-5, rtol=1e-5)


if __name__ == "__main__":
unittest.main()