Skip to content

Commit 0fc7168

Browse files
committed
Add models
1 parent cc850d8 commit 0fc7168

File tree

2 files changed

+39
-15
lines changed

2 files changed

+39
-15
lines changed

MODEL_TRACKING.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@ This document tracks all model weights available in the `/model-weights` directo
4040
| `gemma-2b-it` ||
4141
| `gemma-7b` ||
4242
| `gemma-7b-it` ||
43+
| `gemma-2-2b-it` ||
4344
| `gemma-2-9b` ||
4445
| `gemma-2-9b-it` ||
4546
| `gemma-2-27b` ||

vec_inf/config/models.yaml

Lines changed: 38 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -126,6 +126,18 @@ models:
126126
--tensor-parallel-size: 4
127127
--max-model-len: 4096
128128
--max-num-seqs: 256
129+
gemma-2-2b-it:
130+
model_family: gemma-2
131+
model_variant: 2b-it
132+
model_type: LLM
133+
gpus_per_node: 1
134+
num_nodes: 1
135+
vocab_size: 256000
136+
time: 08:00:00
137+
resource_type: l40s
138+
vllm_args:
139+
--max-model-len: 4096
140+
--max-num-seqs: 256
129141
gemma-2-9b:
130142
model_family: gemma-2
131143
model_variant: 9b
@@ -406,8 +418,7 @@ models:
406418
gpus_per_node: 4
407419
num_nodes: 8
408420
vocab_size: 128256
409-
qos: m4
410-
time: 02:00:00
421+
time: 08:00:00
411422
resource_type: l40s
412423
vllm_args:
413424
--pipeline-parallel-size: 8
@@ -557,19 +568,6 @@ models:
557568
--tensor-parallel-size: 2
558569
--max-model-len: 65536
559570
--max-num-seqs: 256
560-
Llama3-OpenBioLLM-70B:
561-
model_family: Llama3-OpenBioLLM
562-
model_variant: 70B
563-
model_type: LLM
564-
gpus_per_node: 4
565-
num_nodes: 1
566-
vocab_size: 128256
567-
time: 08:00:00
568-
resource_type: l40s
569-
vllm_args:
570-
--tensor-parallel-size: 4
571-
--max-model-len: 8192
572-
--max-num-seqs: 256
573571
Llama-3.1-Nemotron-70B-Instruct-HF:
574572
model_family: Llama-3.1-Nemotron
575573
model_variant: 70B-Instruct-HF
@@ -1107,6 +1105,18 @@ models:
11071105
vllm_args:
11081106
--max-model-len: 4096
11091107
--max-num-seqs: 256
1108+
Qwen3-8B:
1109+
model_family: Qwen3
1110+
model_variant: 8B
1111+
model_type: LLM
1112+
gpus_per_node: 1
1113+
num_nodes: 1
1114+
vocab_size: 151936
1115+
time: 08:00:00
1116+
resource_type: l40s
1117+
vllm_args:
1118+
--max-model-len: 40960
1119+
--max-num-seqs: 256
11101120
Qwen3-14B:
11111121
model_family: Qwen3
11121122
model_variant: 14B
@@ -1119,3 +1129,16 @@ models:
11191129
vllm_args:
11201130
--max-model-len: 40960
11211131
--max-num-seqs: 256
1132+
Qwen3-32B:
1133+
model_family: Qwen3
1134+
model_variant: 32B
1135+
model_type: LLM
1136+
gpus_per_node: 2
1137+
num_nodes: 1
1138+
vocab_size: 151936
1139+
time: 08:00:00
1140+
resource_type: l40s
1141+
vllm_args:
1142+
--tensor-parallel-size: 2
1143+
--max-model-len: 40960
1144+
--max-num-seqs: 256

0 commit comments

Comments (0)