
Commit e8fe3e2

fix names in training scripts
1 parent 1306731 commit e8fe3e2

13 files changed: +35 -88 lines changed

examples/grpo/train_arc_1d.py

Lines changed: 3 additions & 3 deletions
@@ -2,10 +2,10 @@

 """
 # install
-vf-install vf-reasoning-gym (-p /path/to/environments)
+vf-install reasoning-gym (-p /path/to/environments)

 # quick eval
-vf-eval vf-reasoning-gym (-m model_name in endpoints.py)
+vf-eval reasoning-gym (-m model_name in endpoints.py)

 inference:
 CUDA_VISIBLE_DEVICES=0,1,2,3 vf-vllm --model willcb/Qwen3-14B-Arc-1D-SFT \
@@ -22,7 +22,7 @@
 model, tokenizer = vf.get_model_and_tokenizer(model_name)

 vf_env = vf.load_environment(
-    env_id="vf-reasoning-gym", gym="arc_1d", num_samples=4000, seed=1
+    env_id="reasoning-gym", gym="arc_1d", num_samples=4000, seed=1
 )

 run_name = f"arc_1d-grpo-{size}"

examples/grpo/train_continuation_quality.py

Lines changed: 3 additions & 3 deletions
@@ -2,10 +2,10 @@

 """
 # install
-vf-install vf-continuation-quality (-p /path/to/environments)
+vf-install continuation-quality (-p /path/to/environments)

 # quick eval
-vf-eval vf-continuation-quality (-m model_name in endpoints.py)
+vf-eval continuation-quality (-m model_name in endpoints.py)

 inference:
 CUDA_VISIBLE_DEVICES=0 vf-vllm --model Qwen/Qwen2.5-0.5B \
@@ -17,7 +17,7 @@
 """

 model_name = "Qwen/Qwen2.5-0.5B"
-vf_env = vf.load_environment(env_id="vf-continuation-quality")
+vf_env = vf.load_environment(env_id="continuation-quality")
 model, tokenizer = vf.get_model_and_tokenizer(model_name)
 trainer = vf.GRPOTrainer(
     env=vf_env,

examples/grpo/train_gsm8k.py

Lines changed: 3 additions & 3 deletions
@@ -2,10 +2,10 @@

 """
 # install
-vf-install vf-gsm8k (-p /path/to/environments)
+vf-install gsm8k (-p /path/to/environments)

 # quick eval
-vf-eval vf-gsm8k (-m model_name in endpoints.py)
+vf-eval gsm8k (-m model_name in endpoints.py)

 inference:
 CUDA_VISIBLE_DEVICES=0 vf-vllm --model willcb/Qwen3-0.6B --enforce-eager --disable-log-requests
@@ -15,7 +15,7 @@
     --config-file configs/zero3.yaml examples/grpo/train_gsm8k.py
 """

-vf_env = vf.load_environment(env_id="vf-gsm8k", num_eval_examples=100)
+vf_env = vf.load_environment(env_id="gsm8k", num_eval_examples=100)

 model_name = "willcb/Qwen3-0.6B"
 run_name = "gsm8k-grpo_" + model_name.split("/")[-1].lower()

examples/grpo/train_hotpotqa.py

Lines changed: 0 additions & 53 deletions
This file was deleted.

examples/grpo/train_math_group.py

Lines changed: 3 additions & 3 deletions
@@ -2,10 +2,10 @@

 """
 # install
-vf-install vf-math-group (-p /path/to/environments)
+vf-install math-group (-p /path/to/environments)

 # quick eval
-vf-eval vf-math-group (-m model_name in endpoints.py)
+vf-eval math-group (-m model_name in endpoints.py)

 inference:
 CUDA_VISIBLE_DEVICES=0 vf-vllm --model willcb/Qwen3-0.6B \
@@ -16,7 +16,7 @@
     --config-file configs/zero3.yaml examples/grpo/train_math_group.py
 """

-vf_env = vf.load_environment(env_id="vf-math-group")
+vf_env = vf.load_environment(env_id="math-group")

 model_name = "willcb/Qwen3-0.6B"
 model, tokenizer = vf.get_model_and_tokenizer(model_name)

examples/grpo/train_math_python.py

Lines changed: 3 additions & 3 deletions
@@ -2,10 +2,10 @@

 """
 # install
-vf-install vf-math-python (-p /path/to/environments)
+vf-install math-python (-p /path/to/environments)

 # eval
-vf-eval vf-math-python (-m model_name in endpoints.py)
+vf-eval math-python (-m model_name in endpoints.py)

 # inference
 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5 vf-vllm --model 'willcb/Qwen3-1.7B' \
@@ -17,7 +17,7 @@
     --config-file configs/zero3.yaml examples/grpo/train_math_python.py
 """

-vf_env = vf.load_environment(env_id="vf-math-python")
+vf_env = vf.load_environment(env_id="math-python")

 model_name = "willcb/Qwen3-1.7B"
 model, tokenizer = vf.get_model_and_tokenizer(model_name)

examples/grpo/train_reverse_text.py

Lines changed: 3 additions & 3 deletions
@@ -2,10 +2,10 @@

 """
 # install
-vf-install vf-reverse-text (-p /path/to/environments)
+vf-install reverse-text (-p /path/to/environments)

 # quick eval
-vf-eval vf-reverse-text (-m model_name in endpoints.py)
+vf-eval reverse-text (-m model_name in endpoints.py)

 inference:
 CUDA_VISIBLE_DEVICES=0 vf-vllm --model willcb/Qwen2.5-0.5B-Reverse-SFT \
@@ -19,7 +19,7 @@
 model_name = "willcb/Qwen2.5-0.5B-Reverse-SFT"
 model, tokenizer = vf.get_model_and_tokenizer(model_name)

-vf_env = vf.load_environment(env_id="vf-reverse-text")
+vf_env = vf.load_environment(env_id="reverse-text")

 args = vf.grpo_defaults(run_name="reverse-text")
 args.per_device_train_batch_size = 12

examples/grpo/train_self_reward.py

Lines changed: 3 additions & 3 deletions
@@ -2,10 +2,10 @@

 """
 # install
-vf-install vf-self-reward (-p /path/to/environments)
+vf-install self-reward (-p /path/to/environments)

 # quick eval
-vf-eval vf-self-reward (-m model_name in endpoints.py)
+vf-eval self-reward (-m model_name in endpoints.py)

 inference:
 CUDA_VISIBLE_DEVICES=0 vf-vllm --model Qwen/Qwen2.5-7B-Instruct \
@@ -17,7 +17,7 @@
 """

 model_name = "Qwen/Qwen2.5-7B-Instruct"
-vf_env = vf.load_environment(env_id="vf-self-reward", model_name=model_name)
+vf_env = vf.load_environment(env_id="self-reward", model_name=model_name)
 model, tokenizer = vf.get_model_and_tokenizer(model_name)
 trainer = vf.GRPOTrainer(
     env=vf_env,

examples/grpo/train_sentence_repeater.py

Lines changed: 3 additions & 3 deletions
@@ -2,10 +2,10 @@

 """
 # install
-vf-install vf-sentence-repeater (-p /path/to/environments)
+vf-install sentence-repeater (-p /path/to/environments)

 # quick eval
-vf-eval vf-sentence-repeater (-m model_name in endpoints.py)
+vf-eval sentence-repeater (-m model_name in endpoints.py)

 inference:
 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5 vf-vllm --model Qwen/Qwen2.5-1.5B-Instruct \
@@ -19,7 +19,7 @@
 model_name = "Qwen/Qwen2.5-1.5B-Instruct"
 model, tokenizer = vf.get_model_and_tokenizer(model_name)

-vf_env = vf.load_environment(env_id="vf-sentence-repeater")
+vf_env = vf.load_environment(env_id="sentence-repeater")

 run_name = "sentence-repeater-grpo-qwen1.5b"
 training_args = vf.grpo_defaults(run_name=run_name)

examples/grpo/train_tool_test.py

Lines changed: 3 additions & 3 deletions
@@ -2,10 +2,10 @@

 """
 # install
-vf-install vf-tool-test (-p /path/to/environments)
+vf-install tool-test (-p /path/to/environments)

 # quick eval
-vf-eval vf-tool-test (-m model_name in endpoints.py)
+vf-eval tool-test (-m model_name in endpoints.py)

 inference:
 CUDA_VISIBLE_DEVICES=0 vf-vllm --model willcb/Qwen3-0.6B \
@@ -17,7 +17,7 @@
     --config-file configs/zero3.yaml examples/grpo/train_tool_test.py
 """

-vf_env = vf.load_environment(env_id="vf-tool-test", num_eval_examples=100)
+vf_env = vf.load_environment(env_id="tool-test", num_eval_examples=100)

 model_name = "willcb/Qwen3-0.6B"
 run_name = "tool-test_" + model_name.split("/")[-1].lower()

0 commit comments