Skip to content

Commit 1df19d2

Browse files
authored
Merge pull request #53 from aws/g5_nova_recipies
Add G5 Nova SFT lora recipe
2 parents 32b40c4 + ddbc779 commit 1df19d2

File tree

1 file changed

+46
-0
lines changed

1 file changed

+46
-0
lines changed
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
---
# Note:
# This recipe is currently supported only on Amazon SageMaker training jobs.

## Run config
run:
  name: "my-lora-run"  # A descriptive name for your training job
  model_type: "amazon.nova-micro-v1:0:128k"  # Model variant specification, do not change
  model_name_or_path: "nova-micro/prod"  # Base model path, do not change
  replicas: 1  # Number of compute instances for training, allowed value is 1
  data_s3_path: ""  # Customer data path
  output_s3_path: ""  # Output artifact path, Sagemaker Hyperpod job-specific configuration - not compatible with standard Sagemaker Training jobs

## Training specific configs
training_config:
  # Fixed: was 8196, which violates the stated constraint below
  # (8196 > 8192 and is not a multiple of 1024); 8192 = 8 * 1024.
  max_length: 8192  # Maximum context window size (tokens). Should be between [1024, 8192] and multiple of 1024.
  global_batch_size: 64  # Global batch size, allowed values are 16, 32, 64

  trainer:
    max_epochs: 2  # Number of training epochs

  model:
    hidden_dropout: 0.0  # Dropout for hidden states, must be between 0.0 and 1.0
    attention_dropout: 0.0  # Dropout for attention weights, must be between 0.0 and 1.0
    ffn_dropout: 0.0  # Dropout for feed-forward networks, must be between 0.0 and 1.0

    optim:
      lr: 1e-5  # Learning rate
      name: distributed_fused_adam  # Optimizer algorithm, do not change
      adam_w_mode: true  # Enable AdamW mode
      eps: 1e-06  # Epsilon for numerical stability
      weight_decay: 0.0  # L2 regularization strength, must be between 0.0 and 1.0
      betas:  # Adam optimizer betas, must be between 0.0 and 1.0
        - 0.9
        - 0.999
      sched:
        warmup_steps: 10  # Learning rate warmup steps
        constant_steps: 0  # Steps at constant learning rate
        min_lr: 1e-6  # Minimum learning rate

    peft:
      peft_scheme: "lora"  # Enable LoRA for parameter-efficient fine-tuning
      lora_tuning:
        loraplus_lr_ratio: 8.0  # LoRA+ learning rate scaling factor, must be between 0.0 and 100.0
        alpha: 32  # Scaling factor for LoRA weights. Allowed values are 32, 64, 96, 128, 160 and 192
        adapter_dropout: 0.01  # Regularization for LoRA parameters. Must be between 0.0 and 1.0

0 commit comments

Comments
 (0)