Skip to content

Commit 9ab02eb

Browse files
authored
Merge pull request #522 from MrForExample/dev
Merge from dev to main - Hunyuan3D 2.1 - PartCrafter with scene generation support
2 parents bffeb83 + b430403 commit 9ab02eb

File tree

277 files changed

+83716
-197
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

277 files changed

+83716
-197
lines changed
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
model:
2+
target: hy3dshape.models.denoisers.hunyuandit.HunYuanDiTPlain
3+
params:
4+
input_size: &num_latents 4096
5+
in_channels: 64
6+
hidden_size: 2048
7+
context_dim: 1024
8+
depth: 21
9+
num_heads: 16
10+
qk_norm: true
11+
text_len: 1370
12+
with_decoupled_ca: false
13+
use_attention_pooling: false
14+
qk_norm_type: 'rms'
15+
qkv_bias: false
16+
use_pos_emb: false
17+
num_moe_layers: 6
18+
num_experts: 8
19+
moe_top_k: 2
20+
21+
vae:
22+
target: hy3dshape.models.autoencoders.ShapeVAE
23+
params:
24+
num_latents: *num_latents
25+
embed_dim: 64
26+
num_freqs: 8
27+
include_pi: false
28+
heads: 16
29+
width: 1024
30+
num_encoder_layers: 8
31+
num_decoder_layers: 16
32+
qkv_bias: false
33+
qk_norm: true
34+
scale_factor: 1.0039506158752403
35+
geo_decoder_mlp_expand_ratio: 4
36+
geo_decoder_downsample_ratio: 1
37+
geo_decoder_ln_post: true
38+
point_feats: 4
39+
pc_size: 81920
40+
pc_sharpedge_size: 0
41+
42+
conditioner:
43+
target: hy3dshape.models.conditioner.SingleImageEncoder
44+
params:
45+
main_image_encoder:
46+
type: DinoImageEncoder # dino large
47+
kwargs:
48+
config:
49+
attention_probs_dropout_prob: 0.0
50+
drop_path_rate: 0.0
51+
hidden_act: gelu
52+
hidden_dropout_prob: 0.0
53+
hidden_size: 1024
54+
image_size: 518
55+
initializer_range: 0.02
56+
layer_norm_eps: 1.e-6
57+
layerscale_value: 1.0
58+
mlp_ratio: 4
59+
model_type: dinov2
60+
num_attention_heads: 16
61+
num_channels: 3
62+
num_hidden_layers: 24
63+
patch_size: 14
64+
qkv_bias: true
65+
torch_dtype: float32
66+
use_swiglu_ffn: false
67+
image_size: 518
68+
use_cls_token: true
69+
70+
scheduler:
71+
target: hy3dshape.schedulers.FlowMatchEulerDiscreteScheduler
72+
params:
73+
num_train_timesteps: 1000
74+
75+
image_processor:
76+
target: hy3dshape.preprocessors.ImageProcessorV2
77+
params:
78+
size: 512
79+
border_ratio: 0.15
80+
81+
pipeline:
82+
target: hy3dshape.pipelines.Hunyuan3DDiTFlowMatchingPipeline
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"crop_size": 224,
3+
"do_center_crop": true,
4+
"do_convert_rgb": true,
5+
"do_normalize": true,
6+
"do_resize": true,
7+
"feature_extractor_type": "CLIPFeatureExtractor",
8+
"image_mean": [
9+
0.48145466,
10+
0.4578275,
11+
0.40821073
12+
],
13+
"image_std": [
14+
0.26862954,
15+
0.26130258,
16+
0.27577711
17+
],
18+
"resample": 3,
19+
"size": 224
20+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"_name_or_path": "vision_encoder",
3+
"architectures": [
4+
"CLIPVisionModelWithProjection"
5+
],
6+
"attention_dropout": 0.0,
7+
"dropout": 0.0,
8+
"hidden_act": "gelu",
9+
"hidden_size": 1280,
10+
"image_size": 224,
11+
"initializer_factor": 1.0,
12+
"initializer_range": 0.02,
13+
"intermediate_size": 5120,
14+
"layer_norm_eps": 1e-05,
15+
"model_type": "clip_vision_model",
16+
"num_attention_heads": 16,
17+
"num_channels": 3,
18+
"num_hidden_layers": 32,
19+
"patch_size": 14,
20+
"projection_dim": 1024,
21+
"torch_dtype": "float16",
22+
"transformers_version": "4.36.0"
23+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
{
2+
"_class_name": "HunyuanPaintPipeline",
3+
"_diffusers_version": "0.24.0",
4+
"feature_extractor": [
5+
"transformers",
6+
"CLIPImageProcessor"
7+
],
8+
"requires_safety_checker": false,
9+
"safety_checker": [
10+
null,
11+
null
12+
],
13+
"scheduler": [
14+
"diffusers",
15+
"DDIMScheduler"
16+
],
17+
"text_encoder": [
18+
"transformers",
19+
"CLIPTextModel"
20+
],
21+
"tokenizer": [
22+
"transformers",
23+
"CLIPTokenizer"
24+
],
25+
"unet": [
26+
"modules",
27+
"UNet2p5DConditionModel"
28+
],
29+
"vae": [
30+
"diffusers",
31+
"AutoencoderKL"
32+
],
33+
"image_encoder": [
34+
"transformers",
35+
"CLIPVisionModelWithProjection"
36+
]
37+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"_class_name": "DDIMScheduler",
3+
"_diffusers_version": "0.23.1",
4+
"beta_end": 0.012,
5+
"beta_schedule": "scaled_linear",
6+
"beta_start": 0.00085,
7+
"clip_sample": false,
8+
"num_train_timesteps": 1000,
9+
"prediction_type": "v_prediction",
10+
"set_alpha_to_one": true,
11+
"steps_offset": 1,
12+
"trained_betas": null,
13+
"timestep_spacing": "trailing",
14+
"rescale_betas_zero_snr": true
15+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
{
2+
"_name_or_path": "stabilityai/stable-diffusion-2",
3+
"architectures": [
4+
"CLIPTextModel"
5+
],
6+
"attention_dropout": 0.0,
7+
"bos_token_id": 0,
8+
"dropout": 0.0,
9+
"eos_token_id": 2,
10+
"hidden_act": "gelu",
11+
"hidden_size": 1024,
12+
"initializer_factor": 1.0,
13+
"initializer_range": 0.02,
14+
"intermediate_size": 4096,
15+
"layer_norm_eps": 1e-05,
16+
"max_position_embeddings": 77,
17+
"model_type": "clip_text_model",
18+
"num_attention_heads": 16,
19+
"num_hidden_layers": 23,
20+
"pad_token_id": 1,
21+
"projection_dim": 512,
22+
"torch_dtype": "float32",
23+
"transformers_version": "4.25.0.dev0",
24+
"vocab_size": 49408
25+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
{
2+
"bos_token": {
3+
"content": "<|startoftext|>",
4+
"lstrip": false,
5+
"normalized": true,
6+
"rstrip": false,
7+
"single_word": false
8+
},
9+
"eos_token": {
10+
"content": "<|endoftext|>",
11+
"lstrip": false,
12+
"normalized": true,
13+
"rstrip": false,
14+
"single_word": false
15+
},
16+
"pad_token": "!",
17+
"unk_token": {
18+
"content": "<|endoftext|>",
19+
"lstrip": false,
20+
"normalized": true,
21+
"rstrip": false,
22+
"single_word": false
23+
}
24+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
{
2+
"add_prefix_space": false,
3+
"bos_token": {
4+
"__type": "AddedToken",
5+
"content": "<|startoftext|>",
6+
"lstrip": false,
7+
"normalized": true,
8+
"rstrip": false,
9+
"single_word": false
10+
},
11+
"do_lower_case": true,
12+
"eos_token": {
13+
"__type": "AddedToken",
14+
"content": "<|endoftext|>",
15+
"lstrip": false,
16+
"normalized": true,
17+
"rstrip": false,
18+
"single_word": false
19+
},
20+
"errors": "replace",
21+
"model_max_length": 77,
22+
"name_or_path": "stabilityai/stable-diffusion-2",
23+
"pad_token": "<|endoftext|>",
24+
"special_tokens_map_file": "./special_tokens_map.json",
25+
"tokenizer_class": "CLIPTokenizer",
26+
"unk_token": {
27+
"__type": "AddedToken",
28+
"content": "<|endoftext|>",
29+
"lstrip": false,
30+
"normalized": true,
31+
"rstrip": false,
32+
"single_word": false
33+
}
34+
}

0 commit comments

Comments
 (0)