-
Notifications
You must be signed in to change notification settings - Fork 160
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: lawrence-cj <[email protected]>
- Loading branch information
1 parent
d367838
commit 374447b
Showing
1 changed file
with
109 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
data: | ||
data_dir: [data/data_public/dir1] | ||
image_size: 2048 | ||
caption_proportion: | ||
prompt: 1 | ||
external_caption_suffixes: ['', _InternVL2-26B, _VILA1-5-13B] | ||
external_clipscore_suffixes: | ||
- _InternVL2-26B_clip_score | ||
- _VILA1-5-13B_clip_score | ||
- _prompt_clip_score | ||
clip_thr_temperature: 0.1 | ||
clip_thr: 25.0 | ||
load_text_feat: false | ||
load_vae_feat: false | ||
transform: default_train | ||
type: SanaWebDatasetMS | ||
sort_dataset: false | ||
# model config | ||
model: | ||
model: SanaMS_1600M_P1_D20 | ||
image_size: 2048 | ||
mixed_precision: bf16 # ['fp16', 'fp32', 'bf16'] | ||
fp32_attention: true | ||
load_from: | ||
resume_from: | ||
aspect_ratio_type: ASPECT_RATIO_2048 | ||
multi_scale: true | ||
attn_type: linear | ||
ffn_type: glumbconv | ||
mlp_acts: | ||
- silu | ||
- silu | ||
- | ||
mlp_ratio: 2.5 | ||
use_pe: true | ||
pe_interpolation: 1. | ||
qk_norm: false | ||
class_dropout_prob: 0.1 | ||
# PAG (perturbed-attention guidance) | ||
pag_applied_layers: | ||
- 8 | ||
# VAE setting | ||
vae: | ||
vae_type: dc-ae | ||
vae_pretrained: mit-han-lab/dc-ae-f32c32-sana-1.0 | ||
scale_factor: 0.41407 | ||
vae_latent_dim: 32 | ||
vae_downsample_rate: 32 | ||
sample_posterior: true | ||
# text encoder | ||
text_encoder: | ||
text_encoder_name: gemma-2-2b-it | ||
y_norm: true | ||
y_norm_scale_factor: 0.01 | ||
model_max_length: 300 | ||
# CHI (complex human instruction) prompt | ||
chi_prompt: | ||
- 'Given a user prompt, generate an "Enhanced prompt" that provides detailed visual descriptions suitable for image generation. Evaluate the level of detail in the user prompt:' | ||
- '- If the prompt is simple, focus on adding specifics about colors, shapes, sizes, textures, and spatial relationships to create vivid and concrete scenes.' | ||
- '- If the prompt is already detailed, refine and enhance the existing details slightly without overcomplicating.' | ||
- 'Here are examples of how to transform or refine prompts:' | ||
- '- User Prompt: A cat sleeping -> Enhanced: A small, fluffy white cat curled up in a round shape, sleeping peacefully on a warm sunny windowsill, surrounded by pots of blooming red flowers.' | ||
- '- User Prompt: A busy city street -> Enhanced: A bustling city street scene at dusk, featuring glowing street lamps, a diverse crowd of people in colorful clothing, and a double-decker bus passing by towering glass skyscrapers.' | ||
- 'Please generate only the enhanced description for the prompt below and avoid including any additional commentary or evaluations:' | ||
- 'User Prompt: ' | ||
# Sana flow schedule | ||
scheduler: | ||
predict_v: true | ||
noise_schedule: linear_flow | ||
pred_sigma: false | ||
flow_shift: 3.0 | ||
# logit-normal timestep sampling | ||
weighting_scheme: logit_normal | ||
logit_mean: 0.0 | ||
logit_std: 1.0 | ||
vis_sampler: flow_dpm-solver | ||
# training setting | ||
train: | ||
num_workers: 10 | ||
seed: 1 | ||
train_batch_size: 64 | ||
num_epochs: 100 | ||
gradient_accumulation_steps: 1 | ||
grad_checkpointing: true | ||
gradient_clip: 0.1 | ||
optimizer: | ||
betas: | ||
- 0.9 | ||
- 0.999 | ||
- 0.9999 | ||
eps: | ||
- 1.0e-30 | ||
- 1.0e-16 | ||
lr: 0.0001 | ||
type: CAMEWrapper | ||
weight_decay: 0.0 | ||
lr_schedule: constant | ||
lr_schedule_args: | ||
num_warmup_steps: 2000 | ||
local_save_vis: true # whether to save logged visualization images locally | ||
visualize: true | ||
eval_sampling_steps: 500 | ||
log_interval: 20 | ||
save_model_epochs: 5 | ||
save_model_steps: 500 | ||
work_dir: output/debug | ||
online_metric: false | ||
eval_metric_step: 2000 | ||
online_metric_dir: metric_helper |