Skip to content

Commit

Permalink
add config file for 2K models;
Browse files Browse the repository at this point in the history
Signed-off-by: lawrence-cj <[email protected]>
  • Loading branch information
lawrence-cj committed Dec 20, 2024
1 parent d367838 commit 374447b
Showing 1 changed file with 109 additions and 0 deletions.
109 changes: 109 additions & 0 deletions configs/sana_config/2048ms/Sana_1600M_img2048.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
data:
data_dir: [data/data_public/dir1]
image_size: 2048
caption_proportion:
prompt: 1
external_caption_suffixes: ['', _InternVL2-26B, _VILA1-5-13B]
external_clipscore_suffixes:
- _InternVL2-26B_clip_score
- _VILA1-5-13B_clip_score
- _prompt_clip_score
clip_thr_temperature: 0.1
clip_thr: 25.0
load_text_feat: false
load_vae_feat: false
transform: default_train
type: SanaWebDatasetMS
sort_dataset: false
# model config
model:
model: SanaMS_1600M_P1_D20
image_size: 2048
mixed_precision: bf16 # ['fp16', 'fp32', 'bf16']
fp32_attention: true
load_from:
resume_from:
aspect_ratio_type: ASPECT_RATIO_2048
multi_scale: true
attn_type: linear
ffn_type: glumbconv
mlp_acts:
- silu
- silu
-
mlp_ratio: 2.5
use_pe: true
pe_interpolation: 1.
qk_norm: false
class_dropout_prob: 0.1
# PAG
pag_applied_layers:
- 8
# VAE setting
vae:
vae_type: dc-ae
vae_pretrained: mit-han-lab/dc-ae-f32c32-sana-1.0
scale_factor: 0.41407
vae_latent_dim: 32
vae_downsample_rate: 32
sample_posterior: true
# text encoder
text_encoder:
text_encoder_name: gemma-2-2b-it
y_norm: true
y_norm_scale_factor: 0.01
model_max_length: 300
# CHI
chi_prompt:
- 'Given a user prompt, generate an "Enhanced prompt" that provides detailed visual descriptions suitable for image generation. Evaluate the level of detail in the user prompt:'
- '- If the prompt is simple, focus on adding specifics about colors, shapes, sizes, textures, and spatial relationships to create vivid and concrete scenes.'
- '- If the prompt is already detailed, refine and enhance the existing details slightly without overcomplicating.'
- 'Here are examples of how to transform or refine prompts:'
- '- User Prompt: A cat sleeping -> Enhanced: A small, fluffy white cat curled up in a round shape, sleeping peacefully on a warm sunny windowsill, surrounded by pots of blooming red flowers.'
- '- User Prompt: A busy city street -> Enhanced: A bustling city street scene at dusk, featuring glowing street lamps, a diverse crowd of people in colorful clothing, and a double-decker bus passing by towering glass skyscrapers.'
- 'Please generate only the enhanced description for the prompt below and avoid including any additional commentary or evaluations:'
- 'User Prompt: '
# Sana schedule Flow
scheduler:
predict_v: true
noise_schedule: linear_flow
pred_sigma: false
flow_shift: 3.0
# logit-normal timestep
weighting_scheme: logit_normal
logit_mean: 0.0
logit_std: 1.0
vis_sampler: flow_dpm-solver
# training setting
train:
num_workers: 10
seed: 1
train_batch_size: 64
num_epochs: 100
gradient_accumulation_steps: 1
grad_checkpointing: true
gradient_clip: 0.1
optimizer:
betas:
- 0.9
- 0.999
- 0.9999
eps:
- 1.0e-30
- 1.0e-16
lr: 0.0001
type: CAMEWrapper
weight_decay: 0.0
lr_schedule: constant
lr_schedule_args:
num_warmup_steps: 2000
local_save_vis: true # if save log image locally
visualize: true
eval_sampling_steps: 500
log_interval: 20
save_model_epochs: 5
save_model_steps: 500
work_dir: output/debug
online_metric: false
eval_metric_step: 2000
online_metric_dir: metric_helper

0 comments on commit 374447b

Please sign in to comment.