# run_mae_vit_base_p16_224_800ep.yaml
seed: 0
run_mode: 'train'
output_dir: './output' # customization is currently not supported; do not change this default value
load_checkpoint: ''
src_strategy_path_or_dir: ''
auto_trans_ckpt: False # if True, automatically transform load_checkpoint for loading into the distributed model
only_save_strategy: False
resume_training: False
# context
context:
  mode: 0 # 0: Graph Mode; 1: PyNative Mode
device_target: "Ascend"
enable_graph_kernel: False
graph_kernel_flags: "--opt_level=0"
max_call_depth: 10000
save_graphs: False
device_id: 0
# aicc
remote_save_url: "Please input the OBS URL on the AICC platform."
# runner
runner_config:
  epochs: 10 # note: warmup_epochs (40) and the "800ep" filename suggest this recipe targets epochs: 800
batch_size: 64
image_size: 224
sink_mode: True
sink_size: 2
runner_wrapper:
type: MFTrainOneStepCell
scale_sense:
type: FixedLossScaleUpdateCell
loss_scale_value: 1024
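# Fixed loss scaling, sketched (standard mixed-precision pattern, not read
# from MFTrainOneStepCell itself):
#   scaled_loss = loss * loss_scale_value          # 1024 here
#   grads = grad(scaled_loss) / loss_scale_value   # unscale before the update
# A fixed scale keeps fp16 gradients above the underflow threshold.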
# parallel
use_parallel: False
parallel:
parallel_mode: 0 # 0-data parallel, 1-semi-auto parallel, 2-auto parallel, 3-hybrid parallel
gradients_mean: True
enable_alltoall: False
full_batch: False
search_mode: "sharding_propagation"
enable_parallel_optimizer: False
strategy_ckpt_save_file: "./ckpt_strategy.ckpt"
parallel_config:
data_parallel: 1
model_parallel: 1
expert_parallel: 1
pipeline_stage: 1
micro_batch_num: 1
gradient_aggregation_group: 4
micro_batch_interleave_num: 1
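# Sizing sketch (rule-of-thumb assumption, not verified against this repo):
#   required_devices = data_parallel * model_parallel * pipeline_stage
# With every dimension at 1 this runs on a single NPU; e.g. an 8-NPU
# data-parallel job would set data_parallel: 8 and use_parallel: True.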
# moe
moe_config:
expert_num: 1
capacity_factor: 1.05
aux_loss_factor: 0.05
num_experts_chosen: 1
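# With expert_num: 1 the model is effectively dense; these MoE fields only
# take effect once expert_num > 1 (typically together with expert_parallel).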
# recompute
recompute_config:
recompute: False
parallel_optimizer_comm_recompute: False
mp_comm_recompute: True
recompute_slice_activation: False
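# Recompute, when enabled, trades memory for time: selected activations are
# dropped in the forward pass and recomputed during backprop.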
# autotune
auto_tune: False
filepath_prefix: './autotune'
autotune_per_step: 10
# profile
profile: False
profile_start_step: 1
profile_stop_step: 10
init_start_profile: False
profile_communication: False
profile_memory: True
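# When profile: True, tracing covers steps profile_start_step through
# profile_stop_step (steps 1-10 here); profile_memory additionally records
# device memory usage.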
# Trainer
trainer:
type: masked_image_modeling
model_name: 'mae_vit_base_p16'
# train dataset
train_dataset: &train_dataset
seed: 2022
batch_size: 64
data_loader:
type: ImageFolderDataset
dataset_dir: "imageNet-1k/train"
num_parallel_workers: 8
shuffle: True
transforms:
- type: RandomCropDecodeResize
size: 224
scale: [0.2, 1.0]
interpolation: cubic
- type: RandomHorizontalFlip
prob: 0.5
- type: Normalize
mean: [123.675, 118.575, 103.53]
std: [58.395, 62.22, 57.375]
- type: HWC2CHW
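  # Normalize applies per-channel stats on the 0-255 scale, ImageNet-style:
  #   out[c] = (pixel[c] - mean[c]) / std[c]
  # e.g. a saturated red value maps to (255 - 123.675) / 58.395 ≈ 2.25.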
mask_policy:
type: MaeMask
input_size: 224
patch_size: 16
mask_ratio: 0.75
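  # Mask arithmetic implied by these settings:
  #   num_patches = (input_size / patch_size)^2 = (224 / 16)^2 = 196
  #   masked = round(196 * 0.75) = 147, so the encoder sees only 49 patches;
  # "ids_restore" / "unmask_index" below let the decoder put mask tokens back.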
input_columns: ["image"]
output_columns: ["image", "mask", "ids_restore", "unmask_index"]
column_order: ["image", "mask", "ids_restore", "unmask_index"]
num_parallel_workers: 8
python_multiprocessing: False
drop_remainder: True
repeat: 1
numa_enable: False
prefetch_size: 30
train_dataset_task:
type: MIMDataset
dataset_config: *train_dataset
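# "*train_dataset" is a plain YAML alias: MIMDataset receives the entire
# mapping anchored as &train_dataset above, so changes there apply here too.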
# model
model:
arch:
type: ViTMAEForPreTraining
model_config:
type: ViTMAEConfig
mask_ratio: 0.75 # mask ratio
image_size: 224 # input image size
patch_size: 16 # patch size
num_channels: 3 # channels of input images
initializer_range: 0.02 # initial std of cls_tokens and mask_tokens
hidden_size: 768 # embedding dimension
num_hidden_layers: 12 # number of transformer blocks
num_attention_heads: 12 # number of attention heads
intermediate_size: 3072 # ffn_hidden_size of encoder
    qkv_bias: True # whether the qkv dense layers have a bias
hidden_act: gelu # activation of MLP
    post_layernorm_residual: False # whether to use post-layernorm residual
layer_norm_eps: 0.000001 # eps of layer_norm
attention_probs_dropout_prob: 0. # drop rate of Attention
hidden_dropout_prob: 0. # drop rate of MLP
decoder_hidden_size: 512 # decoder embedding dim
decoder_num_hidden_layers: 8 # number of decoder transformer blocks
decoder_num_attention_heads: 16 # number of decoder attention heads
decoder_intermediate_size: 2048 # ffn_hidden_size of decoder
    norm_pix_loss: True # whether to use normalized pixel loss
checkpoint_name_or_path: 'mae_vit_base_p16'
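# Geometry sanity checks for the config above (arithmetic only):
#   head_dim = hidden_size / num_attention_heads = 768 / 12 = 64
#   each masked patch is reconstructed as patch_size^2 * num_channels
#   = 16 * 16 * 3 = 768 values, which the 512-dim decoder projects out to.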
# lr schedule
lr_schedule:
type: cosine
learning_rate: 0.00015
lr_end: 0.
warmup_lr_init: 0.
warmup_epochs: 40
  total_steps: -1 # -1 means derive the total step count from the dataset
layer_scale: False
layer_decay: 0.65
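# Schedule shape (assumption based on the field names): lr ramps linearly
# from warmup_lr_init to learning_rate over the first 40 epochs, then decays
# along a cosine curve to lr_end for the rest of training.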
# optimizer
optimizer:
type: adamw
beta1: 0.9
beta2: 0.95
eps: 0.00000001 # 1e-8
weight_decay: 0.05
lr_scale: True
lr_scale_factor: 256
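# lr_scale applies a linear scaling rule (assumption; MAE's recipe is
# base_lr * effective_batch_size / 256):
#   actual_lr = learning_rate * global_batch_size / lr_scale_factor
#             = 1.5e-4 * 64 / 256 ≈ 3.75e-5 on a single device here.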
# callbacks
callbacks:
- type: MFLossMonitor
  - type: CheckpointMointor # spelling matches the callback class name registered upstream in mindformers
prefix: "mindformers"
save_checkpoint_steps: 100
integrated_save: True
async_save: False
- type: ObsMonitor
eval_callbacks:
- type: ObsMonitor
# processor
processor:
type: ViTMAEProcessor
image_processor:
type: ViTMAEImageProcessor
size: 224 # input image size
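# Typical launch (path and flags are an assumption; see the repo README):
#   python run_mindformer.py --config configs/mae/run_mae_vit_base_p16_224_800ep.yaml --run_mode train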