vault backup: 2024-10-23 18:01:40

zhangfuwen · Oct 23, 2024 · c4cc6ed · c4cc6ed
1 parent 84619c6
commit c4cc6ed
Show file tree

Hide file tree

Showing 4 changed files with 49 additions and 9 deletions.
diff --git a/Notes/006-ai/assets/Pasted image 20241023170632.png b/Notes/006-ai/assets/Pasted image 20241023170632.png
diff --git a/Notes/006-ai/assets/Pasted image 20241023170741.png b/Notes/006-ai/assets/Pasted image 20241023170741.png
diff --git a/Notes/006-ai/llamafactory.md b/Notes/006-ai/llamafactory.md
@@ -1,11 +1,6 @@
 
 https://github.com/hiyouga/LLaMA-Factory/issues/5400
 
-
-![](assets/Pasted%20image%2020241023164959.png)
-
-```python
-TOKENIZERS_PARALLELISM=true FORCE_TORCHRUN=1  llamafactory-cli train ...
-```
-
-pip install transformers==4.44.2 accelerate=0.33.0
+```bash
+pip install transformers==4.44.2 accelerate==0.33.0
+```
diff --git a/Notes/006-ai/模型学习技巧.md b/Notes/006-ai/模型学习技巧.md
@@ -3718,7 +3718,52 @@ A100:
 |          |             |         |                    |                             |            |                      |                  |      |                |
 forward算力：2.4GFLOPS * 16 * 1024 ≈ 39.3 TFLOPS(约需2s)，如果gradient_accumulation_steps设成8，那就是19s左右。
 
-
+### 3B模型/8 V100
+
+```yaml
+### model
+#model_name_or_path: /mnt/bn/znzx-public/models/llama32/Llama-3.2-1B-Instruct
+model_name_or_path: /root/Code/PythonScripts/custom_model_code/qwen2-3b-l46-2
+
+### method
+stage: pt
+do_train: true
+#train_from_scratch: true
+finetuning_type: full
+#resume_from_checkpoint: true
+deepspeed: examples/deepspeed/ds_z3_config.json
+#use_badam: false
+
+logging_steps: 10
+save_steps: 1000
+save_total_limit: 5
+num_train_epochs: 100
+### dataset
+dataset: "wiki_zh"
+streaming: true
+max_steps: 3000000
+ignore_data_skip: true
+eval_dataset: "wiki_zh"
+template: qwen
+cutoff_len: 1024
+#max_samples: 50000
+#overwrite_cache: true
+fp16: true
+preprocessing_num_workers: 16
+
+### output
+output_dir: /mnt/bn/znzx-public/lora/saves/custom_qwen3b_l46/
+overwrite_output_dir: true
+
+### train / eval batch settings
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 1
+gradient_accumulation_steps: 1
+
+```
+
+![](assets/Pasted%20image%2020241023170632.png)
+![](assets/Pasted%20image%2020241023170741.png)
 # A100显卡算力
 
 ![](assets/Pasted%20image%2020241009161548.png)