From 9c30250f2523523bfd3a03520dd94c14410c1524 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Fri, 3 Jan 2025 13:55:57 +0100
Subject: [PATCH] Fix header-level for proper formatting

---
 notebooks/en/fine_tuning_vlm_trl.ipynb | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/notebooks/en/fine_tuning_vlm_trl.ipynb b/notebooks/en/fine_tuning_vlm_trl.ipynb
index d773e8a2..7b3cf7ed 100644
--- a/notebooks/en/fine_tuning_vlm_trl.ipynb
+++ b/notebooks/en/fine_tuning_vlm_trl.ipynb
@@ -51,7 +51,7 @@
     "id": "gSHmDKNFoqjC"
    },
    "source": [
-    "# 1. Install Dependencies\n",
+    "## 1. Install Dependencies\n",
     "\n",
     "Let’s start by installing the essential libraries we’ll need for fine-tuning! 🚀\n"
    ]
@@ -180,7 +180,7 @@
     "id": "g9QXwbJ7ovM5"
    },
    "source": [
-    "# 2. Load Dataset 📁\n",
+    "## 2. Load Dataset 📁\n",
     "\n",
     "In this section, we’ll load the [HuggingFaceM4/ChartQA](https://huggingface.co/datasets/HuggingFaceM4/ChartQA) dataset. This dataset contains chart images paired with related questions and answers, making it ideal for training on visual question answering tasks.\n",
     "\n",
@@ -388,7 +388,7 @@
     "id": "YY1Y_KDtoycB"
    },
    "source": [
-    "# 3. Load Model and Check Performance! 🤔\n",
+    "## 3. Load Model and Check Performance! 🤔\n",
     "\n",
     "Now that we’ve loaded the dataset, let’s start by loading the model and evaluating its performance using a sample from the dataset. We’ll be using [Qwen/Qwen2-VL-7B-Instruct](https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct), a Vision Language Model (VLM) capable of understanding both visual data and text.\n",
     "\n",
@@ -1165,7 +1165,7 @@
     "id": "YIZOIVEzQqNg"
    },
    "source": [
-    "# 4. Fine-Tune the Model using TRL\n"
+    "## 4. Fine-Tune the Model using TRL\n"
    ]
   },
   {
@@ -1174,7 +1174,7 @@
     "id": "yIrR9gP2z90z"
    },
    "source": [
-    "## 4.1 Load the Quantized Model for Training ⚙️\n",
+    "### 4.1 Load the Quantized Model for Training ⚙️\n",
     "\n",
     "Next, we’ll load the quantized model using [bitsandbytes](https://huggingface.co/docs/bitsandbytes/main/en/index). If you want to learn more about quantization, check out [this blog post](https://huggingface.co/blog/merve/quantization) or [this one](https://www.maartengrootendorst.com/blog/quantization/).\n"
    ]
@@ -1246,7 +1246,7 @@
     "id": "65wfO29isQlX"
    },
    "source": [
-    "## 4.2 Set Up QLoRA and SFTConfig 🚀\n",
+    "### 4.2 Set Up QLoRA and SFTConfig 🚀\n",
     "\n",
     "Next, we will configure [QLoRA](https://github.com/artidoro/qlora) for our training setup. QLoRA enables efficient fine-tuning of large language models while significantly reducing the memory footprint compared to traditional methods. Unlike standard LoRA, which reduces memory usage by applying a low-rank approximation, QLoRA takes it a step further by quantizing the weights of the LoRA adapters. This leads to even lower memory requirements and improved training efficiency, making it an excellent choice for optimizing our model's performance without sacrificing quality.\n",
     "\n",
@@ -1361,7 +1361,7 @@
     "id": "pOUrD9P-y-Kf"
    },
    "source": [
-    "## 4.3 Training the Model 🏃"
+    "### 4.3 Training the Model 🏃"
    ]
   },
   {
@@ -1556,7 +1556,7 @@
     "id": "6yx_sGW42dN3"
    },
    "source": [
-    "# 5. Testing the Fine-Tuned Model 🔍\n",
+    "## 5. Testing the Fine-Tuned Model 🔍\n",
     "\n",
     "Now that we've successfully fine-tuned our Vision Language Model (VLM), it's time to evaluate its performance! In this section, we will test the model using examples from the ChartQA dataset to see how well it answers questions based on chart images. Let's dive in and explore the results! 🚀\n",
     "\n"
@@ -1993,7 +1993,7 @@
     "id": "daUMWw5xxhSc"
    },
    "source": [
-    "# 6. Compare Fine-Tuned Model vs. Base Model + Prompting 📊\n",
+    "## 6. Compare Fine-Tuned Model vs. Base Model + Prompting 📊\n",
     "\n",
     "We have explored how fine-tuning the VLM can be a valuable option for adapting it to our specific needs. Another approach to consider is directly using prompting or implementing a RAG system, which is covered in another [recipe](https://huggingface.co/learn/cookbook/multimodal_rag_using_document_retrieval_and_vlms).\n",
     "\n",
@@ -2205,7 +2205,7 @@
     "id": "Wgv0-sy8TLPE"
    },
    "source": [
-    "# 7. Continuing the Learning Journey 🧑‍🎓️\n",
+    "## 7. Continuing the Learning Journey 🧑‍🎓️\n",
     "\n",
     "To further enhance your understanding and skills in working with multimodal models, check out the following resources:\n",
     "\n",
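
Note for reviewers (not part of the patch): the change above demotes every numbered section heading in the notebook's markdown cells by one level. As a minimal sketch of the same idea, the snippet below shows how such a demotion could be applied or re-checked programmatically with nbformat. The notebook path is taken from the diff; the regex rule and the in-place rewrite are assumptions for illustration, not how the patch was produced.

import re
import nbformat

# Path taken from the diff above.
NOTEBOOK = "notebooks/en/fine_tuning_vlm_trl.ipynb"

nb = nbformat.read(NOTEBOOK, as_version=4)

for cell in nb.cells:
    if cell.cell_type != "markdown":
        continue
    # Demote numbered section headings by one level ("# 1. ..." -> "## 1. ...",
    # "## 4.1 ..." -> "### 4.1 ..."), mirroring the manual edits in the patch.
    # Headings without a leading section number (e.g. the notebook title) are untouched.
    cell.source = re.sub(
        r"^(#{1,2})( +\d)",
        lambda m: "#" + m.group(1) + m.group(2),
        cell.source,
        flags=re.MULTILINE,
    )

nbformat.write(nb, NOTEBOOK)

Keep in mind that nbformat.write re-serializes the notebook JSON, so the resulting diff may be noisier than the hand edit in this patch.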