diff --git a/README.md b/README.md
index 0f82ef8..041008f 100644
--- a/README.md
+++ b/README.md
@@ -24,11 +24,11 @@ Welcome to Video Maestro! 🚀 (formerly known as Video Manager), your ultimate
 ## 📸 Screen Shot
 
 <p align="center">
-  <img src="docs/screenshot3.png" alt="Video Maestro Screenshot" width="600">
+  <img src="docs/shot2.png" alt="Video Maestro Screenshot" width="600">
 </p>
 
 <p align="center">
-  <img src="docs/screenshot2.png" alt="Video Maestro Screenshot" width="600">
+  <img src="docs/shot1.png" alt="Video Maestro Screenshot" width="600">
 </p>
 
 ## ✨ Features
diff --git a/docs/screenshot1.png b/docs/screenshot1.png
deleted file mode 100644
index 896bf6e..0000000
Binary files a/docs/screenshot1.png and /dev/null differ
diff --git a/docs/screenshot2.png b/docs/screenshot2.png
deleted file mode 100644
index 7d12a89..0000000
Binary files a/docs/screenshot2.png and /dev/null differ
diff --git a/docs/screenshot3.png b/docs/screenshot3.png
deleted file mode 100644
index 9866c85..0000000
Binary files a/docs/screenshot3.png and /dev/null differ
diff --git a/docs/screenshot4.png b/docs/screenshot4.png
deleted file mode 100644
index 48c0511..0000000
Binary files a/docs/screenshot4.png and /dev/null differ
diff --git a/docs/shot1.png b/docs/shot1.png
new file mode 100644
index 0000000..42138f0
Binary files /dev/null and b/docs/shot1.png differ
diff --git a/docs/shot2.png b/docs/shot2.png
new file mode 100644
index 0000000..770b75c
Binary files /dev/null and b/docs/shot2.png differ
diff --git a/src-tauri/src/gemini.rs b/src-tauri/src/gemini.rs
new file mode 100644
index 0000000..7e5311e
--- /dev/null
+++ b/src-tauri/src/gemini.rs
@@ -0,0 +1,51 @@
+use serde::Deserialize;
+
+/// Top-level shape expected of a Gemini response payload.
+#[derive(Deserialize)]
+struct GeminiResponse {
+    candidates: Vec<GeminiCandidate>,
+}
+
+/// One generated candidate. Only `content` is read; any other field present in the JSON
+/// is ignored during deserialization.
+#[derive(Deserialize)]
+struct GeminiCandidate {
+    content: GeminiContent,
+}
+
+/// Content of a candidate: an ordered list of parts.
+#[derive(Deserialize)]
+struct GeminiContent {
+    parts: Vec<GeminiPart>,
+}
+
+/// A single part of a candidate's content, carrying text.
+#[derive(Deserialize)]
+struct GeminiPart {
+    text: String,
+}
+
+/// Extracts the generated text from one Gemini JSON payload.
+///
+/// The text of every part of the first candidate is concatenated in order, so a reply
+/// that is split into several parts is returned in full.
+///
+/// # Errors
+///
+/// Returns a message when `chunk` is not JSON of the expected shape, when the response
+/// has no candidates, or when the first candidate has no parts.
+pub fn parse_gemini(chunk: &str) -> Result<String, String> {
+    let response: GeminiResponse =
+        serde_json::from_str(chunk).map_err(|e| format!("JSON parse error: {}", e))?;
+
+    let Some(candidate) = response.candidates.first() else {
+        return Err("No candidates found in Gemini response".to_string());
+    };
+
+    let parts = &candidate.content.parts;
+    if parts.is_empty() {
+        return Err("No parts found in Gemini response".to_string());
+    }
+
+    Ok(parts.iter().map(|part| part.text.as_str()).collect())
+}
diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs
index 1854491..95683c7 100644
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -2,6 +2,7 @@ pub mod webvtt;
 use dotenv::dotenv;
 use tauri::{Emitter, Manager};
 mod db;
+mod gemini;
 mod setting;
 mod utils;
 mod whisper;
diff --git a/src-tauri/src/setting.rs b/src-tauri/src/setting.rs
index e845784..3abf3b4 100644
--- a/src-tauri/src/setting.rs
+++ b/src-tauri/src/setting.rs
@@ -9,6 +9,7 @@ pub struct AppSettings {
     pub api_key: Option<String>,
     pub ai_url: Option<String>,
     pub ai_model_name: Option<String>,
+    pub whisper_api_key: Option<String>,
     pub whisper_url: Option<String>,
     pub whisper_model_name: Option<String>,
     pub proxy: Option<String>,
diff --git a/src-tauri/src/utils.rs b/src-tauri/src/utils.rs
index 23dfb32..adec935 100644
--- a/src-tauri/src/utils.rs
+++ b/src-tauri/src/utils.rs
@@ -41,9 +41,12 @@ pub fn transform_subtitles_to_segments(subtitles: Vec<SubtitleEntry>) -> Vec<Seg
 }
 
 pub fn transform_segments_to_chunks(description: &str, segments: Vec<Segment>) -> Vec<String> {
-    let mut chunks = Vec::new();
+    let segments: Vec<Segment> = segments
+        .into_iter()
+        .filter(|segment| !segment.text.trim().is_empty())
+        .collect();
 
-    let mut current_string = String::new();
+    let mut chunks = Vec::new();
 
     let mut timelines = parse_timeline(description);
     timelines.sort_by_key(|e| e.timestamp);
@@ -77,41 +80,41 @@ pub fn transform_segments_to_chunks(description: &str, segments: Vec<Segment>) -
         }
 
         for timeline in timelines {
-            if current_string.len() + timeline.content.len() > 3000 {
-                chunks.push(current_string.clone());
-                current_string.clear();
-            };
-
-            current_string.push_str(&timeline.content);
+            chunks.push(timeline.content);
         }
-        if !current_string.is_empty() {
-            chunks.push(current_string);
-        }
-
         return chunks;
     }
 
-    let mut end_time = 0.0;
     for segment in segments {
-        if current_string.len() + segment.text.len() > 3000 {
-            chunks.push(current_string.clone());
-            current_string.clear();
-        };
-
-        if current_string.len() + segment.text.len() > 2000 && segment.start - end_time > 7.0 {
-            chunks.push(current_string.clone());
-            current_string.clear();
-        }
-
-        current_string.push_str(&segment.text);
-        end_time = segment.end;
+        chunks.push(format!(
+            "{} - {}",
+            convert_microseconds_to_time(segment.start as u64),
+            segment.text
+        ));
     }
+    chunks
+}
 
-    if !current_string.is_empty() {
-        chunks.push(current_string);
-    }
+/// Formats a timestamp as `MM:SS` (two digits minimum each). NOTE(review): the value is
+/// divided by 1_000 to get seconds, i.e. treated as milliseconds despite the name — confirm.
+fn convert_microseconds_to_time(microseconds: u64) -> String {
+    let whole_seconds = microseconds / 1_000;
+    format!("{:02}:{:02}", whole_seconds / 60, whole_seconds % 60)
+}
 
-    chunks
+/// Renders every segment as a `<time> - <text>` line terminated by `\n` and concatenates the
+/// lines, in the order given, into one transcript string.
+pub fn transform_segment_to_string(segments: Vec<Segment>) -> String {
+    segments
+        .into_iter()
+        .map(|segment| {
+            format!(
+                "{} - {}\n",
+                convert_microseconds_to_time(segment.start as u64),
+                segment.text
+            )
+        })
+        .collect()
+}
 
 #[cfg(test)]
diff --git a/src-tauri/src/whisper.rs b/src-tauri/src/whisper.rs
index ee0be45..63717fd 100644
--- a/src-tauri/src/whisper.rs
+++ b/src-tauri/src/whisper.rs
@@ -10,6 +10,8 @@ use tauri::{Emitter, State};
 use tokio::fs::{self, File};
 use tokio::io::AsyncReadExt;
 
+use crate::gemini::parse_gemini;
+
 use super::db::{self, DataBase};
 use super::setting;
 use super::utils;
@@ -21,6 +23,23 @@ struct TranscriptionResponse {
     segments: Vec<Segment>,
 }
 
+#[derive(Debug, Deserialize, Serialize, Clone)]
+struct GeminiRequest {
+    model: String,
+    contents: Vec<GeminiMessage>,
+}
+
+#[derive(Debug, Deserialize, Serialize, Clone)]
+struct GeminiMessage {
+    role: String,
+    parts: Vec<GeminiPart>,
+}
+
+#[derive(Debug, Deserialize, Serialize, Clone)]
+struct GeminiPart {
+    text: String,
+}
+
 #[derive(Debug, Deserialize, Serialize, Clone)]
 struct Message {
     role: Role,
@@ -84,9 +103,9 @@ pub async fn create_client(app: &tauri::AppHandle) -> Result<Client> {
 //
 #[derive(Debug, Deserialize, Serialize)]
 pub struct Segment {
-    pub text: String,
     pub start: f64,
     pub end: f64,
+    pub text: String,
 }
 
 //
@@ -94,24 +113,206 @@ pub struct Segment {
 //
 fn get_system_prompt(language: &str) -> String {
     let prompt = match language {
-    "es" => "Eres un resumidor multilingüe avanzado. Tu tarea es resumir el contenido proporcionado en español. Si hay una descripción disponible, úsala como contexto para mejorar el resumen. Para contenido corto, proporciona un resumen breve que capture los puntos clave. Para contenido largo, crea un resumen más detallado. Si el contenido no está en español, tradúcelo antes de resumir.",
-
-    "fr" => "Vous êtes un résumé multilingue avancé. Votre tâche est de résumer le contenu fourni en français. Si une description est disponible, utilisez-la comme contexte pour améliorer le résumé. Pour un contenu court, fournissez un bref résumé capturant les points clés. Pour un contenu long, créez un résumé plus détaillé. Si le contenu n'est pas en français, traduisez-le avant de le résumer.",
-
-    "de" => "Sie sind ein fortgeschrittener mehrsprachiger Zusammenfasser. Ihre Aufgabe ist es, den bereitgestellten Inhalt auf Deutsch zusammenzufassen. Wenn eine Beschreibung verfügbar ist, nutzen Sie diese als Kontext für eine bessere Zusammenfassung. Bei kurzem Inhalt erstellen Sie eine knappe Zusammenfassung der Kernpunkte. Bei langem Inhalt erstellen Sie eine ausführlichere Zusammenfassung. Wenn der Inhalt nicht auf Deutsch ist, übersetzen Sie ihn vor der Zusammenfassung.",
-
-    "zh" => "您是一位高级多语言摘要工具。您的任务是用中文总结所提供的内容。如果有描述信息，请将其作为背景来改进摘要。对于简短内容，请提供捕捉要点的简明总结。对于较长内容，请创建更详细的摘要。如果内容不是中文，请先翻译再总结。",
-
-    "zh-TW" => "您是一位高級多語言摘要工具。您的任務是用繁體中文總結所提供的內容。如果有描述資訊，請將其作為背景來改進摘要。對於簡短內容，請提供捕捉要點的簡明總結。對於較長內容，請創建更詳細的摘要。如果內容不是繁體中文，請先翻譯再總結。",
-
-    "ar" => "أنت مُلخص متعدد اللغات متقدم. مهمتك هي تلخيص المحتوى المقدم باللغة العربية. إذا كان هناك وصف متاح، استخدمه كسياق لتحسين الملخص. للمحتوى القصير، قدم ملخصاً موجزاً يلتقط النقاط الرئيسية. للمحتوى الطويل، قم بإنشاء ملخص أكثر تفصيلاً. إذا لم يكن المحتوى باللغة العربية، قم بترجمته قبل تلخيصه.",
-
-    "ru" => "Вы — продвинутый многоязычный резюмер. Ваша задача — создать резюме предоставленного контента на русском языке. Если доступно описание, используйте его как контекст для улучшения резюме. Для короткого контента предоставьте краткое резюме, охватывающее ключевые моменты. Для длинного контента создайте более подробное резюме. Если контент не на русском, переведите его перед резюмированием.",
-
-    "ja" => "あなたは高度な多言語要約者です。提供されたコンテンツを日本語で要約するのがあなたの任務です。説明が利用可能な場合は、それをコンテキストとして要約の改善に使用してください。短いコンテンツの場合は、重要なポイントを捉えた簡潔な要約を提供し、長いコンテンツの場合は、より詳細な要約を作成してください。内容が日本語でない場合、翻訳してから要約してください。",
-
-    _ => "You are an advanced multilingual summarizer. Your task is to summarize the provided content in English. If a description is available, use it as context to improve the summary. For short content, provide a brief summary capturing the key points. For longer content, create a more detailed summary. If the content is not in English, translate it before summarizing.",
-};
+        "zh" => {
+            r#"
+            你是一名专注于分析视频内容的助手。根据以下提供的视频字幕（格式为 `[时间线 - 文本]`），生成章节。如果有视频简介并且包含时间线，各章节参考时间线并保持一致。每个章节应包括以下内容：
+            1. 一个编号比如1， 2 ，3。
+            2. 根据该章节中最早和最晚的时间戳确定的开始时间。
+            3. 对该章节内容的简洁总结，格式为 `总结：编号 时间线 内容`, 如果可以，根据内容提供相应的emoji。
+            指导原则：
+            - 根据主题或内容的变化将字幕分组为章节。
+            - 使用文本中的逻辑过渡点识别新章节的开始位置。
+            - 总结应为3-5句话，概括关键内容。
+            - 可以适当使用emoji.
+            并对根据各章节内容判断是否需要展开介绍，展开介绍在总结结束后。
+            指导原则:
+            - 要和总结的编号，时间线保持一致。
+            - 展开介绍大概是总结长度的3-5倍。
+            展开介绍的输出格式：
+            最终输出的总结部分格式如下：
+            ## 总结：
+            编号. option<emoji> 开始时间 - 内容总结
+            编号. option<emoji> 开始时间 - 内容总结
+            ...
+            ## 展开介绍:
+            编号.  option<emoji> 开始时间 - 内容展开
+            编号.  option<emoji> 开始时间 - 内容展开
+            ...
+            如果内容不是中文，请先翻译成中文。
+            以下是字幕内容：
+            "#
+        }
+        "zh-TW" => {
+            r#"
+        你是一名專注於分析影片內容的助手。根據以下提供的影片字幕（格式為 `[時間線 - 文字]`），產生章節。如果有影片簡介並且包含時間線，各章節參考時間線並保持一致。每個章節應包括以下內容：
+        1. 一個編號比如1， 2 ，3。
+        2. 根據該章節中最早和最晚的時間戳確定的開始時間。
+        3. 對該章節內容的簡潔總結，格式為 `總結：編號 時間線 內容`, 如果可以，根據內容提供相應的emoji。
+        最終輸出的總結部分格式如下：
+        指導原則：
+        - 根據主題或內容的變化將字幕分組為章節。
+        - 使用文本中的邏輯過渡點識別新章節的開始位置。
+        - 總結應為3-5句話，概括關鍵內容。
+        - 可以適當使用emoji.
+        並對根據各章節內容判斷是否需要展開介紹，展開介紹在總結結束後。
+        指導原則:
+        - 要和總結的編號，時間線保持一致。
+        - 展開介紹大概是總結長度的3-5倍。
+        展開介紹的輸出格式：
+        ## 總結：
+        編號. option<emoji> 開始時間 - 內容總結
+        編號. option<emoji> 開始時間 - 內容總結
+        ...
+        ## 展開介紹:
+        編號.  option<emoji> 開始時間 - 內容展開
+        編號.  option<emoji> 開始時間 - 內容展開
+        ...
+        如果內容不是中文，請先翻譯成中文。
+        以下是字幕內容：
+        "#
+        }
+        "es" => {
+            r#"
+        Eres un asistente especializado en analizar contenido de video. Basándote en los subtítulos del video proporcionados a continuación (en el formato `[Marca de tiempo - Texto]`), genera capítulos. Si se proporciona una descripción del video con marcas de tiempo, consúltala y mantén la coherencia. Cada capítulo debe incluir:
+        1. Un número, p. ej., 1, 2, 3.
+        2. La hora de inicio determinada por las marcas de tiempo más temprana y más tardía en ese capítulo.
+        3. Un resumen conciso del contenido del capítulo, con el formato `Resumen: Número Marca de tiempo Contenido`, proporcionando emojis relevantes según el contenido si es posible.
+        Principios rectores:
+        - Agrupa los subtítulos en capítulos según los cambios en el tema o el contenido.
+        - Utiliza puntos de transición lógica en el texto para identificar el inicio de nuevos capítulos.
+        - Los resúmenes deben tener de 3 a 5 oraciones, resumiendo el contenido clave.
+        - Utiliza emojis cuando sea apropiado.
+        Según el contenido de cada capítulo, determina si se necesita una explicación más detallada. Esta explicación detallada debe aparecer después del resumen.
+        Principios rectores:
+        - Mantén la coherencia con la numeración y las marcas de tiempo de los resúmenes.
+        - La explicación detallada debe ser aproximadamente de 3 a 5 veces la longitud del resumen.
+        La sección de resumen de la salida final debe tener el siguiente formato:
+        ## Resumen:
+        Número. option<emoji> Hora de inicio - Resumen del contenido
+        Número. option<emoji> Hora de inicio - Resumen del contenido
+        ...
+        ## Explicación detallada:
+        Número. option<emoji> Hora de inicio - Contenido detallado
+        Número. option<emoji> Hora de inicio - Contenido detallado
+        ...
+        Si el contenido no está en español, tradúcelo al español primero.
+        Aquí están los subtítulos:
+        "#
+        }
+        "fr" => {
+            r#"
+        Vous êtes un assistant spécialisé dans l'analyse de contenu vidéo. Sur la base des sous-titres vidéo fournis ci-dessous (au format `[Horodatage - Texte]`), générez des chapitres. Si une description vidéo avec des horodatages est fournie, référez-vous y et maintenez la cohérence. Chaque chapitre doit inclure :
+        1. Un numéro, par ex. : 1, 2, 3.
+        2. L'heure de début déterminée par les horodatages les plus anciens et les plus récents de ce chapitre.
+        3. Un résumé concis du contenu du chapitre, au format `Résumé : Numéro Horodatage Contenu`, en fournissant des emojis pertinents en fonction du contenu si possible.
+        Principes directeurs :
+        - Regroupez les sous-titres en chapitres en fonction des changements de sujet ou de contenu.
+        - Utilisez les points de transition logique dans le texte pour identifier le début de nouveaux chapitres.
+        - Les résumés doivent comporter de 3 à 5 phrases, résumant le contenu clé.
+        - Utilisez des emojis lorsque cela est approprié.
+        En fonction du contenu de chaque chapitre, déterminez si une explication plus détaillée est nécessaire. Cette explication détaillée doit apparaître après le résumé.
+        Principes directeurs :
+        - Maintenez la cohérence avec la numérotation et les horodatages des résumés.
+        - L'explication détaillée doit être environ 3 à 5 fois plus longue que le résumé.
+        La section de résumé de la sortie finale doit être formatée comme suit :
+        ## Résumé :
+        Numéro. option<emoji> Heure de début - Résumé du contenu
+        Numéro. option<emoji> Heure de début - Résumé du contenu
+        ...
+        ## Explication détaillée :
+        Numéro. option<emoji> Heure de début - Contenu détaillé
+        Numéro. option<emoji> Heure de début - Contenu détaillé
+        ...
+        Si le contenu n'est pas en français, veuillez d'abord le traduire en français.
+        Voici les sous-titres :
+        "#
+        }
+        "de" => {
+            r#"
+        Du bist ein Assistent, der sich auf die Analyse von Videoinhalten spezialisiert hat. Generiere basierend auf den unten angegebenen Video-Untertiteln (im Format `[Zeitstempel - Text]`) Kapitel. Wenn eine Videobeschreibung mit Zeitstempeln vorhanden ist, beziehe dich darauf und sorge für Konsistenz. Jedes Kapitel sollte Folgendes enthalten:
+        1. Eine Nummer, z. B. 1, 2, 3.
+        2. Die Startzeit, die durch die frühesten und spätesten Zeitstempel in diesem Kapitel bestimmt wird.
+        3. Eine prägnante Zusammenfassung des Inhalts des Kapitels im Format `Zusammenfassung: Nummer Zeitstempel Inhalt`. Füge nach Möglichkeit relevante Emojis basierend auf dem Inhalt hinzu.
+        Leitprinzipien:
+        - Gruppiere Untertitel basierend auf Änderungen in Thema oder Inhalt in Kapitel.
+        - Verwende logische Übergangspunkte im Text, um den Beginn neuer Kapitel zu identifizieren.
+        - Zusammenfassungen sollten 3-5 Sätze umfassen und den wichtigsten Inhalt zusammenfassen.
+        - Verwende Emojis, wo es angebracht ist.
+        Beurteile anhand des Inhalts jedes Kapitels, ob eine detailliertere Erläuterung erforderlich ist. Diese detailliertere Erläuterung sollte nach der Zusammenfassung erscheinen.
+        Leitprinzipien:
+        - Sorge für Konsistenz mit der Nummerierung und den Zeitstempeln der Zusammenfassungen.
+        - Die detailliertere Erläuterung sollte ungefähr 3-5 Mal so lang sein wie die Zusammenfassung.
+        Die Zusammenfassung im Endergebnis sollte wie folgt formatiert sein:
+        ## Zusammenfassung:
+        Nummer. option<emoji> Startzeit - Inhaltszusammenfassung
+        Nummer. option<emoji> Startzeit - Inhaltszusammenfassung
+        ...
+        ## Detaillierte Erläuterung:
+        Nummer. option<emoji> Startzeit - Detaillierter Inhalt
+        Nummer. option<emoji> Startzeit - Detaillierter Inhalt
+        ...
+        Wenn der Inhalt nicht auf Deutsch ist, übersetze ihn bitte zuerst ins Deutsche.
+        Hier sind die Untertitel:
+        "#
+        }
+        "ja" => {
+            r#"
+        あなたは、ビデオコンテンツの分析に特化したアシスタントです。以下に示すビデオの字幕（`[タイムスタンプ - テキスト]`形式）に基づいて、章を生成してください。タイムスタンプを含むビデオの説明がある場合は、それを参照して一貫性を保ってください。各章には以下を含める必要があります。
+        1. 番号（例：1、2、3）。
+        2. その章の最初と最後のタイムスタンプによって決定される開始時間。
+        3. 章の内容の簡潔な要約（`要約：番号 タイムスタンプ 内容`形式）。可能であれば、内容に基づいて適切な絵文字を提供します。
+        指針：
+        - トピックまたは内容の変更に基づいて字幕を章にグループ化します。
+        - テキスト内の論理的な移行ポイントを使用して、新しい章の開始位置を特定します。
+        - 要約は、主要な内容を要約した3〜5文にする必要があります。
+        - 適切な場合は絵文字を使用します。
+        各章の内容に基づいて、より詳細な説明が必要かどうかを判断します。この詳細な説明は、要約の後に表示される必要があります。
+        指針：
+        - 要約の番号とタイムスタンプとの一貫性を保ちます。
+        - 詳細な説明は、要約の約3〜5倍の長さにする必要があります。
+        詳細な説明の出力形式は次のとおりです。
+        最終出力の要約セクションは、次の形式にする必要があります。
+        ## 要約：
+        番号. option<emoji> 開始時間 - 内容要約
+        番号. option<emoji> 開始時間 - 内容要約
+        ...
+        ## 詳細な説明：
+        番号. option<emoji> 開始時間 - 詳細な内容
+        番号. option<emoji> 開始時間 - 詳細な内容
+        ...
+        コンテンツが日本語でない場合は、最初に日本語に翻訳してください。
+        以下は字幕です。
+        "#
+        }
+        _ => {
+            r#"
+        You are an assistant specializing in analyzing video content. Based on the video subtitles provided below (in the format `[Timestamp - Text]`), generate chapters. If a video description with timestamps is provided, refer to it and maintain consistency. Each chapter should include:
+        1. A number, e.g., 1, 2, 3.
+        2. The start time determined by the earliest and latest timestamps in that chapter.
+        3. A concise summary of the chapter's content, formatted as `Summary: Number Timestamp Content`, providing relevant emojis based on the content if possible.
+        Guiding principles:
+        - Group subtitles into chapters based on changes in topic or content.
+        - Use logical transition points in the text to identify the start of new chapters.
+        - Summaries should be 3-5 sentences, summarizing the key content.
+        - Use emojis where appropriate.
+        Based on each chapter's content, determine if a more detailed explanation is needed. 
+        Guiding principles:
+        - Maintain consistency with the numbering and timestamps of the summaries.
+        - The detailed explanation should be approximately 3-5 times the length of the summary.
+        The final output's summary sections should be formatted as follows:
+        ## Short Summary:
+        Number. option<emoji> Start Time - Content Summary
+        Number. option<emoji> Start Time - Content Summary
+        ...
+        ## Detailed Explanation:
+        Number. option<emoji> Start Time - Detailed Content
+        Number. option<emoji> Start Time - Detailed Content
+        ...
+        If the content is not in English, please translate it to English first.
+        Here are the subtitles:
+        "#
+        }
+    };
     prompt.to_string()
 }
 
@@ -131,19 +332,14 @@ pub async fn run_summary(
     let (transcripts, description) = db::get_subtitle_with_id(db, video_id)?;
     let subtitles: Vec<Segment> = serde_json::from_str(&transcripts).map_err(|e| e.to_string())?;
 
-    let chunks = utils::transform_segments_to_chunks(&description, subtitles);
-
-    let mut summary = Vec::new();
+    let content = utils::transform_segment_to_string(subtitles);
 
     app.emit("summary", "[start]".to_string())
         .map_err(|e| e.to_string())?;
-    for chunk in chunks {
-        summary.push(chat_stream(&app, &chunk, &lang, &description).await?)
-    }
+    let summary_content = chat_stream(&app, &content, &lang, &description).await?;
     app.emit("summary", "[end]".to_string())
         .map_err(|e| e.to_string())?;
 
-    let summary_content = summary.join("\n\n");
     db::update_video(
         app.state(),
         video_id,
@@ -170,11 +366,124 @@ pub async fn chat_stream(
         }) => (ai_url, ai_model_name, api_key),
         _ => return Err("no api settings found".to_string()),
     };
-
     let client = create_client(app).await.map_err(|e| e.to_string())?;
 
-    let message = format!("short description for the whole content: {description}. and with content or partial content as following: {user_message}");
+    let message = format!(
+        "short description for the whole content: {description}. full subtitles: {user_message}"
+    );
 
+    if api_url.contains("googleapis") {
+        handle_gemini_api(app, lang, message, llm_model, client, &api_url, &api_key).await
+    } else {
+        handle_open_api(app, lang, message, llm_model, client, &api_url, &api_key).await
+    }
+}
+
+/// Sends the prompt to a Gemini-style endpoint and relays the streamed reply to the frontend.
+///
+/// The system prompt for `lang` is sent as a leading `model` message, followed by `message`
+/// as the `user` message. Every piece of generated text is emitted as a `summary` event as
+/// soon as it can be parsed; the concatenation of all pieces is returned.
+///
+/// Received bytes are buffered, so an event (or a multi-byte UTF-8 character) that is split
+/// across two network chunks is parsed once it is complete instead of being dropped.
+///
+/// NOTE(review): `_api_key` is unused and no auth header is sent, so the key is presumably
+/// already part of `api_url` - confirm.
+///
+/// # Errors
+///
+/// Returns the error text when the request fails, the server answers with a non-success
+/// status, the body stream breaks, or a `summary` event cannot be emitted.
+async fn handle_gemini_api(
+    app: &tauri::AppHandle,
+    lang: &str,
+    message: String,
+    llm_model: String,
+    client: Client,
+    api_url: &str,
+    _api_key: &str,
+) -> Result<String, String> {
+    let contents: Vec<GeminiMessage> = vec![
+        GeminiMessage {
+            role: "model".to_string(),
+            parts: vec![GeminiPart {
+                text: get_system_prompt(lang),
+            }],
+        },
+        GeminiMessage {
+            role: "user".to_string(),
+            parts: vec![GeminiPart { text: message }],
+        },
+    ];
+
+    let request = GeminiRequest {
+        model: llm_model,
+        contents,
+    };
+
+    let response = client
+        .post(api_url)
+        .header("Content-Type", "application/json")
+        .json(&request)
+        .send()
+        .await
+        .map_err(|e| e.to_string())?;
+
+    // Without this check an error body would simply fail to parse below and the caller
+    // would receive an empty summary instead of an error.
+    let status = response.status();
+    if !status.is_success() {
+        let body = response.text().await.unwrap_or_default();
+        return Err(format!("Gemini API request failed ({status}): {body}"));
+    }
+
+    let mut summary = String::new();
+    // Raw bytes received but not yet consumed by a successfully parsed event.
+    let mut pending: Vec<u8> = Vec::new();
+
+    let mut stream = response.bytes_stream();
+    while let Some(chunk) = stream.next().await {
+        pending.extend_from_slice(&chunk.map_err(|e| e.to_string())?);
+
+        // A character cut off at the end of the buffer only shows up as U+FFFD in this
+        // temporary view; the bytes themselves stay in `pending` until they are complete.
+        let text = String::from_utf8_lossy(&pending).into_owned();
+        let mut offset = 0;
+        let mut keep_from = text.len();
+
+        let mut pieces = text.split("data: ").peekable();
+        while let Some(piece) = pieces.next() {
+            let payload = piece.trim();
+            if !payload.is_empty() && payload != "[DONE]" {
+                match parse_gemini(payload) {
+                    Ok(content) => {
+                        summary.push_str(&content);
+                        app.emit("summary", content).map_err(|e| e.to_string())?;
+                    }
+                    // The last piece may just be incomplete: keep it for the next chunk.
+                    Err(_) if pieces.peek().is_none() => keep_from = offset,
+                    Err(_) => {}
+                }
+            }
+            offset += piece.len() + "data: ".len();
+        }
+
+        pending.drain(..keep_from.min(pending.len()));
+    }
+
+    Ok(summary)
+}
+
+async fn handle_open_api(
+    app: &tauri::AppHandle,
+    lang: &str,
+    message: String,
+    llm_model: String,
+    client: Client,
+    api_url: &str,
+    api_key: &str,
+) -> Result<String, String> {
     let request = ChatRequest {
         messages: vec![
             Message {
@@ -186,9 +457,10 @@ pub async fn chat_stream(
                 content: message,
             },
         ],
-        model: llm_model.to_string(),
+        model: llm_model,
         stream: true,
     };
+
     let response = client
         .post(api_url)
         .header("Content-Type", "application/json")
@@ -274,7 +546,7 @@ pub async fn trancript(app: &tauri::AppHandle, audio_path: &Path) -> Result<Vec<
         Some(setting::AppSettings {
             whisper_url: Some(whisper_url),
             whisper_model_name: Some(whisper_model_name),
-            api_key: Some(api_key),
+            whisper_api_key: Some(api_key),
             ..
         }) => (whisper_url, whisper_model_name, api_key),
         _ => return Err("no api settings found".to_string()),
diff --git a/src/components/LanguageSelector.tsx b/src/components/LanguageSelector.tsx
index fc1e413..c3e4a17 100644
--- a/src/components/LanguageSelector.tsx
+++ b/src/components/LanguageSelector.tsx
@@ -1,6 +1,6 @@
 import * as React from "react";
 import * as Select from "@radix-ui/react-select";
-import { ChevronDownIcon, CheckIcon, Languages } from "lucide-react";
+import { CheckIcon, ChevronDownIcon, Languages } from "lucide-react";
 
 interface Language {
   code: string;
@@ -15,8 +15,6 @@ const LANGUAGES: Language[] = [
   { code: "es", name: "Español" },
   { code: "fr", name: "Français" },
   { code: "de", name: "Deutsch" },
-  { code: "ar", name: "العربية" },
-  { code: "ru", name: "Русский" },
 ];
 
 interface LanguageSelectorProps {
diff --git a/src/components/SettingsModal.tsx b/src/components/SettingsModal.tsx
index a403b0d..c21337b 100644
--- a/src/components/SettingsModal.tsx
+++ b/src/components/SettingsModal.tsx
@@ -1,12 +1,13 @@
 import * as React from "react";
 import * as Dialog from "@radix-ui/react-dialog";
-import { Settings, X, Eye, EyeOff } from "lucide-react";
+import { Eye, EyeOff, Settings, X } from "lucide-react";
 import { useSettings } from "store/SettingsProvider";
-import { SettingsType } from "types/settings";
+import type { SettingsType } from "types/settings";
 
 const SettingsModal: React.FC = () => {
   const [isOpen, setIsOpen] = React.useState(false);
   const [showApiKey, setShowApiKey] = React.useState(false);
+  const [showWhisperApiKey, setShowWhisperApiKey] = React.useState(false);
 
   const { settings: saveSettings, updateSettings } = useSettings();
   const [settings, setSettings] = React.useState<SettingsType>(saveSettings);
@@ -135,6 +136,38 @@ const SettingsModal: React.FC = () => {
                 />
               </div>
 
+              <div className="relative">
+                <label
+                  htmlFor="whisperApiKey"
+                  className="block text-sm font-medium text-gray-700 mb-1"
+                >
+                  Whisper API Key
+                </label>
+                <div className="flex items-center">
+                  <input
+                    type={showWhisperApiKey ? "text" : "password"}
+                    id="whisperApiKey"
+                    name="whisperApiKey"
+                    value={settings.whisperApiKey || ""}
+                    onChange={handleInputChange}
+                    className="flex-grow px-3 py-2 border border-gray-300 rounded-md shadow-sm 
+                               focus:outline-none focus:ring-2 focus:ring-blue-500"
+                    placeholder="Enter your Whisper API key"
+                  />
+                  <button
+                    type="button"
+                    onClick={() => setShowWhisperApiKey(!showWhisperApiKey)}
+                    className="ml-2 text-gray-500 hover:text-gray-700"
+                  >
+                    {showWhisperApiKey ? (
+                      <EyeOff className="w-5 h-5" />
+                    ) : (
+                      <Eye className="w-5 h-5" />
+                    )}
+                  </button>
+                </div>
+              </div>
+
               <div>
                 <label
                   htmlFor="whisperUrl"
diff --git a/src/store/SettingsProvider.tsx b/src/store/SettingsProvider.tsx
index edab404..16246d7 100644
--- a/src/store/SettingsProvider.tsx
+++ b/src/store/SettingsProvider.tsx
@@ -1,5 +1,5 @@
 import * as React from "react";
-import { SettingsType } from "types/settings";
+import type { SettingsType } from "types/settings";
 
 import { invoke } from "@tauri-apps/api/core";
 
@@ -7,6 +7,7 @@ const defaultSettings: SettingsType = {
   apiKey: null,
   aiUrl: null,
   aiModelName: null,
+  whisperApiKey: null,
   whisperUrl: null,
   whisperModelName: null,
   proxy: null,
diff --git a/src/types/settings.ts b/src/types/settings.ts
index 53791e6..9a0a3fa 100644
--- a/src/types/settings.ts
+++ b/src/types/settings.ts
@@ -2,6 +2,7 @@ export interface SettingsType {
   apiKey: string | null;
   aiUrl: string | null;
   aiModelName: string | null;
+  whisperApiKey: string | null;
   whisperUrl: string | null;
   whisperModelName: string | null;
   proxy: string | null;