From 45e57073c0a9ef33c4f67c9c88a0f8cc316f1f41 Mon Sep 17 00:00:00 2001
From: Liam Thompson <32779855+leemthompo@users.noreply.github.com>
Date: Tue, 7 Jan 2025 15:18:39 +0100
Subject: [PATCH] [DOCS] RAG overview (#119590)

---
 docs/reference/images/search/rag-schema.svg   | 73 ++++++++++++++++++
 .../images/search/rag-venn-diagram.svg        | 19 +++++
 .../retrieval-augmented-generation.asciidoc   | 76 +++++++++++++++++++
 .../search-your-data.asciidoc                 |  1 +
 4 files changed, 169 insertions(+)
 create mode 100644 docs/reference/images/search/rag-schema.svg
 create mode 100644 docs/reference/images/search/rag-venn-diagram.svg
 create mode 100644 docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
diff --git a/docs/reference/images/search/rag-schema.svg b/docs/reference/images/search/rag-schema.svg
new file mode 100644
index 0000000000000..f26edac6c0077
--- /dev/null
+++ b/docs/reference/images/search/rag-schema.svg
@@ -0,0 +1,73 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<svg viewBox="-20 -80 840 380" xmlns="http://www.w3.org/2000/svg">
+    <!-- Arrowhead markers: grey (#666) for the solid main-flow lines, blue (#0066cc) for the dashed lines coming from the top boxes -->
+    <defs>
+        <marker id="arrowhead" markerWidth="10" markerHeight="7" refX="9" refY="3.5" orient="auto">
+            <polygon points="0 0, 10 3.5, 0 7" fill="#666"/>
+        </marker>
+        <marker id="arrowhead-blue" markerWidth="10" markerHeight="7" refX="9" refY="3.5" orient="auto">
+            <polygon points="0 0, 10 3.5, 0 7" fill="#0066cc"/>
+        </marker>
+    </defs>
+    
+    <!-- Custom instructions component: two-line caption, rounded box, and a dashed blue arrow pointing down at the language model box (x=470) -->
+    <g>
+        <text x="470" y="-15" text-anchor="middle" font-family="Arial" font-size="11" fill="#666">Define how the model should</text>
+        <text x="470" y="-5" text-anchor="middle" font-family="Arial" font-size="11" fill="#666">parse and render information</text>
+        <rect x="410" y="5" width="120" height="35" rx="10" fill="#e8f0f9" stroke="#0066cc"/>
+        <text x="470" y="25" text-anchor="middle" font-family="Arial" font-size="12" fill="#333">Custom instructions</text>
+        <line x1="470" y1="40" x2="470" y2="95" stroke="#0066cc" stroke-width="2" stroke-dasharray="2" marker-end="url(#arrowhead-blue)"/>
+    </g>
+
+    <!-- Search strategy component: two-line caption, rounded box, and a dashed blue arrow pointing down at the Elasticsearch box (x=310) -->
+    <g>
+        <text x="310" y="-15" text-anchor="middle" font-family="Arial" font-size="11" fill="#666">Full-text, semantic</text>
+        <text x="310" y="-5" text-anchor="middle" font-family="Arial" font-size="11" fill="#666">or hybrid search</text>
+        <rect x="250" y="5" width="120" height="35" rx="10" fill="#e8f0f9" stroke="#0066cc"/>
+        <text x="310" y="25" text-anchor="middle" font-family="Arial" font-size="12" fill="#333">Search strategy</text>
+        <line x1="310" y1="40" x2="310" y2="95" stroke="#0066cc" stroke-width="2" stroke-dasharray="2" marker-end="url(#arrowhead-blue)"/>
+    </g>
+
+    <!-- Numbered step markers 1 to 4: dark circles with white digits, placed in a row at y=85 just above the main flow -->
+    <g>
+        <circle cx="150" cy="85" r="10" fill="#333"/>
+        <text x="150" y="89" text-anchor="middle" font-family="Arial" font-size="12" fill="white">1</text>
+        
+        <circle cx="240" cy="85" r="10" fill="#333"/>
+        <text x="240" y="89" text-anchor="middle" font-family="Arial" font-size="12" fill="white">2</text>
+        
+        <circle cx="400" cy="85" r="10" fill="#333"/>
+        <text x="400" y="89" text-anchor="middle" font-family="Arial" font-size="12" fill="white">3</text>
+        
+        <circle cx="580" cy="85" r="10" fill="#333"/>
+        <text x="580" y="89" text-anchor="middle" font-family="Arial" font-size="12" fill="white">4</text>
+    </g>
+
+    <!-- Main flow components, left to right: user query, Elasticsearch, language model, response -->
+    <!-- Input component: green pill-shaped "User query" node; its right edge is at x=210, where the first flow arrow starts -->
+    <path d="M150,115 h45 a15,15 0 0 1 0,30 h-45 a15,15 0 0 1 0,-30" fill="#edf7ec" stroke="#006400"/>
+    <text x="172" y="135" text-anchor="middle" font-family="Arial" font-size="12" fill="#006400">User query</text>
+
+    <!-- Search component: grey Elasticsearch box with a two-line caption underneath -->
+    <rect x="250" y="100" width="120" height="60" rx="10" fill="#f0f0f0" stroke="#000"/>
+    <text x="310" y="135" text-anchor="middle" font-family="Arial" font-size="12" fill="#333">Elasticsearch</text>
+    <text x="310" y="180" text-anchor="middle" font-family="Arial" font-size="12" fill="#666">
+        <tspan font-weight="bold">Retrieves</tspan> relevant</text>
+    <text x="310" y="195" text-anchor="middle" font-family="Arial" font-size="12" fill="#666">documents</text>
+
+    <!-- Processing component: grey language model box with a two-line caption underneath -->
+    <rect x="410" y="100" width="120" height="60" rx="10" fill="#f0f0f0" stroke="#000"/>
+    <text x="470" y="135" text-anchor="middle" font-family="Arial" font-size="12" fill="#333">Language model</text>
+    <text x="470" y="180" text-anchor="middle" font-family="Arial" font-size="12" fill="#666">Processes context &amp;</text>
+    <text x="470" y="195" text-anchor="middle" font-family="Arial" font-size="12" fill="#666">
+        <tspan font-weight="bold">generates</tspan> answer</text>
+
+    <!-- Flow connections: solid grey arrows at y=130 joining each main component to the next one on its right -->
+    <line x1="210" y1="130" x2="250" y2="130" stroke="#666" stroke-width="2" marker-end="url(#arrowhead)"/>
+    <line x1="370" y1="130" x2="410" y2="130" stroke="#666" stroke-width="2" marker-end="url(#arrowhead)"/>
+    <line x1="530" y1="130" x2="570" y2="130" stroke="#666" stroke-width="2" marker-end="url(#arrowhead)"/>
+
+    <!-- Output component: green pill-shaped "Response" node; its left edge is at x=570, where the last flow arrow ends -->
+    <path d="M585,115 h45 a15,15 0 0 1 0,30 h-45 a15,15 0 0 1 0,-30" fill="#edf7ec" stroke="#006400"/>
+    <text x="607" y="135" text-anchor="middle" font-family="Arial" font-size="12" fill="#006400">Response</text>
+</svg>
\ No newline at end of file
diff --git a/docs/reference/images/search/rag-venn-diagram.svg b/docs/reference/images/search/rag-venn-diagram.svg
new file mode 100644
index 0000000000000..9906aaefaba0c
--- /dev/null
+++ b/docs/reference/images/search/rag-venn-diagram.svg
@@ -0,0 +1,19 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 600 400">
+  <!-- Left circle (Information Retrieval): blue, 60% opacity, centered at (220, 200) -->
+  <circle cx="220" cy="200" r="150" fill="#4A90E2" opacity="0.6"/>
+  
+  <!-- Right circle (Generative AI): green, 60% opacity, centered at (380, 200); it overlaps the left circle between x=230 and x=370 -->
+  <circle cx="380" cy="200" r="150" fill="#50C878" opacity="0.6"/>
+  
+  <!-- Text labels, two lines each (second line is a tspan). NOTE(review): the left label baselines (210/235) sit 10 units lower than the right label's (200/225); confirm this offset is intentional -->
+  <text x="160" y="210" font-family="Arial" font-size="20" fill="#2C3E50" text-anchor="middle">Information
+    <tspan x="160" y="235">retrieval</tspan>
+  </text>
+  
+  <text x="440" y="200" font-family="Arial" font-size="20" fill="#2C3E50" text-anchor="middle">Generative
+    <tspan x="440" y="225">AI</tspan>
+  </text>
+  
+  <!-- RAG label in the intersection, anchored at x=300, the midpoint between the two circle centers (220 and 380) -->
+  <text x="300" y="200" font-family="Arial" font-size="28" font-weight="bold" fill="#2C3E50" text-anchor="middle">RAG</text>
+</svg>
\ No newline at end of file
diff --git a/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc b/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
new file mode 100644
index 0000000000000..2958999ede91d
--- /dev/null
+++ b/docs/reference/search/search-your-data/retrieval-augmented-generation.asciidoc
@@ -0,0 +1,76 @@
+[[rag-elasticsearch]]
+== Retrieval augmented generation
+
+.🍿 Prefer a video introduction?
+***********************
+Check out https://www.youtube.com/watch?v=OS4ZefUPAks[this short video] from the Elastic Snackable Series.
+***********************
+
+Retrieval Augmented Generation (RAG) is a technique for improving language model responses by grounding the model with additional, verifiable sources of information. It works by first retrieving relevant context from an external datastore, which is then added to the model's context window.
+
+RAG is a form of https://arxiv.org/abs/2301.00234[in-context learning], where the model learns from information provided at inference time.
+Compared to fine-tuning or continuous pre-training, RAG can be implemented more quickly and cheaply, and offers several advantages.
+
+image::images/search/rag-venn-diagram.svg[RAG sits at the intersection of information retrieval and generative AI, align=center, width=500]
+
+RAG sits at the intersection of https://www.elastic.co/what-is/information-retrieval[information retrieval] and generative AI.
+{es} is an excellent tool for implementing RAG, because it offers various retrieval capabilities, such as full-text search, vector search, and hybrid search, as well as other tools like filtering, aggregations, and security features.
+
+[discrete]
+[[rag-elasticsearch-advantages]]
+=== Advantages of RAG
+
+Implementing RAG with {es} has several advantages:
+
+* *Improved context:* Enables grounding the language model with additional, up-to-date, and/or private data.
+* *Reduced hallucination:* Helps minimize factual errors by enabling models to cite authoritative sources.
+* *Cost efficiency:* Requires less maintenance compared to fine-tuning or continuously pre-training models.
+* *Built-in security:* Controls data access by leveraging {es}'s <<authorization, user authorization>> features, such as role-based access control and field/document-level security.
+* *Simplified response parsing:* Eliminates the need for custom parsing logic by letting the language model handle parsing {es} responses and formatting the retrieved context.
+* *Flexible implementation:* Works with basic <<full-text-search,full-text search>>, and can be gradually updated to add more advanced and computationally intensive <<semantic-search,semantic search>> capabilities.
+
+[discrete]
+[[rag-elasticsearch-components]]
+=== RAG system overview
+
+The following diagram illustrates a simple RAG system using {es}.
+
+image::images/search/rag-schema.svg[Components of a simple RAG system using Elasticsearch, align=center, role="stretch"]
+
+The workflow is as follows:
+
+. The user submits a query.
+. Elasticsearch retrieves relevant documents using full-text search, vector search, or hybrid search.
+. The language model processes the context and generates a response, using custom instructions. Examples of custom instructions include "Cite a source" or "Provide a concise summary of the `content` field in markdown format."
+. The model returns the final response to the user.
+
+[TIP]
+====
+A more advanced setup might include query rewriting between steps 1 and 2. This intermediate step could use one or more additional language models with different instructions to reformulate queries for more specific and detailed responses.
+====
+
+[discrete]
+[[rag-elasticsearch-getting-started]]
+=== Getting started
+
+Start building RAG applications quickly with Playground, which seamlessly integrates {es} with language model providers.
+The Playground UI enables you to build, test, and deploy RAG interfaces on top of your {es} indices.
+
+Playground automatically selects the best retrieval methods for your data, while providing full control over the final {es} queries and language model instructions.
+You can also download the underlying Python code to integrate with your existing applications.
+
+Learn more in the {kibana-ref}/playground.html[Playground documentation] and 
+try the https://www.elastic.co/demo-gallery/ai-playground[interactive lab] for hands-on experience.
+
+[discrete]
+[[rag-elasticsearch-learn-more]]
+=== Learn more
+
+Learn more about building RAG systems using {es} in these blog posts:
+
+* https://www.elastic.co/blog/beyond-rag-basics-semantic-search-with-elasticsearch[Beyond RAG Basics: Advanced strategies for AI applications]
+* https://www.elastic.co/search-labs/blog/building-a-rag-system-with-gemma-hugging-face-elasticsearch[Building a RAG system with Gemma, Hugging Face, and Elasticsearch]
+* https://www.elastic.co/search-labs/blog/rag-agent-tool-elasticsearch-langchain[Building an agentic RAG tool with Elasticsearch and LangChain]
+
+
+
diff --git a/docs/reference/search/search-your-data/search-your-data.asciidoc b/docs/reference/search/search-your-data/search-your-data.asciidoc
index 90a273a8cfe4d..0828462fd1850 100644
--- a/docs/reference/search/search-your-data/search-your-data.asciidoc
+++ b/docs/reference/search/search-your-data/search-your-data.asciidoc
@@ -49,6 +49,7 @@ include::../../how-to/recipes.asciidoc[]
 include::retrievers-overview.asciidoc[]
 include::knn-search.asciidoc[]
 include::semantic-search.asciidoc[]
+include::retrieval-augmented-generation.asciidoc[]
 include::search-across-clusters.asciidoc[]
 include::search-with-synonyms.asciidoc[]
 include::search-application-overview.asciidoc[]