diff --git a/packages/llm/ollama/Dockerfile b/packages/llm/ollama/Dockerfile
index f246d3049..fddc0b1d5 100644
--- a/packages/llm/ollama/Dockerfile
+++ b/packages/llm/ollama/Dockerfile
@@ -35,6 +35,7 @@ RUN CMAKE_VERSION=${CMAKE_VERSION} GOLANG_VERSION=${GOLANG_VERSION} sh /opt/olla
WORKDIR ollama/llm/generate
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/nvidia/compat:${LD_LIBRARY_PATH} \
CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}
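+# Replace the hard-coded -j8 with -j$(nproc) so the build uses all available CPU cores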
+RUN sed 's|-j8|-j$(nproc)|' -i gen_common.sh && cat gen_common.sh
RUN bash gen_linux.sh
WORKDIR /opt/ollama
diff --git a/packages/rag/langchain/Dockerfile.samples b/packages/rag/langchain/Dockerfile.samples
index e2b064799..05ca0db59 100644
--- a/packages/rag/langchain/Dockerfile.samples
+++ b/packages/rag/langchain/Dockerfile.samples
@@ -5,9 +5,24 @@
ARG BASE_IMAGE
FROM ${BASE_IMAGE}
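+# Python dependencies for the LangChain RAG samples (vector store, document loaders, cloud SDKs, monitoring, and web UIs)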
+RUN pip install \
+ chromadb \
+ pypdf \
+ pytest \
+ boto3 \
+ jetson-stats \
+ unstructured \
+ langchain-openai \
+ streamlit \
+ chainlit
+
WORKDIR /opt/langchain
COPY *.ipynb ./
# RUN sed 's|^\ \ }| ,"notebook_dir": "/opt/langchain"\n }|' -i /root/.jupyter/jupyter_notebook_config.json
-WORKDIR /
\ No newline at end of file
+WORKDIR /
+
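+# Start the Ollama server and JupyterLab (rooted at /opt/langchain), then drop into an interactive shell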
+CMD /start_ollama && \
+ JUPYTER_ROOT=/opt/langchain /start_jupyter && \
+ /bin/bash
\ No newline at end of file
diff --git a/packages/rag/langchain/LangChain_Local-LLMs.ipynb b/packages/rag/langchain/LangChain_Local-LLMs.ipynb
index 398fe131f..b2d1a5337 100644
--- a/packages/rag/langchain/LangChain_Local-LLMs.ipynb
+++ b/packages/rag/langchain/LangChain_Local-LLMs.ipynb
@@ -1,135 +1,824 @@
{
"cells": [
+ {
+ "cell_type": "markdown",
+ "id": "6ae0145b-00e8-4e81-810b-6e90854e4a91",
+ "metadata": {},
+ "source": [
+ "# LangChain RAG with Local Models\n",
+ "\n",
+ "This is based on Pixegami's tutorial. ([original repo](https://github.com/pixegami/rag-tutorial-v2/))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bbdd4d05-ddac-4ccf-b71d-650ad8f92d0b",
+ "metadata": {},
+ "source": [
+ "## Download data and folder setup\n",
+ "\n",
+ "On the **Docker host side**, run the following to set up the `jetson-containers`' `/data` directory.\n",
+ "\n",
+ "```\n",
+ "cd jetson-containers\n",
+ "mkdir -p data/documents/L4T-README\n",
+ "cp /media/jetson/L4T-README/*.txt data/documents/L4T-README/\n",
+ "```\n",
+ "\n",
+ "This in turn creates the mounted volume `/data/documents/L4T-README` inside the container.
\n",
+ "Your directory structure should look like this:\n",
+ "\n",
+ "```\n",
+ "└── ./data/documents/L4T-README\n",
+ " ├── INDEX.txt\n",
+ " ├── README-usb-dev-mode.txt\n",
+ " ├── README-vnc.txt\n",
+ " └── README-wifi.txt\n",
+ "```\n",
+ "\n",
+ "You can check this with running a following bash command in the following cell."
+ ]
+ },
{
"cell_type": "code",
"execution_count": 1,
- "id": "2634dfa3-1263-42b9-901d-004f5166e38e",
+ "id": "973053ec-7c55-4126-a8cc-a66ef65a5aa4",
"metadata": {
"tags": []
},
"outputs": [
{
- "name": "stderr",
+ "name": "stdout",
"output_type": "stream",
"text": [
- "ggml_init_cublas: found 1 CUDA devices:\n",
- " Device 0: Orin, compute capability 8.7\n",
- "llama.cpp: loading model from /data/models/text-generation-webui/TheBloke_Llama-2-13B-chat-GGML/llama-2-13b-chat.ggmlv3.q4_0.bin\n",
- "llama_model_load_internal: format = ggjt v3 (latest)\n",
- "llama_model_load_internal: n_vocab = 32000\n",
- "llama_model_load_internal: n_ctx = 2048\n",
- "llama_model_load_internal: n_embd = 5120\n",
- "llama_model_load_internal: n_mult = 256\n",
- "llama_model_load_internal: n_head = 40\n",
- "llama_model_load_internal: n_head_kv = 40\n",
- "llama_model_load_internal: n_layer = 40\n",
- "llama_model_load_internal: n_rot = 128\n",
- "llama_model_load_internal: n_gqa = 1\n",
- "llama_model_load_internal: rnorm_eps = 5.0e-06\n",
- "llama_model_load_internal: n_ff = 13824\n",
- "llama_model_load_internal: freq_base = 10000.0\n",
- "llama_model_load_internal: freq_scale = 1\n",
- "llama_model_load_internal: ftype = 2 (mostly Q4_0)\n",
- "llama_model_load_internal: model size = 13B\n",
- "llama_model_load_internal: ggml ctx size = 0.11 MB\n",
- "llama_model_load_internal: using CUDA for GPU acceleration\n",
- "llama_model_load_internal: mem required = 582.00 MB (+ 1600.00 MB per state)\n",
- "llama_model_load_internal: allocating batch_size x (640 kB + n_ctx x 160 B) = 480 MB VRAM for the scratch buffer\n",
- "llama_model_load_internal: offloading 40 repeating layers to GPU\n",
- "llama_model_load_internal: offloading non-repeating layers to GPU\n",
- "llama_model_load_internal: offloading v cache to GPU\n",
- "llama_model_load_internal: offloading k cache to GPU\n",
- "llama_model_load_internal: offloaded 43/43 layers to GPU\n",
- "llama_model_load_internal: total VRAM used: 8976 MB\n",
- "llama_new_context_with_model: kv self size = 1600.00 MB\n",
- "AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n"
+ "/data/documents/L4T-README:\n",
+ "total 24\n",
+ "-rw-rw-r-- 1 1000 1000 1104 May 6 22:42 INDEX.txt\n",
+ "-rw-rw-r-- 1 1000 1000 11126 May 6 22:42 README-usb-dev-mode.txt\n",
+ "-rw-rw-r-- 1 1000 1000 3590 May 6 22:42 README-vnc.txt\n",
+ "-rw-rw-r-- 1 1000 1000 1940 May 6 22:42 README-wifi.txt\n"
]
}
],
"source": [
- "from langchain.llms import LlamaCpp\n",
- "from langchain.callbacks.manager import CallbackManager\n",
- "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
- "\n",
- "n_gpu_layers = 128 # Metal set to 1 is enough.\n",
- "n_batch = 512 # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.\n",
- "callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])\n",
- "\n",
- "# Make sure the model path is correct for your system!\n",
- "llm = LlamaCpp(\n",
- " model_path=\"/data/models/text-generation-webui/TheBloke_Llama-2-13B-chat-GGML/llama-2-13b-chat.ggmlv3.q4_0.bin\",\n",
- " n_gpu_layers=n_gpu_layers,\n",
- " n_batch=n_batch,\n",
- " n_ctx=2048,\n",
- " f16_kv=True, # MUST set to True, otherwise you will run into problem after a couple of calls\n",
- " callback_manager=callback_manager,\n",
- " verbose=True,\n",
- ")"
+ "!ls -Rl /data/documents/L4T-README"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "36e251a0-24f1-484d-a278-d513e4f4cd36",
+ "metadata": {},
+ "source": [
+ "## Loading The Data"
]
},
{
"cell_type": "code",
"execution_count": 2,
- "id": "7d535c96-c9ff-42cd-b388-5b1d42c1102d",
+ "id": "3a24edf3-1751-4f11-a1d0-0ca1921e1014",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from langchain_community.document_loaders import DirectoryLoader\n",
+ "\n",
+ "DATA_PATH = '/data/documents/L4T-README'\n",
+ "\n",
+ "def load_documents():\n",
+ " document_loader = DirectoryLoader(DATA_PATH, glob=\"*.txt\")\n",
+ " return document_loader.load()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "922717b9-f225-463d-8b87-9a37a8b85435",
"metadata": {
"tags": []
},
"outputs": [
{
- "name": "stdout",
+ "name": "stderr",
"output_type": "stream",
"text": [
- "\n",
- "Create an itinerary for a 3-day trip to San Francisco, California. Include the following:\n",
- "Day 1:\n",
- "- Morning: Visit Fisherman's Wharf and take a ferry to Alcatraz Island.\n",
- "- Afternoon: Explore Golden Gate Park and visit the de Young Museum.\n",
- "- Evening: Enjoy dinner at North Beach.\n",
- "\n",
- "Day 2:\n",
- "- Morning: Take a trip to Muir Woods National Monument to see the redwood forests.\n",
- "- Afternoon: Visit the Exploratorium science museum and take a walk along the pier.\n",
- "- Evening: Attend a performance at the San Francisco Symphony.\n",
- "\n",
- "Day 3:\n",
- "- Morning: Take a cable car ride to the top of Nob Hill.\n",
- "- Afternoon: Visit the California Palace of the Legion of Honor and the Japanese Tea Garden.\n",
- "- Evening: Enjoy dinner at one of the many excellent seafood restaurants in the city.\n",
- "\n",
- "Note: This is just a sample itinerary and can be adjusted to fit your interests and schedule. Additionally, consider purchasing a San Francisco CityPASS"
+ "[nltk_data] Downloading package punkt to /root/nltk_data...\n",
+ "[nltk_data] Unzipping tokenizers/punkt.zip.\n",
+ "[nltk_data] Downloading package averaged_perceptron_tagger to\n",
+ "[nltk_data] /root/nltk_data...\n",
+ "[nltk_data] Unzipping taggers/averaged_perceptron_tagger.zip.\n"
]
},
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "4\n",
+ "page_content='1======================================================================\\n\\nINDEX\\n\\nLinux for Tegra\\n\\nIndex of README files\\n\\n=======================================================================\\n\\nThis directory contains various documentation for Linux for Tegra:\\n\\nREADME\\n\\nusb\\n\\ndev\\n\\nmode.txt:\\n\\nDescribes how Linux for Tegra configures the Tegra USB flashing port in USB device mode, and the features that are exposed to a connected host PC, which include network and virtual serial port connectivity.\\n\\nREADME\\n\\nwifi.txt:\\n\\nDescribes how to connect Jetson to a WiFi network using command-line tools. This is useful when connecting to Jetson using ssh over the USB device mode network connection.\\n\\nREADME\\n\\nvnc.txt:\\n\\nDescribes how to configure Jetson to run a VNC server using command-line tools. This configuration can be performed using ssh over the USB device mode network connection. This allows use of graphical applications without attaching an HDMI display, keyboard, and mouse to Jetson.' metadata={'source': '/data/documents/L4T-README/INDEX.txt'}\n"
+ ]
+ }
+ ],
+ "source": [
+ "documents = load_documents()\n",
+ "print(len(documents))\n",
+ "print(documents[0])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "de6ce522-e9f9-4521-afb3-788df1e37ef1",
+ "metadata": {},
+ "source": [
+ "## Split The Documents "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "0ec0ab31-8796-45fe-b445-4575698576de",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
+ "from langchain.schema.document import Document\n",
+ "\n",
+ "def split_documents(documents: list[Document]):\n",
+ " text_splitter = RecursiveCharacterTextSplitter(\n",
+ " chunk_size=800,\n",
+ " chunk_overlap=50,\n",
+ " length_function=len,\n",
+ " is_separator_regex=False,\n",
+ " )\n",
+ " return text_splitter.split_documents(documents)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "274f4d4e-af46-4596-857a-aa478a1563d8",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "27\n",
+ "page_content='1======================================================================\\n\\nINDEX\\n\\nLinux for Tegra\\n\\nIndex of README files\\n\\n=======================================================================\\n\\nThis directory contains various documentation for Linux for Tegra:\\n\\nREADME\\n\\nusb\\n\\ndev\\n\\nmode.txt:\\n\\nDescribes how Linux for Tegra configures the Tegra USB flashing port in USB device mode, and the features that are exposed to a connected host PC, which include network and virtual serial port connectivity.\\n\\nREADME\\n\\nwifi.txt:\\n\\nDescribes how to connect Jetson to a WiFi network using command-line tools. This is useful when connecting to Jetson using ssh over the USB device mode network connection.\\n\\nREADME\\n\\nvnc.txt:' metadata={'source': '/data/documents/L4T-README/INDEX.txt'}\n"
+ ]
+ }
+ ],
+ "source": [
+ "chunks = split_documents(documents)\n",
+ "print(len(chunks))\n",
+ "print(chunks[0])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "11bdce8c-f27d-4aee-a785-46b14f218804",
+ "metadata": {},
+ "source": [
+ "## Embedding Models\n",
+ "\n",
+ "There are couple of options for the embedding model."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9154e5f8-aa7e-4212-98c5-c439b1247410",
+ "metadata": {},
+ "source": [
+ "### Running **Local** Embedding Model\n",
+ "\n",
+ "#### Option 1: Open embedding model running locally\n",
+ "\n",
+ "Besides the LLM, you can run the embedding model locally on Jetson as well.\n",
+ "\n",
+ "Example embedding models available on `ollama` are listed on an [Ollama blog article](https://ollama.com/blog/embedding-models).\n",
+ "\n",
+ "| Model | Parameter Size | Link |\n",
+ "| ----- | --------------:| ---- |\n",
+ "|`mxbai-embed-large`|334M|[link](https://ollama.com/library/mxbai-embed-large)|\n",
+ "|`snowflake-arctic-embed`|334M|[link](https://ollama.com/library/snowflake-arctic-embed)|\n",
+ "|`nomic-embed-text`|137M|[link](https://ollama.com/library/nomic-embed-text)|\n",
+ "|`all-minilm`|23M|[link](https://ollama.com/library/all-minilm)|\n",
+ "\n",
+ "Here, we try `mxbai-embed-large`, which had proved to generate good enough embeddings for our sample documents."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6f33da5e-3122-49a3-b837-852950abe486",
+ "metadata": {},
+ "source": [
+ "First, check you have already downloaded the embedding model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "7d57c73e-2556-406e-be8b-feea695ec81e",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "NAME \tID \tSIZE \tMODIFIED \n",
+ "mxbai-embed-large:latest\t468836162de7\t669 MB\t45 minutes ago\t\n",
+ "llama3:70b \tbe39eb53a197\t39 GB \t16 hours ago \t\n",
+ "llama3:latest \ta6990ed6be41\t4.7 GB\t16 hours ago \t\n",
+ "nomic-embed-text:latest \t0a109f422b47\t274 MB\t23 hours ago \t\n",
+ "llama2:latest \t78e26419b446\t3.8 GB\t10 days ago \t\n",
+ "mistral:latest \t61e88e884507\t4.1 GB\t2 weeks ago \t\n",
+ "llama2:70b \te7f6c06ffef4\t38 GB \t2 weeks ago \t\n",
+ "llama2:13b \td475bf4c50bc\t7.4 GB\t2 weeks ago \t\n"
+ ]
+ }
+ ],
+ "source": [
+ "!ollama list"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2209208e-7ba6-46b8-8954-222591677916",
+ "metadata": {},
+ "source": [
+ "If not, use `ollama pull` command to pull the model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9b9c94e8-9a4a-40c6-a032-08dfdda40a7f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!ollama pull nomic-embed-text"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "70dfc81e-a824-4db2-b6b2-111c676d2b98",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from langchain_community.embeddings.ollama import OllamaEmbeddings\n",
+ "\n",
+ "def get_embedding_model():\n",
+ " embedding_model = OllamaEmbeddings(model=\"mxbai-embed-large\")\n",
+ " return embedding_model"
+ ]
+ },
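+ {
+ "cell_type": "markdown",
+ "id": "d41a2c3b-8e7f-4a6b-9c5d-1e2f3a4b5c6d",
+ "metadata": {},
+ "source": [
+ "As a quick sanity check (a minimal sketch; `embed_query` is part of LangChain's standard embeddings interface), you can embed a short string and inspect the length of the returned vector:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e52b3d4c-9f80-4b7c-ad6e-2f3a4b5c6d7e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sanity check: embed a short query and inspect the resulting vector's dimensionality\n",
+ "vector = get_embedding_model().embed_query(\"Hello Jetson\")\n",
+ "print(len(vector))"
+ ]
+ },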
+ {
+ "cell_type": "markdown",
+ "id": "61fb24d6-9974-49ba-8c7f-99656fe46a7f",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "### Using Cloud Embedding Model\n",
+ "\n",
+ "Some people believe cloud hosted embedding modles perform more accurately.\n",
+ "\n",
+ "#### Option 2: OpenAI Embedding model\n",
+ "\n",
+ "[API Reference: `OpenAIEbmeddings`](https://api.python.langchain.com/en/latest/embeddings/langchain_openai.embeddings.base.OpenAIEmbeddings.html)\n",
+ "\n",
+ "> **Remember to put YOUR own OpenAI API key in the following cell.** "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5863023e-787c-43b8-8994-c6a960927c14",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "OPENAI_API_KEY = \"\""
+ ]
+ },
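+ {
+ "cell_type": "markdown",
+ "id": "f63c4e5d-0a91-4c8d-be7f-3a4b5c6d7e8f",
+ "metadata": {},
+ "source": [
+ "Alternatively (a minimal sketch, assuming you exported `OPENAI_API_KEY` in the environment before starting the container), you can read the key from the environment instead of hardcoding it:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a74d5f6e-1b02-4d9e-af80-4b5c6d7e8f90",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "\n",
+ "# Assumption: OPENAI_API_KEY was exported in the environment; fall back to the value set above\n",
+ "OPENAI_API_KEY = os.environ.get(\"OPENAI_API_KEY\", OPENAI_API_KEY)"
+ ]
+ },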
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "48fbb2a1-1526-4e01-9125-e9b8b63c3bd2",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from langchain_openai import OpenAIEmbeddings\n",
+ "\n",
+ "def get_embedding_model():\n",
+ " embedding_model = OpenAIEmbeddings(\n",
+ " model = \"text-embedding-3-large\",\n",
+ " openai_api_key = OPENAI_API_KEY\n",
+ " )\n",
+ " return embedding_model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5ce566bf-c6bd-4396-99d7-5bd670a3b8d9",
+ "metadata": {},
+ "source": [
+ "#### Option 3: AWS Bedrock Embedding model\n",
+ "\n",
+ "[API Reference: `langchain_community.embeddings.bedrock`](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.bedrock.Bedrock.html)\n",
+ "\n",
+ "> **Note, you need to set up AWS profile separately.**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5ff88cb0-5475-4e14-968a-e3f92f729c04",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from langchain_community.embeddings.bedrock import BedrockEmbeddings\n",
+ "\n",
+ "def get_embedding_model():\n",
+ " embedding_model = BedrockEmbeddings(\n",
+ " credentials_profile_name=\"default\", region_name=\"us-east-1\"\n",
+ " )\n",
+ " return embedding_model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4886a89e-9721-4294-ab57-0c8c807e6386",
+ "metadata": {},
+ "source": [
+ "## Creating The Vector Store\n",
+ "\n",
+ "We are going to create the vector store with embeddings and save it in a directory as files.
\n",
+ "Here, the directory is defined to be \"**chromadb**\"."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "c839a7a0-da54-4d41-9e46-b900288b6b60",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "CHROMA_PATH = \"chromadb\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3a0da60e-c833-4e36-aef0-098534c394fe",
+ "metadata": {},
+ "source": [
+ "Remove the directory if it has previously been created and populated.\n",
+ "\n",
+ "> If you are re-running with different embedding model, removing the persisted directory may not be enough.
The work-around would be to restart the Python kernel.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "342409dc-4726-4192-8eff-904848724c7a",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "%%bash -s \"$CHROMA_PATH\"\n",
+ "rm -rf $1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "eabafded-e353-497d-9083-22c2af44f83a",
+ "metadata": {},
+ "source": [
+ "### Vector store to be created with embedding model specified"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "140646fa-9214-42d6-973c-ede42e1ffd3d",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from langchain.vectorstores.chroma import Chroma\n",
+ "\n",
+ "def add_to_chroma(chunks: list[Document]):\n",
+ " vectorstore = Chroma(\n",
+ " persist_directory=CHROMA_PATH, \n",
+ " embedding_function=get_embedding_model()\n",
+ " )\n",
+ " vectorstore.add_documents(chunks)\n",
+ " vectorstore.persist()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "2cf77a7b-9065-4cf0-af54-fdebf0d26264",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
+ "/usr/local/lib/python3.10/dist-packages/langchain_core/_api/deprecation.py:119: LangChainDeprecationWarning: Since Chroma 0.4.x the manual persistence method is no longer supported as docs are automatically persisted.\n",
+ " warn_deprecated(\n"
+ ]
+ }
+ ],
+ "source": [
+ "add_to_chroma(chunks)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b58fe9ab-2b19-40c4-8e65-9aa791264b2b",
+ "metadata": {},
+ "source": [
+ "Let's check what files are saved and the size of each file."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "c6d87c22-c199-4c37-9d89-8cd5e1c43721",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "4.0K\t./chromadb/451ecaa3-56ba-4076-afb8-793a024dce9e/length.bin\n",
+ "4.0K\t./chromadb/451ecaa3-56ba-4076-afb8-793a024dce9e/header.bin\n",
+ "4.1M\t./chromadb/451ecaa3-56ba-4076-afb8-793a024dce9e/data_level0.bin\n",
+ "0\t./chromadb/451ecaa3-56ba-4076-afb8-793a024dce9e/link_lists.bin\n",
+ "4.1M\t./chromadb/451ecaa3-56ba-4076-afb8-793a024dce9e\n",
+ "404K\t./chromadb/chroma.sqlite3\n",
+ "4.5M\t./chromadb\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%bash -s \"$CHROMA_PATH\"\n",
+ "du -ah ./$1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "40432bc9-56fc-4e5e-9215-45357e68a528",
+ "metadata": {},
+ "source": [
+ "## Running RAG Query Locally"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5b9c45d9-3ffe-4ec6-a26d-977c0773ef4f",
+ "metadata": {},
+ "source": [
+ "Below defines the template for the prompt to eventually sent to our LLM."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "9168af39-7e5d-4214-b5c4-98e606eac77c",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "PROMPT_TEMPLATE = \"\"\"\n",
+ "Answer the question based only on the following context:\n",
+ "{context}\n",
+ "\n",
+ "---\n",
+ "Answer the question based on the above context: {question}\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d1d44eaf-78f1-4041-a495-38d120cd3287",
+ "metadata": {},
+ "source": [
+ "The actual question is supplied as below."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "06efab23-a279-4f25-9751-f3b75721f8ee",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "query_text=\"What IPv4 address Jetson device gets assigned when connected to a PC with a USB cable? \\\n",
+ " And what file to edit in order to change the IP address to be assigned to Jetson itself in USB device mode? \\\n",
+ " Plesae state which section you find the answer for each question.\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "384822c9-346c-4a84-9bdf-3919dbbdeca3",
+ "metadata": {},
+ "source": [
+ "### Load vector store from persisted files with embedding model specified"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "6a3b4c0c-4606-41d2-996b-c1629b0cd778",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "vectorstore = Chroma(\n",
+ " persist_directory=CHROMA_PATH, \n",
+ " embedding_function=get_embedding_model()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c1414545-0a99-4f5c-a2c6-8d5d1ddcbdd0",
+ "metadata": {},
+ "source": [
+ "### Search the vector store for retrieving relevant context\n",
+ "\n",
+ "Top 5 relevant chunks are retrieved."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "3ff520e3-da35-4051-a05b-7d9ade16aa8d",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "results = vectorstore.similarity_search_with_score(query_text, k=5)"
+ ]
+ },
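+ {
+ "cell_type": "markdown",
+ "id": "b85e607f-2c13-4eaf-8091-5c6d7e8f9012",
+ "metadata": {},
+ "source": [
+ "Before building the prompt, you can inspect what was retrieved. As a minimal sketch, each result is a `(Document, score)` pair, so we print the score and source file of every chunk:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c96f7180-3d24-4fb0-91a2-6d7e8f901234",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Print the similarity score and source file of each retrieved chunk\n",
+ "for doc, score in results:\n",
+ "    print(f\"{score:.4f}  {doc.metadata.get('source')}\")"
+ ]
+ },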
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "fc4dd700-6c1b-45b9-aa81-2306d89f9e4a",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from langchain.prompts import ChatPromptTemplate\n",
+ "\n",
+ "context_text = \"\\n\\n---\\n\\n\".join([doc.page_content for doc, _score in results])\n",
+ "prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)\n",
+ "prompt = prompt_template.format(context=context_text, question=query_text)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "dc0d1b1a-4cc7-4759-abe8-f2c2787d6dfc",
+ "metadata": {},
+ "source": [
+ "Final `prompt` is generated with context filled.\n",
+ "\n",
+ "If you decide to print out the generated prompt, run the following cell."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "76e650c5-219e-4413-989d-935a3a631770",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Human: \n",
+ "Answer the question based only on the following context:\n",
+ "Linux for Tegra assigns a static IPv4 address of 192.168.55.1 to Jetson, and runs a DHCP server to automatically assign an IPv4 address of 192.168.55.100 to your host machine. This provides point-to-point connectivity. If a Jetson device experiences very high CPU or disk IO load, this DHCP server may fail to respond in a timely manner to requests from the host machine. This may cause IPv4 connections to drop. If this problem occurs, configure your host machine to use a static IPv4 address of 192.168.55.100 with netmask 255.255.255.0 and no gateway or DNS servers.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "If you connect multiple Jetson devices to the same host machine, each Jetson device uses the same IPv4 address. This prevents IPv4-based communication with all but one Jetson device, since your host operating system determines which Jetson device it communicates with. To solve this, edit the Jetson-based script that sets up the network and assign a unique network address to each Jetson device. IPv6-based communication works with any number of Jetson devices without modifying the aforementioned script.\n",
+ "\n",
+ "Once the Ethernet device is configured, use SSH to connect to the Jetson device.\n",
+ "\n",
+ "SSH is natively available on Linux and Macintosh operating systems. Connect using IPv6 with the following command:\n",
+ "\n",
+ "ssh nvidia@fe80::1%usb0\n",
+ "\n",
+ "Connect using IPv4 with the following command:\n",
+ "\n",
+ "---\n",
+ "\n",
+ "Linux for Tegra assigns a link-local IPv6 address of fe80::1 to the USB Ethernet device(s). You should not have to configure your host machine in order to use an IPv6 link-local address. If you connect multiple Jetson devices to the same host machine, each Jetson device uses the same IPv6 link-local address. You may still connect to any connected Jetson device, however, since use of IPv6 link-local addresses also requires specifying which network interface to connect over, and there is a unique interface for each Jetson.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "Ethernet on Mac ---------------------------------------------------------------------- An NCM USB Ethernet device is created, and the required driver is automatically activated. Since the Mac operating system does not support the RNDIS device, the device is not activated by default.\n",
+ "\n",
+ "Changing the IPv4 Address ---------------------------------------------------------------------- Edit /opt/nvidia/l4t-usb-device-mode/nv-l4t-usb-device-mode-config.sh on Jetson to change the IPv4 network parameters. The following variables must be changed, and must maintain consistent values: - net_ip - net_mask - net_net - net_dhcp_start - net_dhcp_end\n",
+ "\n",
+ "---\n",
+ "\n",
+ "Connect using IPv4 with the following command:\n",
+ "\n",
+ "ssh nvidia@192.168.55.1\n",
+ "\n",
+ "On Windows, use the PuTTY application.\n",
+ "\n",
+ "Ethernet on Linux ---------------------------------------------------------------------- Two USB Ethernet devices are created and the required drivers are automatically activated. These devices are typically named usb0 and usb1, depending on how your Linux distribution is configured, and what other hardware is attached to the system. To avoid the potential for interference between the two USB Ethernet devices, it is best, but not strictly required, to use your host's network configuration tool (e.g. Network Manager), to disable one of the two USB Ethernet interfaces implemented by the Jetson device.\n",
"\n",
- "llama_print_timings: load time = 1680.75 ms\n",
- "llama_print_timings: sample time = 201.61 ms / 256 runs ( 0.79 ms per token, 1269.78 tokens per second)\n",
- "llama_print_timings: prompt eval time = 1680.49 ms / 17 tokens ( 98.85 ms per token, 10.12 tokens per second)\n",
- "llama_print_timings: eval time = 16994.37 ms / 255 runs ( 66.64 ms per token, 15.00 tokens per second)\n",
- "llama_print_timings: total time = 19942.30 ms\n"
+ "View available Ethernet devices with the following command:\n",
+ "\n",
+ "/sbin/ifconfig\n",
+ "\n",
+ "---\n",
+ "Answer the question based on the above context: What IPv4 address Jetson device gets assigned when connected to a PC with a USB cable? And what file to edit in order to change the IP address to be assigned to Jetson itself in USB device mode? Plesae state which section you find the answer for each question.\n",
+ "\n"
]
- },
+ }
+ ],
+ "source": [
+ "print(prompt)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ae09240a-fc5e-4ff4-ad28-3860f86526fa",
+ "metadata": {},
+ "source": [
+ "### Define LLM\n",
+ "\n",
+ "Define the local LLM using `Ollama` to be invoked with the prompt."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "e4bff3e0-e38e-44ce-92e9-28a82c9aed57",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from langchain_community.llms.ollama import Ollama\n",
+ "\n",
+ "model = Ollama(model=\"llama3\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1c2f99d9-4c87-4c26-9ca0-36e9ceaed438",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "If you have not downloaded `llama3` model and the above cell failed, run the following cell and come back to execute the above cell again."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "36920eb4-c80a-497f-a8bf-a6ed240b393e",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "!ollama pull llama3"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "03d47f1a-2fc8-4797-9977-0401a445cfde",
+ "metadata": {},
+ "source": [
+ "#### Alternative: Using OpenAI LLM\n",
+ "\n",
+ "In case you wanted to try OpenAI LLM, you can run the following cell. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b7690356-2ce9-43bd-8bcb-51dcee0ba79f",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from langchain_openai import OpenAI\n",
+ "\n",
+ "model = OpenAI(\n",
+ " model=\"gpt-3.5-turbo-instruct\",\n",
+ " openai_api_key = OPENAI_API_KEY\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cc0bc50e-631e-4357-9f06-728df550c45b",
+ "metadata": {},
+ "source": [
+ "#### Running the LLM \n",
+ "\n",
+ "The following cell runs the Llama3 model on Ollama with the prompt.\n",
+ "\n",
+ "> If you open a Terminal on the side (you can dock a pane on the side on JupyterLab) and runs `jtop`, you can check the GPU and other resource usage."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "7dceda73-3665-4ec2-bed0-20ab40ce68e2",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "response_text = model.invoke(prompt)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ae7d03f3-2d9b-416d-a598-b7fd88b8daf8",
+ "metadata": {},
+ "source": [
+ "Let's check the LLM output."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "8ade3c38-d92b-47a0-9d6e-b6ba2d77d928",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
{
"data": {
+ "text/markdown": [
+ "Based on the provided context:\n",
+ "\n",
+ "**What IPv4 address Jetson device gets assigned when connected to a PC with a USB cable?**\n",
+ "\n",
+ "According to Section \"Linux for Tegra\", Linux for Tegra assigns a static IPv4 address of 192.168.55.1 to Jetson, and runs a DHCP server to automatically assign an IPv4 address of 192.168.55.100 to the host machine.\n",
+ "\n",
+ "**What file to edit in order to change the IP address to be assigned to Jetson itself in USB device mode?**\n",
+ "\n",
+ "According to Section \"Changing the IPv4 Address\", you need to edit `/opt/nvidia/l4t-usb-device-mode/nv-l4t-usb-device-mode-config.sh` on Jetson to change the IPv4 network parameters."
+ ],
"text/plain": [
- "\"\\nCreate an itinerary for a 3-day trip to San Francisco, California. Include the following:\\nDay 1:\\n- Morning: Visit Fisherman's Wharf and take a ferry to Alcatraz Island.\\n- Afternoon: Explore Golden Gate Park and visit the de Young Museum.\\n- Evening: Enjoy dinner at North Beach.\\n\\nDay 2:\\n- Morning: Take a trip to Muir Woods National Monument to see the redwood forests.\\n- Afternoon: Visit the Exploratorium science museum and take a walk along the pier.\\n- Evening: Attend a performance at the San Francisco Symphony.\\n\\nDay 3:\\n- Morning: Take a cable car ride to the top of Nob Hill.\\n- Afternoon: Visit the California Palace of the Legion of Honor and the Japanese Tea Garden.\\n- Evening: Enjoy dinner at one of the many excellent seafood restaurants in the city.\\n\\nNote: This is just a sample itinerary and can be adjusted to fit your interests and schedule. Additionally, consider purchasing a San Francisco CityPASS\""
+ ""
]
},
- "execution_count": 2,
"metadata": {},
- "output_type": "execute_result"
+ "output_type": "display_data"
}
],
"source": [
- "llm(\"Create an itinerary 3-day trip to San Francisco, California.\")"
+ "from IPython.display import display, Markdown, Latex\n",
+ "display(Markdown(response_text))"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "d85c2a9c-1771-4533-84a3-afca8489a0e4",
+ "id": "4a772677-e1dd-4272-85a3-324f9b9ac7b5",
"metadata": {},
"outputs": [],
"source": []
@@ -151,7 +840,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.10"
+ "version": "3.10.12"
}
},
"nbformat": 4,
diff --git a/packages/rag/langchain/config.py b/packages/rag/langchain/config.py
index 0ef49db0a..8c0471ddb 100644
--- a/packages/rag/langchain/config.py
+++ b/packages/rag/langchain/config.py
@@ -3,7 +3,7 @@
samples['name'] = 'langchain:samples'
samples['dockerfile'] = 'Dockerfile.samples'
-samples['depends'] = ['langchain:main', 'jupyterlab']
+samples['depends'] = ['langchain:main', 'jupyterlab', 'ollama']
del samples['alias']
diff --git a/packages/rag/langchain/pixegami-rag-tutorial-v2.ipynb b/packages/rag/langchain/pixegami-rag-tutorial-v2.ipynb
new file mode 100644
index 000000000..235f9e6cb
--- /dev/null
+++ b/packages/rag/langchain/pixegami-rag-tutorial-v2.ipynb
@@ -0,0 +1,602 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "d755f3e0-de66-4ecf-83d0-535310d13e59",
+ "metadata": {},
+ "source": [
+ "# LangChain RAG with Locall LLM Tutorial\n",
+ "\n",
+ "This is based on Pixegami's tutorial. ([original repo](https://github.com/pixegami/rag-tutorial-v2/))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a3dc8c87-e26e-4064-805e-1d7e6058f8c4",
+ "metadata": {},
+ "source": [
+ "## Loading The Data\n",
+ "\n",
+ "More on Document loaders --> [official document](https://python.langchain.com/v0.1/docs/modules/data_connection/document_loaders/)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "be0c3b2f-e107-4166-ba6b-970e10b33e20",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "!git clone https://github.com/pixegami/rag-tutorial-v2/\n",
+ "!mv rag-tutorial-v2/data ./\n",
+ "!rm -rf rag-tutorial-v2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "71e2a607-33c2-4efa-b28b-2b21889c3c92",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from langchain.document_loaders.pdf import PyPDFDirectoryLoader\n",
+ "\n",
+ "DATA_PATH = \"data\"\n",
+ "\n",
+ "def load_documents():\n",
+ " document_loader = PyPDFDirectoryLoader(DATA_PATH)\n",
+ " return document_loader.load()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "a8a71ca0-aea3-4d81-a081-1efe54927609",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "12\n",
+ "page_content='On a blustery autumn evening five old friends met in the backroom of one of the city’s oldest and most private clubs. Each had\\ntraveled a long distance — from all corners of the world — to meet on this very specific day… October 2, 1900 — 28 years to the\\nday that the London eccentric, Phileas Fogg accepted and then won a £20,000 bet that he could travel Around the World in 80 Days . \\nWhen the story of Fogg’s triumphant journey filled all the newspapers of the day, the five attended University together. Inspired by\\nhis impetuous gamble, and a few pints from the local pub, the group commemorated his circumnavigation with a more modest excur-sion and wager – a bottle of good claret to the first to make it to Le Procope in Paris.\\nEach succeeding year, they met to celebrate the anniversary and pay tribute to Fogg. And each year a new expedition (always mor e\\ndifficult) with a new wager (always more expensive) was proposed. Now at the dawn of the century it was time for a new impossi-ble journey. The stakes: $1 Million in a winner-takes-all competition. The objective: to see which of them could travel by rail to themost cities in North America — in just 7 days. The journey would begin immediately…\\nTicket to Ride is a cross-country train adventure. Players compete to connect different cities by laying claim to railway routes on a\\nmap of North America.\\nFor 2 - 5 players \\nages 8 and above\\n30 - 60 minutes[T2R] rules EN reprint 2015_TTR2 rules US 06/03/15 17:36 Page2' metadata={'source': 'data/ticket_to_ride.pdf', 'page': 0}\n"
+ ]
+ }
+ ],
+ "source": [
+ "documents = load_documents()\n",
+ "print(len(documents))\n",
+ "print(documents[0])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a369d229-ad09-49db-aa6b-b5ea62db3382",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Split The Documents "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "eec69272-6fec-46ea-8310-8b2a87cd32f2",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
+ "from langchain.schema.document import Document\n",
+ "\n",
+ "def split_documents(documents: list[Document]):\n",
+ " text_splitter = RecursiveCharacterTextSplitter(\n",
+ " chunk_size=800,\n",
+ " chunk_overlap=80,\n",
+ " length_function=len,\n",
+ " is_separator_regex=False,\n",
+ " )\n",
+ " return text_splitter.split_documents(documents)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "f02190a6-4478-4eb1-ae7a-ef6888d4375e",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "41\n",
+ "page_content='On a blustery autumn evening five old friends met in the backroom of one of the city’s oldest and most private clubs. Each had\\ntraveled a long distance — from all corners of the world — to meet on this very specific day… October 2, 1900 — 28 years to the\\nday that the London eccentric, Phileas Fogg accepted and then won a £20,000 bet that he could travel Around the World in 80 Days . \\nWhen the story of Fogg’s triumphant journey filled all the newspapers of the day, the five attended University together. Inspired by\\nhis impetuous gamble, and a few pints from the local pub, the group commemorated his circumnavigation with a more modest excur-sion and wager – a bottle of good claret to the first to make it to Le Procope in Paris.' metadata={'source': 'data/ticket_to_ride.pdf', 'page': 0}\n"
+ ]
+ }
+ ],
+ "source": [
+ "chunks = split_documents(documents)\n",
+ "print(len(chunks))\n",
+ "print(chunks[0])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e42e848c-f668-437c-a6ce-72dbad72734b",
+ "metadata": {},
+ "source": [
+ "## Embedding Function\n",
+ "\n",
+ "### Using Cloud Embedding Function\n",
+ "\n",
+ "Cloud hosted embedding function generally performs better."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dec75fb5-c584-468d-97f0-5bec6cb5650a",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from langchain_community.embeddings.bedrock import BedrockEmbeddings\n",
+ "\n",
+ "def get_embedding_function():\n",
+ " embeddings = BedrockEmbeddings(\n",
+ " credentials_profile_name=\"default\", region_name=\"us-east-1\"\n",
+ " )\n",
+ " # embeddings = OllamaEmbeddings(model=\"nomic-embed-text\")\n",
+ " return embeddings"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "143da522-dbda-425e-a3f8-d0f34a6c7f5a",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "### Running **Local** Embedding Function\n",
+ "\n",
+ "In case you want to run the embedding function locally as well, run the following cell instead."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "61ee6926-62e9-4eae-bb4d-75077b3f6940",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "!ollama list"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8eb84fdd-6129-413c-9f86-63361bc15327",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!ollama pull nomic-embed-text"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "8cc0274e-3958-4ae0-9f61-659ee63c3167",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from langchain_community.embeddings.ollama import OllamaEmbeddings\n",
+ "\n",
+ "def get_embedding_function():\n",
+ " embeddings = OllamaEmbeddings(model=\"nomic-embed-text\")\n",
+ " return embeddings"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c517df9e-9e60-4234-bd66-65cddcec8567",
+ "metadata": {},
+ "source": [
+ "## Creating The Database"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "3a65fbe2-afb6-4357-a8f7-e131eb1d1f7c",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from langchain.vectorstores.chroma import Chroma\n",
+ "\n",
+ "CHROMA_PATH = \"chroma\"\n",
+ "\n",
+ "def add_to_chroma(chunks: list[Document]):\n",
+ " # Load the existing database.\n",
+ " db = Chroma(\n",
+ " persist_directory=CHROMA_PATH, embedding_function=get_embedding_function()\n",
+ " )\n",
+ "\n",
+ " # Calculate Page IDs.\n",
+ " chunks_with_ids = calculate_chunk_ids(chunks)\n",
+ "\n",
+ " # Add or Update the documents.\n",
+ " existing_items = db.get(include=[]) # IDs are always included by default\n",
+ " existing_ids = set(existing_items[\"ids\"])\n",
+ " print(f\"Number of existing documents in DB: {len(existing_ids)}\")\n",
+ "\n",
+ " # Only add documents that don't exist in the DB.\n",
+ " new_chunks = []\n",
+ " for chunk in chunks_with_ids:\n",
+ " if chunk.metadata[\"id\"] not in existing_ids:\n",
+ " new_chunks.append(chunk)\n",
+ "\n",
+ " if len(new_chunks):\n",
+ " print(f\"👉 Adding new documents: {len(new_chunks)}\")\n",
+ " new_chunk_ids = [chunk.metadata[\"id\"] for chunk in new_chunks]\n",
+ " db.add_documents(new_chunks, ids=new_chunk_ids)\n",
+ " db.persist()\n",
+ " else:\n",
+ " print(\"✅ No new documents to add\")\n",
+ " \n",
+ "\n",
+ "def calculate_chunk_ids(chunks):\n",
+ "\n",
+ " # This will create IDs like \"data/monopoly.pdf:6:2\"\n",
+ " # Page Source : Page Number : Chunk Index\n",
+ "\n",
+ " last_page_id = None\n",
+ " current_chunk_index = 0\n",
+ "\n",
+ " for chunk in chunks:\n",
+ " source = chunk.metadata.get(\"source\")\n",
+ " page = chunk.metadata.get(\"page\")\n",
+ " current_page_id = f\"{source}:{page}\"\n",
+ "\n",
+ " # If the page ID is the same as the last one, increment the index.\n",
+ " if current_page_id == last_page_id:\n",
+ " current_chunk_index += 1\n",
+ " else:\n",
+ " current_chunk_index = 0\n",
+ "\n",
+ " # Calculate the chunk ID.\n",
+ " chunk_id = f\"{current_page_id}:{current_chunk_index}\"\n",
+ " last_page_id = current_page_id\n",
+ "\n",
+ " # Add it to the page meta-data.\n",
+ " chunk.metadata[\"id\"] = chunk_id\n",
+ "\n",
+ " return chunks"
+ ]
+ },
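+ {
+ "cell_type": "markdown",
+ "id": "da7f8291-4e35-4ac1-a2b3-7e8f90123445",
+ "metadata": {},
+ "source": [
+ "To see the ID scheme in action (a quick sketch reusing the `chunks` loaded above), compute the IDs and print the first few:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "eb809342-5f46-4bd2-b3c4-8f9012344556",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Show the generated \"source:page:chunk\" IDs for the first few chunks\n",
+ "chunks_with_ids = calculate_chunk_ids(chunks)\n",
+ "print([c.metadata[\"id\"] for c in chunks_with_ids[:5]])"
+ ]
+ },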
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "535c3043-d0f9-4ba0-953d-43e1ebbe4abf",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of existing documents in DB: 41\n",
+ "✅ No new documents to add\n"
+ ]
+ }
+ ],
+ "source": [
+ "add_to_chroma(chunks)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "ac825264-bf17-4efb-9851-0abdca61e2ac",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "4.0K\t./chroma/ef219065-076e-496e-be5b-b3fa0deb744a/length.bin\n",
+ "4.0K\t./chroma/ef219065-076e-496e-be5b-b3fa0deb744a/header.bin\n",
+ "3.1M\t./chroma/ef219065-076e-496e-be5b-b3fa0deb744a/data_level0.bin\n",
+ "0\t./chroma/ef219065-076e-496e-be5b-b3fa0deb744a/link_lists.bin\n",
+ "3.1M\t./chroma/ef219065-076e-496e-be5b-b3fa0deb744a\n",
+ "552K\t./chroma/chroma.sqlite3\n",
+ "3.7M\t./chroma\n"
+ ]
+ }
+ ],
+ "source": [
+ "!du -ah ./chroma"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6e9626c3-db95-4eed-a0b4-cfd574b370c6",
+ "metadata": {},
+ "source": [
+ "## Running RAG Query Locally"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d8337451-c6b3-43a1-8d43-bc32b0d339c9",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from langchain.prompts import ChatPromptTemplate\n",
+ "from langchain_community.llms.ollama import Ollama\n",
+ "\n",
+ "CHROMA_PATH = \"chroma\"\n",
+ "\n",
+ "PROMPT_TEMPLATE = \"\"\"\n",
+ "Answer the question based only on the following context:\n",
+ "{context}\n",
+ "\n",
+ "---\n",
+ "Answer the question based on the above context: {question}\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e863e782-48ca-4e18-9935-724983dc69b6",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "embedding_function = get_embedding_function()\n",
+ "db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c6e445ec-4c87-4418-90a2-0acffda2369d",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "query_text=\"How many clues can I give in Codenames?\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "414d26fc-acfe-4f1f-8bda-83702eff3f88",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# Search the DB.\n",
+ "results = db.similarity_search_with_score(query_text, k=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2e7a977c-591b-4b63-9fd2-5f8be3c8b357",
+ "metadata": {
+ "jupyter": {
+ "source_hidden": true
+ },
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "context_text = \"\\n\\n---\\n\\n\".join([doc.page_content for doc, _score in results])\n",
+ "prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)\n",
+ "prompt = prompt_template.format(context=context_text, question=query_text)\n",
+ "print(prompt)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "02f9e48f-c7fc-46c2-aa39-3eb946eb39b0",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "model = Ollama(model=\"mistral\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "28c1184b-cde8-4088-b1b5-dcbea7b6dc69",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "response_text = model.invoke(prompt)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d4937264-b2fe-4fae-b7cf-729618edf345",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "print(response_text)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5c662d8a-07a6-4744-a9b5-4d2e16173a74",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "sources = [doc.metadata.get(\"id\", None) for doc, _score in results]\n",
+ "formatted_response = f\"Response: {response_text}\\nSources: {sources}\"\n",
+ "print(formatted_response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0e6fcc4e-8750-43a9-9082-b9f3a43eab68",
+ "metadata": {},
+ "source": [
+ "### Function-ized"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "fd794263-718d-4bf9-9ce8-796952ba0b0c",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from langchain.prompts import ChatPromptTemplate\n",
+ "from langchain_community.llms.ollama import Ollama\n",
+ "\n",
+ "CHROMA_PATH = \"chroma\"\n",
+ "\n",
+ "PROMPT_TEMPLATE = \"\"\"\n",
+ "Answer the question based only on the following context:\n",
+ "{context}\n",
+ "\n",
+ "---\n",
+ "Answer the question based on the above context: {question}\n",
+ "\"\"\"\n",
+ "\n",
+ "def query_rag(query_text: str):\n",
+ " # Prepare the DB.\n",
+ " embedding_function = get_embedding_function()\n",
+ " db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)\n",
+ "\n",
+ " # Search the DB.\n",
+ " results = db.similarity_search_with_score(query_text, k=5)\n",
+ "\n",
+ " context_text = \"\\n\\n---\\n\\n\".join([doc.page_content for doc, _score in results])\n",
+ " prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)\n",
+ " prompt = prompt_template.format(context=context_text, question=query_text)\n",
+ " # print(prompt)\n",
+ "\n",
+ " model = Ollama(model=\"mistral\")\n",
+ " response_text = model.invoke(prompt)\n",
+ "\n",
+ " sources = [doc.metadata.get(\"id\", None) for doc, _score in results]\n",
+ " formatted_response = f\"Response: {response_text}\\nSources: {sources}\"\n",
+ " print(formatted_response)\n",
+ " return response_text"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "c3598662-a4dc-4b49-8815-a36c3c34425d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "query_text=\"How do I get out of jail in Monopoly?\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "3c648863-0ad7-489b-aa33-a57f020e2510",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Response: To get out of jail in Monopoly, you have several options: (1) throw doubles on any of your next three turns, (2) use a \"Get Out of Jail Free\" card if you have one, (3) purchase a \"Get Out of Jail Free\" card from another player and play it, or (4) pay a fine of $50 before rolling the dice on either of your next two turns. If you do not throw doubles by your third turn, you must pay the $50 fine and move forward the number of spaces shown by your next roll. You can also buy and sell property, buy and sell houses and hotels, and collect rents while in jail. However, you cannot collect your $200 salary if sent to jail during a move.\n",
+ "Sources: ['data/monopoly.pdf:1:0', 'data/monopoly.pdf:0:0', 'data/monopoly.pdf:4:1', 'data/monopoly.pdf:1:1', 'data/monopoly.pdf:4:2']\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "' To get out of jail in Monopoly, you have several options: (1) throw doubles on any of your next three turns, (2) use a \"Get Out of Jail Free\" card if you have one, (3) purchase a \"Get Out of Jail Free\" card from another player and play it, or (4) pay a fine of $50 before rolling the dice on either of your next two turns. If you do not throw doubles by your third turn, you must pay the $50 fine and move forward the number of spaces shown by your next roll. You can also buy and sell property, buy and sell houses and hotels, and collect rents while in jail. However, you cannot collect your $200 salary if sent to jail during a move.'"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "query_rag(query_text)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "97286027-0b01-44db-a8ae-665566f53984",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "query_text=\"How many clues can I give in Codenames?\""
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}