From 21bec293139ad2ff1bc78863ae8ad67cdd6cd133 Mon Sep 17 00:00:00 2001 From: Bill Shi Date: Wed, 20 Sep 2023 19:30:06 -0700 Subject: [PATCH] fix: rm langchain and dataloader notebook --- .../TigerGraph_LangChain_Demo.ipynb | 1301 ----------------- basics/data_loaders.ipynb | 761 ---------- 2 files changed, 2062 deletions(-) delete mode 100644 applications/large_language_models/TigerGraph_LangChain_Demo.ipynb delete mode 100644 basics/data_loaders.ipynb diff --git a/applications/large_language_models/TigerGraph_LangChain_Demo.ipynb b/applications/large_language_models/TigerGraph_LangChain_Demo.ipynb deleted file mode 100644 index 6059333..0000000 --- a/applications/large_language_models/TigerGraph_LangChain_Demo.ipynb +++ /dev/null @@ -1,1301 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "573df91e-c773-4f81-8344-43399b9ded00", - "metadata": {}, - "source": [ - "# Integrate TigerGraph with LangChain with pyTigerGraph\n", - "\n", - "LangChain is a Python library that integrates Large Language Models (LLMs) such as OpenAI's GPT models and Google's BARD models, among others. There are many benefits to linking a natural language interface to graph databases such as TigerGraph, as models can be grounded in their generation based off of the data the database provides. This reduces the chances of these model's responses to be inaccurate, known as *hallucination*.\n", - "\n", - "In this demo, we are going to integrate LangChain with the Python driver of TigerGraph, pyTigerGraph. We will build up various LangChain tools in order for the LLMs to best extract the information from the database." - ] - }, - { - "cell_type": "markdown", - "id": "9790e156-0bfa-44cd-9434-9a735adbc0fb", - "metadata": {}, - "source": [ - "## Install Dependencies\n", - "First, we are going to install the dependencies. This consists of LangChain and OpenAI's package." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "36631f60-4282-4757-a8e7-63b45226790e", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: langchain in /opt/conda/lib/python3.9/site-packages (0.0.190)\n", - "Requirement already satisfied: openai in /opt/conda/lib/python3.9/site-packages (0.27.7)\n", - "Requirement already satisfied: pyTigerGraph in /opt/conda/lib/python3.9/site-packages (1.4)\n", - "Requirement already satisfied: pydantic<2,>=1 in /opt/conda/lib/python3.9/site-packages (from langchain) (1.10.8)\n", - "Requirement already satisfied: numpy<2,>=1 in /opt/conda/lib/python3.9/site-packages (from langchain) (1.23.0)\n", - "Requirement already satisfied: dataclasses-json<0.6.0,>=0.5.7 in /opt/conda/lib/python3.9/site-packages (from langchain) (0.5.7)\n", - "Requirement already satisfied: requests<3,>=2 in /opt/conda/lib/python3.9/site-packages (from langchain) (2.28.1)\n", - "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /opt/conda/lib/python3.9/site-packages (from langchain) (2.0.15)\n", - "Requirement already satisfied: openapi-schema-pydantic<2.0,>=1.2 in /opt/conda/lib/python3.9/site-packages (from langchain) (1.2.4)\n", - "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /opt/conda/lib/python3.9/site-packages (from langchain) (4.0.2)\n", - "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /opt/conda/lib/python3.9/site-packages (from langchain) (3.8.3)\n", - "Requirement already satisfied: numexpr<3.0.0,>=2.8.4 in /opt/conda/lib/python3.9/site-packages (from langchain) (2.8.4)\n", - "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /opt/conda/lib/python3.9/site-packages (from langchain) (8.2.2)\n", - "Requirement already satisfied: PyYAML>=5.4.1 in /opt/conda/lib/python3.9/site-packages (from langchain) (6.0)\n", - "Requirement already satisfied: tqdm in /opt/conda/lib/python3.9/site-packages (from openai) (4.64.1)\n", 
- "Requirement already satisfied: validators in /opt/conda/lib/python3.9/site-packages (from pyTigerGraph) (0.20.0)\n", - "Requirement already satisfied: pyTigerDriver in /opt/conda/lib/python3.9/site-packages (from pyTigerGraph) (1.0.15)\n", - "Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /opt/conda/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (2.1.1)\n", - "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (22.1.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.2)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.8.1)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.3)\n", - "Requirement already satisfied: marshmallow<4.0.0,>=3.3.0 in /opt/conda/lib/python3.9/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain) (3.19.0)\n", - "Requirement already satisfied: marshmallow-enum<2.0.0,>=1.5.1 in /opt/conda/lib/python3.9/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain) (1.5.1)\n", - "Requirement already satisfied: typing-inspect>=0.4.0 in /opt/conda/lib/python3.9/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain) (0.9.0)\n", - "Requirement already satisfied: typing-extensions>=4.2.0 in /opt/conda/lib/python3.9/site-packages (from pydantic<2,>=1->langchain) (4.4.0)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.9/site-packages (from requests<3,>=2->langchain) (1.26.11)\n", - "Requirement already satisfied: certifi>=2017.4.17 in 
/opt/conda/lib/python3.9/site-packages (from requests<3,>=2->langchain) (2022.9.24)\n", - "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.9/site-packages (from requests<3,>=2->langchain) (3.4)\n", - "Requirement already satisfied: greenlet!=0.4.17 in /opt/conda/lib/python3.9/site-packages (from SQLAlchemy<3,>=1.4->langchain) (2.0.2)\n", - "Requirement already satisfied: decorator>=3.4.0 in /opt/conda/lib/python3.9/site-packages (from validators->pyTigerGraph) (5.1.1)\n", - "Requirement already satisfied: packaging>=17.0 in /opt/conda/lib/python3.9/site-packages (from marshmallow<4.0.0,>=3.3.0->dataclasses-json<0.6.0,>=0.5.7->langchain) (21.3)\n", - "Requirement already satisfied: mypy-extensions>=0.3.0 in /opt/conda/lib/python3.9/site-packages (from typing-inspect>=0.4.0->dataclasses-json<0.6.0,>=0.5.7->langchain) (1.0.0)\n", - "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.9/site-packages (from packaging>=17.0->marshmallow<4.0.0,>=3.3.0->dataclasses-json<0.6.0,>=0.5.7->langchain) (3.0.9)\n" - ] - } - ], - "source": [ - "!pip install langchain openai" - ] - }, - { - "cell_type": "markdown", - "id": "b0917133-b028-436a-902d-11ded876c8f5", - "metadata": {}, - "source": [ - "## Configure Connection to OpenAI and TigerGraph Database\n", - "\n", - "Here, we are going to set our OpenAI API key as an environment variable for use by the LangChain library. **NOTE: You must provide your own OpenAI API key in the cell below.** Additionally, we are going to connect to our TigerGraph database, which has the OGB_MAG citation network loaded into it." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "1b84ed3f-447a-4350-b511-7a658516a82e", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "os.environ[\"OPENAI_API_KEY\"] = \"OPEN_AI_API_KEY_HERE\"" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "75794f08-da59-461d-9bbf-703f95cd0abd", - "metadata": {}, - "outputs": [], - "source": [ - "from pyTigerGraph import TigerGraphConnection\n", - "from pyTigerGraph.datasets import Datasets\n", - "import json\n", - "\n", - "# Read in DB configs\n", - "with open('./config.json', \"r\") as config_file:\n", - " config = json.load(config_file)\n", - " \n", - "conn = TigerGraphConnection(\n", - " host=config[\"host\"],\n", - " username=config[\"username\"],\n", - " password=config[\"password\"],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4cf53bd9", - "metadata": {}, - "outputs": [], - "source": [ - "dataset = Datasets(\"OGB_MAG\")\n", - "conn.ingestDataset(dataset, getToken=config[\"getToken\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "457a7e75-6dc4-4017-8284-fbc2008df77b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "\"Using graph 'OGB_MAG'\\nSuccessfully dropped queries on the graph 'OGB_MAG': [get_papers_of_author].\\nSuccessfully dropped queries on the graph 'OGB_MAG': [author_at_institutions].\\nSuccessfully dropped queries on the graph 'OGB_MAG': [tg_pagerank].\"" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Cleanup from previous executions of the notebook\n", - "\n", - "conn.gsql(\"\"\"USE GRAPH OGB_MAG\n", - " DROP QUERY get_papers_of_author\n", - " DROP QUERY author_at_institutions\n", - " DROP QUERY tg_pagerank\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "d47d02b0-0d9f-4b55-b9fe-b75629271eea", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - 
"{'FieldOfStudy': 59965,\n", - " 'Institution': 8740,\n", - " 'Paper': 736389,\n", - " 'Author': 1134649}" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "conn.getVertexCount(\"*\")" - ] - }, - { - "cell_type": "markdown", - "id": "26db7398-1b03-4741-9b8d-38f23edc44b3", - "metadata": {}, - "source": [ - "## Creating the LangChain Tools (Part 1)\n", - "\n", - "We are going to iterate through the building and testing of LangChain tools, progressively adding more functionality to the agent. First, we import the needed LangChain dependencies. Then, we are going to build three core tools: **MapQuestionToSchema**, **GenerateFunction**, and **ExecuteFunction**." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "575470a3-2845-446f-9f79-f042bb344f9d", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.agents import AgentType, initialize_agent, load_tools\n", - "from langchain.chat_models import ChatOpenAI\n", - "from langchain.llms.base import LLM\n", - "from langchain.tools import BaseTool, StructuredTool, Tool, tool\n", - "from langchain.tools.base import ToolException\n", - "from langchain.chains import LLMChain\n", - "from langchain import OpenAI\n", - "from langchain.prompts import PromptTemplate\n", - "from langchain.callbacks.manager import AsyncCallbackManagerForToolRun, CallbackManagerForToolRun\n", - "from typing import Optional, Type" - ] - }, - { - "cell_type": "markdown", - "id": "17e832bc-e130-457a-b1cb-8709b20d7924", - "metadata": {}, - "source": [ - "### Creating the MapQuestionToSchema Tool\n", - "We want to map a user's question in natural language to the specific graph schema. 
For example, a question such as \"How many writers are there?\" should translate to \"How many Author Vertices are there?\", as there is not a schema element for \"Writers\", but there is one for \"Author\".\n", - "\n", - "We implement this via another call to a large language model, while passing the original question along with the schema elements retrieved from the graph connection. Specifically, we use pyTigerGraph's `getVertexTypes()` and `getEdgeTypes()` functions to gather schema information to be passed to the LLM." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "5fda0b4d-7c56-4a7c-9476-d33b35a1207a", - "metadata": {}, - "outputs": [], - "source": [ - "class MapQuestionToSchema(BaseTool):\n", - " name = \"MapQuestionToSchema\"\n", - " description = \"Always run first to map the query to the graph's schema\"\n", - " conn: TigerGraphConnection = None\n", - " llm: LLM = None\n", - " handle_tool_error: bool = True\n", - " \n", - " def __init__(self, conn, llm):\n", - " super().__init__()\n", - " self.conn = conn\n", - " self.llm = llm\n", - " \n", - " def _run(self, query: str) -> str:\n", - " \"\"\"Use the tool.\"\"\"\n", - " question_restate = \"\"\"Replace the entites mentioned in the question to one of these choices: {vertices}.\n", - " Replace the relationships mentioned in the question to one of these choices: {edges}.\n", - " Generate the complete question with the appropriate replacements. 
Keep the case of the schema elements the same.\n", - " If there are no replacements to be made, restate the question.\n", - " Example: How many universities are there?\n", - " Response: How many vertices are University Vetexes?\n", - " Example: What is the schema?\n", - " Response: What is the schema?\n", - " Example: How many transactions are there?\n", - " Response: How many TRANSACTION Edges are there?\n", - " QUESTION: {question}\n", - " RESTATED: \"\"\"\n", - "\n", - " RESTATE_QUESTION_PROMPT = PromptTemplate(\n", - " template=question_restate, input_variables=[\"question\", \"vertices\", \"edges\"]\n", - " )\n", - " \n", - " restate_chain = LLMChain(llm=self.llm, prompt=RESTATE_QUESTION_PROMPT)\n", - " \n", - " restate_q = restate_chain.apply([{\"vertices\": [x + \" Vertex\" for x in self.conn.getVertexTypes()], # + [x + \" Edge\" for x in conn.getEdgeTypes()],\n", - " \"question\": query,\n", - " \"edges\": [x + \" Edge\" for x in self.conn.getEdgeTypes()]}])[0][\"text\"]\n", - " return restate_q\n", - " \n", - " async def _arun(self) -> str:\n", - " \"\"\"Use the tool asynchronously.\"\"\"\n", - " raise NotImplementedError(\"custom_search does not support async\")\n", - " \n", - " def _handle_error(self, error:ToolException) -> str:\n", - " return \"The following errors occurred during tool execution:\" + error.args[0]+ \"Please ask for human input if they asked their question correctly.\"" - ] - }, - { - "cell_type": "markdown", - "id": "c1f59118-8229-4346-90ab-097caceccc11", - "metadata": {}, - "source": [ - "### Creating the GenerateFunctionTool\n", - "\n", - "We want the LLM agent to be able to generate pyTigerGraph function calls for eventual execution. 
We are going to once again pass the schema information, the installed queries (even though there are none installed currently), and an example of a built-in function call for pyTigerGraph such as `getVertexCount()`.\n", - "\n", - "This tool also utilizes pyTigerGraph to retrieve the installed queries on the graph and their parameters. This information is passed to the prompt asking the LLM to generate the appropriate function call." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "785d1dd0-d705-4358-a2e4-61864e022720", - "metadata": {}, - "outputs": [], - "source": [ - "class GenerateFunction(BaseTool):\n", - " name = \"GenerateFunction\"\n", - " description = \"Generate a pyTigerGraph function call\"\n", - " conn: TigerGraphConnection = None\n", - " llm: LLM = None\n", - " handle_tool_error: bool =True\n", - " \n", - " def __init__(self, conn, llm):\n", - " super().__init__()\n", - " self.conn = conn\n", - " self.llm = llm\n", - " \n", - " def _run(self, question: str) -> str:\n", - " prompt_template = \"\"\"Use the vertex types, edge types to write the pyTigerGraph function call to answer the question using a pyTigerGraph connection.\n", - " For example, if a count of vertices is asked for, use getVertexCount().\n", - " If multiple entites are referred to in the question, use runInstalledQuery() when necessary.\n", - " If there is not a natural answer to the question, ask a human for help.\n", - " Parameters of queries to be replaced are denoted .\n", - " Vertex Types: {vertices}\n", - " Edge Types: {edges}\n", - " Queries: {queries}\n", - " Question: {question}\n", - " Python Call: conn.\"\"\"\n", - " \n", - " PROMPT = PromptTemplate(\n", - " template=prompt_template, input_variables=[\"question\", \"vertices\", \"queries\", \"edges\"]\n", - " )\n", - " queries = conn.getInstalledQueries()\n", - " [queries[x][\"parameters\"].pop(\"read_committed\") for x in queries] #remove read_committed\n", - " inputs = [{\"question\": question, \n", - " 
\"vertices\": self.conn.getVertexTypes(), \n", - " \"edges\": self.conn.getEdgeTypes(), \n", - " \"queries\": [{queries[x][\"alternative_endpoint\"].split(\"/\")[-1]: queries[x][\"parameters\"]} for x in queries]\n", - " }]\n", - " \n", - " chain = LLMChain(llm=self.llm, prompt=PROMPT)\n", - " generated = chain.apply(inputs)[0][\"text\"]\n", - " return generated\n", - "\n", - " async def _arun(self) -> str:\n", - " \"\"\"Use the tool asynchronously.\"\"\"\n", - " raise NotImplementedError(\"custom_search does not support async\")\n", - " \n", - " def _handle_error(error:ToolException) -> str:\n", - " return \"The following errors occurred during tool execution:\" + error.args[0]+ \"Please ask for human input if they asked their question correctly.\"" - ] - }, - { - "cell_type": "markdown", - "id": "3ce2de99-b903-494a-97d9-989ec84a8e81", - "metadata": {}, - "source": [ - "### Creating ExecuteFunction Tool\n", - "Once the function has been generated, the ExecuteFunction tool will utilize Python and pyTigerGraph to run the generated function against the TigerGraph database. The tool will return the JSON returned by the TigerGraph REST API endpoint." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "c7656cd0-9c9a-4714-a6f7-0b85b60aeada", - "metadata": {}, - "outputs": [], - "source": [ - "class ExecuteFunction(BaseTool):\n", - " name = \"ExecuteFunction\"\n", - " description = \"Execute a pyTigerGraph function\"\n", - " conn: TigerGraphConnection = None\n", - " handle_tool_error: bool =True\n", - " \n", - " def __init__(self, conn):\n", - " super().__init__()\n", - " self.conn = conn\n", - " \n", - " def _run(self, function: str) -> str:\n", - " try:\n", - " loc = {}\n", - " exec(\"res = conn.\"+function, {\"conn\": conn}, loc)\n", - " return loc[\"res\"]\n", - " except:\n", - " raise ToolException(\"The function {} did not execute directly. 
Please rephrase your question and try again\".format(function))\n", - " \n", - " async def _arun(self) -> str:\n", - " \"\"\"Use the tool asynchronously.\"\"\"\n", - " raise NotImplementedError(\"custom_search does not support async\")\n", - " \n", - " def _handle_error(error:ToolException) -> str:\n", - " return \"The following errors occurred during tool execution:\" + error.args[0]+ \"Please ask for human input if they asked their question correctly.\"" - ] - }, - { - "cell_type": "markdown", - "id": "9f92e579-2fc4-4000-9d49-9c867df895e8", - "metadata": {}, - "source": [ - "## Initialize LLM and AI Agent\n", - "\n", - "Here, we are going to initialize our OpenAI GPT-3 LLM connection, define our tools, and create our LangChain AI Agent. The Agent is able to execute multiple tasks/tools in a linear fashion until it is satisified to answer the question. In this case, we are going to use the tools defined above to start answering questions." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "8dfd0831-080d-440f-9c81-2f1c04d09f54", - "metadata": {}, - "outputs": [], - "source": [ - "llm = OpenAI(temperature=0, model_name=\"text-davinci-003\")" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "e7b6014f-7cb7-42b0-8912-16a7b843cc82", - "metadata": {}, - "outputs": [], - "source": [ - "tools = [MapQuestionToSchema(conn, llm), GenerateFunction(conn, llm), ExecuteFunction(conn)]" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "5dc4ba37-2a71-4b8f-bdb4-4eeab6a05b24", - "metadata": {}, - "outputs": [], - "source": [ - "agent = initialize_agent(\n", - " tools,\n", - " llm,\n", - " agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,\n", - " verbose=True,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "ed40b00a-bd50-4fd2-b5bd-ceb95a0e378f", - "metadata": {}, - "source": [ - "## Start Asking Questions\n", - "We are going to start asking the agent some questions. 
We are going to start easy and progressively get harder, and observe when things break. After that, we will modify our tool set and add queries to the database to help the agent answer more questions. We will start with basic counts of certain schema elements, then work our way into traversals and analytic queries." - ] - }, - { - "cell_type": "markdown", - "id": "32544960-0c68-4dc4-8b8b-ff6429556d0b", - "metadata": {}, - "source": [ - "### Some Easy Questions\n", - "Here, we ask the agent some basic questions, such as \"how many papers are there?\" and \"how many citations are there?\" - notice that the agent usedc the `getEdgeCount()` function of pyTigerGraph despite never specifically being prompted to do so. This shows the LLM's inference power to transfer the prompt example of `getVertexCount()` and use it for edge counting instead. Additionally, the \"How many writers are there?\" was correctly translated to \"How many Author Vertices are there?\" to match the graph's schema." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "b1ed9b9d-fe06-4511-9142-afd4d65f115b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", - "\u001b[32;1m\u001b[1;3m I need to find out how many Papers are in the graph\n", - "Action: MapQuestionToSchema\n", - "Action Input: How many Papers are there?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m How many vertices are Paper Vertexes?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to count the number of Paper Vertexes\n", - "Action: GenerateFunction\n", - "Action Input: Count the number of Paper Vertexes\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3mgetVertexCount('Paper')\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function to get the count\n", - "Action: ExecuteFunction\n", - "Action Input: getVertexCount('Paper')\u001b[0m\n", 
- "Observation: \u001b[38;5;200m\u001b[1;3m736389\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", - "Final Answer: There are 736389 Papers.\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "'There are 736389 Papers.'" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "agent.run(\"How many Papers are there?\")" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "03302d64-2410-4542-9257-3f6761cab1fa", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", - "\u001b[32;1m\u001b[1;3m I need to find out how many citations are in the graph\n", - "Action: MapQuestionToSchema\n", - "Action Input: How many citations are there?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m How many CITES Edges are there?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to count the number of CITES edges\n", - "Action: GenerateFunction\n", - "Action Input: COUNT CITES\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3mgetEdgeCount('CITES')\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function to get the number of CITES edges\n", - "Action: ExecuteFunction\n", - "Action Input: getEdgeCount('CITES')\u001b[0m\n", - "Observation: \u001b[38;5;200m\u001b[1;3m5416271\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", - "Final Answer: There are 5416271 citations.\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "'There are 5416271 citations.'" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "agent.run(\"How many citations are there?\")" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": 
"7d0ee570-5c26-47f9-a744-03e23c8472ac", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", - "\u001b[32;1m\u001b[1;3m I need to find out how many writers there are in the graph.\n", - "Action: MapQuestionToSchema\n", - "Action Input: How many writers are there?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m How many vertices are Author Vertexes?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to count the number of Author vertices.\n", - "Action: GenerateFunction\n", - "Action Input: Count the number of Author vertices\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3mgetVertexCount('Author')\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function to get the answer.\n", - "Action: ExecuteFunction\n", - "Action Input: getVertexCount('Author')\u001b[0m\n", - "Observation: \u001b[38;5;200m\u001b[1;3m1134649\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n", - "Final Answer: 1134649 writers.\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "'1134649 writers.'" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "agent.run(\"How many writers are there?\")" - ] - }, - { - "cell_type": "markdown", - "id": "5f6b5d8f-da57-437d-8e94-576dc2e7a9da", - "metadata": {}, - "source": [ - "### Some Hard Questions\n", - "Here, we are going to ask some questions that the current prompts and queries installed on the database can't answer. We will start with a simple `getVertexCount()` but with a `where` filter, then move on to traversal and analytic queries. These questions will cause the agent to work really hard, but won't find a solution within the execution limit." 
- ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "dcc5412c-e32f-421f-ad1e-741889eabe7b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", - "\u001b[32;1m\u001b[1;3m I need to find all the papers with a y attribute equal to 1.\n", - "Action: MapQuestionToSchema\n", - "Action Input: How many papers have a y attribute equal to 1?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m How many Paper Vertexes have a y attribute equal to 1?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to find the answer\n", - "Action: GenerateFunction\n", - "Action Input: Find all Paper Vertexes with a y attribute equal to 1\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3mrunInstalledQuery('Find_Papers_with_y_attribute_equal_to_1', {'y': 1})\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function\n", - "Action: ExecuteFunction\n", - "Action Input: runInstalledQuery('Find_Papers_with_y_attribute_equal_to_1', {'y': 1})\u001b[0m\n", - "Observation: \u001b[31;1m\u001b[1;3mThe function runInstalledQuery('Find_Papers_with_y_attribute_equal_to_1', {'y': 1}) did not execute directly. 
Please rephrase your question and try again\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to rephrase my question\n", - "Action: MapQuestionToSchema\n", - "Action Input: How many Paper Vertexes have a y attribute equal to 1?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m How many Paper Vertexes have an attribute y equal to 1?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to find the answer\n", - "Action: GenerateFunction\n", - "Action Input: Find all Paper Vertexes with an attribute y equal to 1\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3mrunInstalledQuery('Find_Papers_with_Attribute_y_Equal_to_1', {'y': 1})\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function\n", - "Action: ExecuteFunction\n", - "Action Input: runInstalledQuery('Find_Papers_with_Attribute_y_Equal_to_1', {'y': 1})\u001b[0m\n", - "Observation: \u001b[31;1m\u001b[1;3mThe function runInstalledQuery('Find_Papers_with_Attribute_y_Equal_to_1', {'y': 1}) did not execute directly. 
Please rephrase your question and try again\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to rephrase my question\n", - "Action: MapQuestionToSchema\n", - "Action Input: How many Paper Vertexes have an attribute y equal to 1?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m How many Paper Vertexes have an attribute y equal to 1?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to find the answer\n", - "Action: GenerateFunction\n", - "Action Input: Find all Paper Vertexes with an attribute y equal to 1\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3mrunInstalledQuery('Find_Papers_with_Attribute_y_Equal_to_1', {'y': 1})\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function\n", - "Action: ExecuteFunction\n", - "Action Input: runInstalledQuery('Find_Papers_with_Attribute_y_Equal_to_1', {'y': 1})\u001b[0m\n", - "Observation: \u001b[31;1m\u001b[1;3mThe function runInstalledQuery('Find_Papers_with_Attribute_y_Equal_to_1', {'y': 1}) did not execute directly. 
Please rephrase your question and try again\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to rephrase my question\n", - "Action: MapQuestionToSchema\n", - "Action Input: How many Paper Vertexes have an attribute y equal to 1?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m How many Paper Vertexes have an attribute y equal to 1?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to find the answer\n", - "Action: GenerateFunction\n", - "Action Input: Find all Paper Vertexes with an attribute y equal to 1\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3mrunInstalledQuery('Find_Papers_with_Attribute_y_Equal_to_1', {'y': 1})\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function\n", - "Action: ExecuteFunction\n", - "Action Input: runInstalledQuery('Find_Papers_with_Attribute_y_Equal_to_1', {'y': 1})\u001b[0m\n", - "Observation: \u001b[31;1m\u001b[1;3mThe function runInstalledQuery('Find_Papers_with_Attribute_y_Equal_to_1', {'y': 1}) did not execute directly. 
Please rephrase your question and try again\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to rephrase my question\n", - "Action: MapQuestionToSchema\n", - "Action Input: How many Paper Vertexes have an attribute y equal to 1?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m How many Paper Vertexes have an attribute y equal to 1?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to find the answer\n", - "Action: GenerateFunction\n", - "Action Input: Find all Paper Vertexes with an attribute y equal to 1\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3mrunInstalledQuery('Find_Papers_with_Attribute_y_Equal_to_1', {'y': 1})\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function\n", - "Action: ExecuteFunction\n", - "Action Input: runInstalledQuery('Find_Papers_with_Attribute_y_Equal_to_1', {'y': 1})\u001b[0m\n", - "Observation: \u001b[31;1m\u001b[1;3mThe function runInstalledQuery('Find_Papers_with_Attribute_y_Equal_to_1', {'y': 1}) did not execute directly. 
Please rephrase your question and try again\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "'Agent stopped due to iteration limit or time limit.'" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "agent.run(\"How many papers have a y attribute equal to 1?\")" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "5660ebb5-9877-4d65-bb64-adf4adc042c8", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", - "\u001b[32;1m\u001b[1;3m I need to find the institutions associated with author 45\n", - "Action: MapQuestionToSchema\n", - "Action Input: What institutions did author 45 work at?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m What Institutions are IS_AFFILIATED_WITH Edges connected to Author Vertex 45?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to get the institutions associated with author 45\n", - "Action: GenerateFunction\n", - "Action Input: Get the IS_AFFILIATED_WITH Edges connected to Author Vertex 45\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3mrunInstalledQuery('getEdges', {'vertexType': 'Author', 'vertexId': '45', 'edgeType': 'IS_AFFILIATED_WITH'})\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function to get the institutions associated with author 45\n", - "Action: ExecuteFunction\n", - "Action Input: runInstalledQuery('getEdges', {'vertexType': 'Author', 'vertexId': '45', 'edgeType': 'IS_AFFILIATED_WITH'})\u001b[0m\n", - "Observation: \u001b[31;1m\u001b[1;3mThe function runInstalledQuery('getEdges', {'vertexType': 'Author', 'vertexId': '45', 'edgeType': 'IS_AFFILIATED_WITH'}) did not execute directly. 
Please rephrase your question and try again\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to find the institutions associated with author 45\n", - "Action: MapQuestionToSchema\n", - "Action Input: What institutions is author 45 affiliated with?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m What Institutions are Vertexes connected to Author Vertex 45 via the IS_AFFILIATED_WITH Edge?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to get the institutions associated with author 45\n", - "Action: GenerateFunction\n", - "Action Input: Get the Vertexes connected to Author Vertex 45 via the IS_AFFILIATED_WITH Edge\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3mrunInstalledQuery('getConnectedVertexes', {'vertex_id': '45', 'vertex_type': 'Author', 'edge_type': 'IS_AFFILIATED_WITH'})\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function to get the institutions associated with author 45\n", - "Action: ExecuteFunction\n", - "Action Input: runInstalledQuery('getConnectedVertexes', {'vertex_id': '45', 'vertex_type': 'Author', 'edge_type': 'IS_AFFILIATED_WITH'})\u001b[0m\n", - "Observation: \u001b[31;1m\u001b[1;3mThe function runInstalledQuery('getConnectedVertexes', {'vertex_id': '45', 'vertex_type': 'Author', 'edge_type': 'IS_AFFILIATED_WITH'}) did not execute directly. 
Please rephrase your question and try again\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to find the institutions associated with author 45\n", - "Action: MapQuestionToSchema\n", - "Action Input: What institutions is author 45 affiliated with?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m What Institutions are Vertexes connected to Author Vertex 45 by an IS_AFFILIATED_WITH Edge?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to get the institutions associated with author 45\n", - "Action: GenerateFunction\n", - "Action Input: Get the Vertexes connected to Author Vertex 45 by an IS_AFFILIATED_WITH Edge\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3mrunInstalledQuery('getConnectedVertexes', {'vertex_id': 45, 'vertex_type': 'Author', 'edge_type': 'IS_AFFILIATED_WITH'})\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function to get the institutions associated with author 45\n", - "Action: ExecuteFunction\n", - "Action Input: runInstalledQuery('getConnectedVertexes', {'vertex_id': 45, 'vertex_type': 'Author', 'edge_type': 'IS_AFFILIATED_WITH'})\u001b[0m\n", - "Observation: \u001b[31;1m\u001b[1;3mThe function runInstalledQuery('getConnectedVertexes', {'vertex_id': 45, 'vertex_type': 'Author', 'edge_type': 'IS_AFFILIATED_WITH'}) did not execute directly. 
Please rephrase your question and try again\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to find the institutions associated with author 45\n", - "Action: MapQuestionToSchema\n", - "Action Input: What institutions is author 45 affiliated with?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m What Institutions are Vertexes connected to Author Vertex 45 via the IS_AFFILIATED_WITH Edge?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to get the institutions associated with author 45\n", - "Action: GenerateFunction\n", - "Action Input: Get the Vertexes connected to Author Vertex 45 via the IS_AFFILIATED_WITH Edge\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3mrunInstalledQuery('getConnectedVertexes', {'vertex_id': 45, 'vertex_type': 'Author', 'edge_type': 'IS_AFFILIATED_WITH'})\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function to get the institutions associated with author 45\n", - "Action: ExecuteFunction\n", - "Action Input: runInstalledQuery('getConnectedVertexes', {'vertex_id': 45, 'vertex_type': 'Author', 'edge_type': 'IS_AFFILIATED_WITH'})\u001b[0m\n", - "Observation: \u001b[31;1m\u001b[1;3mThe function runInstalledQuery('getConnectedVertexes', {'vertex_id': 45, 'vertex_type': 'Author', 'edge_type': 'IS_AFFILIATED_WITH'}) did not execute directly. 
Please rephrase your question and try again\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to find the institutions associated with author 45\n", - "Action: MapQuestionToSchema\n", - "Action Input: What institutions is author 45 affiliated with?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m What Institutions are Vertexes connected to Author Vertex 45 via the IS_AFFILIATED_WITH Edge?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to get the institutions associated with author 45\n", - "Action: GenerateFunction\n", - "Action Input: Get the Vertexes connected to Author Vertex 45 via the IS_AFFILIATED_WITH Edge\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3mrunInstalledQuery('getConnectedVertexes', {'vertex_id': '45', 'vertex_type': 'Author', 'edge_type': 'IS_AFFILIATED_WITH'})\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function to get the institutions associated with author 45\n", - "Action: ExecuteFunction\n", - "Action Input: runInstalledQuery('getConnectedVertexes', {'vertex_id': '45', 'vertex_type': 'Author', 'edge_type': 'IS_AFFILIATED_WITH'})\u001b[0m\n", - "Observation: \u001b[31;1m\u001b[1;3mThe function runInstalledQuery('getConnectedVertexes', {'vertex_id': '45', 'vertex_type': 'Author', 'edge_type': 'IS_AFFILIATED_WITH'}) did not execute directly. 
Please rephrase your question and try again\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "'Agent stopped due to iteration limit or time limit.'" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "agent.run(\"What institutions did author 45 work at?\")" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "14c2402d-1e35-499e-95fe-fbe626108ae6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", - "\u001b[32;1m\u001b[1;3m I need to find the papers with the most citations\n", - "Action: MapQuestionToSchema\n", - "Action Input: By citations, what are the 10 most influential papers?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m By CITES Edges, what are the 10 most influential Paper Vertices?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to get the most influential papers\n", - "Action: GenerateFunction\n", - "Action Input: By CITES Edges, what are the 10 most influential Paper Vertices?\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3mrunInstalledQuery('MostInfluentialPapers', {'topK': 10})\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function to get the most influential papers\n", - "Action: ExecuteFunction\n", - "Action Input: runInstalledQuery('MostInfluentialPapers', {'topK': 10})\u001b[0m\n", - "Observation: \u001b[31;1m\u001b[1;3mThe function runInstalledQuery('MostInfluentialPapers', {'topK': 10}) did not execute directly. 
Please rephrase your question and try again\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to find the papers with the most citations\n", - "Action: MapQuestionToSchema\n", - "Action Input: By citations, what are the 10 most cited papers?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m By CITES Edges, what are the 10 most cited Paper Vertices?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to get the most cited papers\n", - "Action: GenerateFunction\n", - "Action Input: By CITES Edges, what are the 10 most cited Paper Vertices?\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3mrunInstalledQuery('most_cited_papers', {'top_k': 10})\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function to get the most cited papers\n", - "Action: ExecuteFunction\n", - "Action Input: runInstalledQuery('most_cited_papers', {'top_k': 10})\u001b[0m\n", - "Observation: \u001b[31;1m\u001b[1;3mThe function runInstalledQuery('most_cited_papers', {'top_k': 10}) did not execute directly. 
Please rephrase your question and try again\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to find the papers with the most citations\n", - "Action: MapQuestionToSchema\n", - "Action Input: What are the 10 most cited papers?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m What are the 10 most CITED Edges?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to get the most cited papers\n", - "Action: GenerateFunction\n", - "Action Input: What are the 10 most CITED Edges?\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3mrunInstalledQuery('MostCitedEdges', {'topK': 10})\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function to get the most cited papers\n", - "Action: ExecuteFunction\n", - "Action Input: runInstalledQuery('MostCitedEdges', {'topK': 10})\u001b[0m\n", - "Observation: \u001b[31;1m\u001b[1;3mThe function runInstalledQuery('MostCitedEdges', {'topK': 10}) did not execute directly. Please rephrase your question and try again\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to find the papers with the most citations\n", - "Action: MapQuestionToSchema\n", - "Action Input: What are the 10 most cited papers?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m What are the 10 most CITED Edges?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to get the most cited papers\n", - "Action: GenerateFunction\n", - "Action Input: What are the 10 most CITED Edges?\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3mrunInstalledQuery('MostCitedEdges', {'topK': 10})\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function to get the most cited papers\n", - "Action: ExecuteFunction\n", - "Action Input: runInstalledQuery('MostCitedEdges', {'topK': 10})\u001b[0m\n", - "Observation: \u001b[31;1m\u001b[1;3mThe function runInstalledQuery('MostCitedEdges', {'topK': 10}) did not execute directly. 
Please rephrase your question and try again\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to find the papers with the most citations\n", - "Action: MapQuestionToSchema\n", - "Action Input: What are the 10 most cited papers?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m What are the 10 most CITED Edges?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to get the most cited papers\n", - "Action: GenerateFunction\n", - "Action Input: What are the 10 most CITED Edges?\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3mrunInstalledQuery('MostCitedEdges', {'topK': 10})\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function to get the most cited papers\n", - "Action: ExecuteFunction\n", - "Action Input: runInstalledQuery('MostCitedEdges', {'topK': 10})\u001b[0m\n", - "Observation: \u001b[31;1m\u001b[1;3mThe function runInstalledQuery('MostCitedEdges', {'topK': 10}) did not execute directly. Please rephrase your question and try again\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "'Agent stopped due to iteration limit or time limit.'" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "agent.run(\"By citations, what are the 10 most influential papers?\")" - ] - }, - { - "cell_type": "markdown", - "id": "cfa52e60-9fdc-45dc-b3d2-a94299923914", - "metadata": {}, - "source": [ - "## Add Some Queries to Better Answer Questions\n", - "\n", - "Here we are going to add some queries to the graph to better answer some \"domain specific\" questions. These queries are quite simple, but provide a template of what more complex queries could take the form of." 
- ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "c4f9ece8-078f-4ca7-bdd7-b93b9ab0af93", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Using graph \\'OGB_MAG\\'\\nWarning in a transformed block of query author_at_institutions (WARN-2): line 6, col 31\\nunsatisfiable pattern _\\nSuccessfully created queries: [author_at_institutions].\\nStart installing queries, about 1 minute ...\\nauthor_at_institutions query: curl -X GET \\'https://127.0.0.1:9000/query/OGB_MAG/author_at_institutions?auth_id=VALUE\\'. Add -H \"Authorization: Bearer TOKEN\" if authentication is enabled.\\nSelect \\'m1\\' as compile server, now connecting ...\\nNode \\'m1\\' is prepared as compile server.\\n\\nQuery installation finished.'" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "conn.gsql(\"\"\"USE GRAPH OGB_MAG\n", - " CREATE OR REPLACE QUERY author_at_institutions(VERTEX auth_id) FOR GRAPH OGB_MAG { \n", - " start = {auth_id};\n", - "\n", - " res = SELECT i FROM start:s -()- Institution:i;\n", - " PRINT res[res.id];\n", - " }\n", - " INSTALL QUERY author_at_institutions\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "71fa7fbd-6692-4be4-983e-1d2b62f46460", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Using graph \\'OGB_MAG\\'\\nWarning in a transformed block of query get_papers_of_author (WARN-2): line 5, col 31\\nunsatisfiable pattern _\\nSuccessfully created queries: [get_papers_of_author].\\nStart installing queries, about 1 minute ...\\nget_papers_of_author query: curl -X GET \\'https://127.0.0.1:9000/query/OGB_MAG/get_papers_of_author?auth=VALUE\\'. 
Add -H \"Authorization: Bearer TOKEN\" if authentication is enabled.\\nSelect \\'m1\\' as compile server, now connecting ...\\nNode \\'m1\\' is prepared as compile server.\\n\\nQuery installation finished.'" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "conn.gsql(\"\"\"USE GRAPH OGB_MAG\n", - " CREATE OR REPLACE QUERY get_papers_of_author(VERTEX auth) FOR GRAPH OGB_MAG { \n", - " start = {auth};\n", - " res = SELECT p FROM start:s -()- Paper:p;\n", - " PRINT res[res.id];\n", - " }\n", - " INSTALL QUERY get_papers_of_author\"\"\")" - ] - }, - { - "cell_type": "markdown", - "id": "9836eaec-b896-489f-b5fd-974ee6d4547e", - "metadata": {}, - "source": [ - "### Test the Queries with the Agent\n", - "Since the **GenerateFunction** tool automatically pulls the queries available to it at execution time, we can immediately test if our new queries are used by the agent." - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "d8970c43-7561-4aae-bf22-2e2e7d2db590", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", - "\u001b[32;1m\u001b[1;3m I need to find the institutions associated with author 45\n", - "Action: MapQuestionToSchema\n", - "Action Input: What institutions did author 45 work at?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m What Institutions are IS_AFFILIATED_WITH Edges connected to Author Vertex 45?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to get the institutions associated with author 45\n", - "Action: GenerateFunction\n", - "Action Input: Get the IS_AFFILIATED_WITH Edges connected to Author Vertex 45\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3mrunInstalledQuery('author_at_institutions', {'auth_id': 45})\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function to get the institutions 
associated with author 45\n", - "Action: ExecuteFunction\n", - "Action Input: runInstalledQuery('author_at_institutions', {'auth_id': 45})\u001b[0m\n", - "Observation: \u001b[38;5;200m\u001b[1;3m[{'res': [{'v_id': '649', 'v_type': 'Institution', 'attributes': {'res.id': 649}}]}]\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", - "Final Answer: The institution with id 649 is the institution that author 45 is affiliated with.\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "'The institution with id 649 is the institution that author 45 is affiliated with.'" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "agent.run(\"What institutions did author 45 work at?\")" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "c27a7585-4c63-41b4-8f17-cd85b22a9954", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", - "\u001b[32;1m\u001b[1;3m I need to find the papers written by author 5\n", - "Action: MapQuestionToSchema\n", - "Action Input: What papers did author 5 write?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m What WROTE Edges are there from Author Vertex 5?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to get the papers written by author 5\n", - "Action: GenerateFunction\n", - "Action Input: Get WROTE Edges from Author Vertex 5\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3mrunInstalledQuery('get_papers_of_author', {'auth': '5'})\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function to get the papers written by author 5\n", - "Action: ExecuteFunction\n", - "Action Input: runInstalledQuery('get_papers_of_author', {'auth': '5'})\u001b[0m\n", - "Observation: \u001b[38;5;200m\u001b[1;3m[{'res': [{'v_id': '539403', 
'v_type': 'Paper', 'attributes': {'res.id': 539403}}, {'v_id': '676656', 'v_type': 'Paper', 'attributes': {'res.id': 676656}}, {'v_id': '215835', 'v_type': 'Paper', 'attributes': {'res.id': 215835}}]}]\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", - "Final Answer: The papers written by author 5 are 539403, 676656, and 215835.\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "'The papers written by author 5 are 539403, 676656, and 215835.'" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "agent.run(\"What papers did author 5 write?\")" - ] - }, - { - "cell_type": "markdown", - "id": "88370716-444f-403c-ad9a-e629187628a2", - "metadata": {}, - "source": [ - "## Add Graph Data Science Capabilities\n", - "\n", - "One of the questions that the agent failed to answer was \"What is the most influential paper based on citations?\". This is a question that the *PageRank algorithm* can answer. Here, we are going to install the TigerGraph implementation of PageRank to make it available to the agent." 
- ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "9ddd107d-3099-4f6e-8549-1a41ff9e775a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Installing and optimizing the queries, it might take a minute...\n", - "Queries installed successfully\n" - ] - }, - { - "data": { - "text/plain": [ - "'tg_pagerank'" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "f = conn.gds.featurizer()\n", - "\n", - "f.installAlgorithm(\"tg_pagerank\")" - ] - }, - { - "cell_type": "markdown", - "id": "d8755d37-1eb9-4941-ac2f-b14ab5f7c24b", - "metadata": {}, - "source": [ - "### Test the Question\n", - "Here, the agent maps the question of \"influence\" to the available PageRank algorithm, and correctly fills in the parameters." - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "0663d4ee-21ef-4c2e-8ddd-668ea06bf840", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", - "\u001b[32;1m\u001b[1;3m I need to find the papers with the most citations\n", - "Action: MapQuestionToSchema\n", - "Action Input: By citations, what are the 10 most influential papers?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m By CITES Edges, what are the 10 most influential Paper Vertices?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to get the most influential papers\n", - "Action: GenerateFunction\n", - "Action Input: By CITES Edges, what are the 10 most influential Paper Vertices?\u001b[0m\n", - "Observation: \u001b[33;1m\u001b[1;3mrunInstalledQuery('tg_pagerank', {'v_type': 'Paper', 'e_type': 'CITES', 'top_k': 10})\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function to get the most influential papers\n", - "Action: ExecuteFunction\n", - "Action Input: 
runInstalledQuery('tg_pagerank', {'v_type': 'Paper', 'e_type': 'CITES', 'top_k': 10})\u001b[0m\n", - "Observation: \u001b[38;5;200m\u001b[1;3m[{'@@top_scores_heap': [{'Vertex_ID': '428523', 'score': 392.87326}, {'Vertex_ID': '384889', 'score': 251.80214}, {'Vertex_ID': '377502', 'score': 149.10182}, {'Vertex_ID': '61855', 'score': 129.7406}, {'Vertex_ID': '416200', 'score': 129.22864}, {'Vertex_ID': '380836', 'score': 125.35667}, {'Vertex_ID': '40415', 'score': 122.92435}, {'Vertex_ID': '437914', 'score': 117.94849}, {'Vertex_ID': '394406', 'score': 110.56911}, {'Vertex_ID': '533629', 'score': 103.74941}]}]\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer\n", - "Final Answer: The 10 most influential papers, by citations, are 428523, 384889, 377502, 61855, 416200, 380836, 40415, 437914, 394406, and 533629.\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "'The 10 most influential papers, by citations, are 428523, 384889, 377502, 61855, 416200, 380836, 40415, 437914, 394406, and 533629.'" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "agent.run(\"By citations, what are the 10 most influential papers?\")" - ] - }, - { - "cell_type": "markdown", - "id": "81ce6d1a-4559-4375-b34e-370029ede329", - "metadata": {}, - "source": [ - "## Create Specific Tools\n", - "There is one more question that originally failed that we have not yet resolved - the question of \"How many Papers have a y attribute equal to 1?\". To do this, we could write a GSQL query. However, the pyTigerGraph `getVertexCount()` function has a `where` parameter that can be used to accomplish the same task. Unfortunately, the agent doesn't know this due to the lack of specific knowledge about pyTigerGraph. To resolve this, we can define a custom tool for getting the vertex count." 
- ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "c0a97c15-1399-4bd5-a2fe-42642b8cb907", - "metadata": {}, - "outputs": [], - "source": [ - "class GenerateVertexCountFunction(BaseTool):\n", - " name = \"GenerateVertexCountFunction\"\n", - " description = \"Generate a pyTigerGraph function call to count vertices with filters.\"\n", - " conn: TigerGraphConnection = None\n", - " llm: LLM = None\n", - " handle_tool_error: bool =True\n", - " \n", - " def __init__(self, conn, llm):\n", - " super().__init__()\n", - " self.conn = conn\n", - " self.llm = llm\n", - " \n", - " def _run(self, question: str) -> str:\n", - " prompt_template = \"\"\"Use the vertex types and any potential filters to generate the conn.getVertexCount() function call.\n", - " \"\"\"+self.conn.getVertexCount.__doc__.replace(\"{\", \"\").replace(\"}\", \"\")+\"\"\"\n", - " Vertex Types: {vertices}\n", - " Question: {question}\n", - " Python Call to run the function: conn.\"\"\"\n", - " \n", - " \n", - " PROMPT = PromptTemplate(\n", - " template=prompt_template, input_variables=[\"question\", \"vertices\"]\n", - " )\n", - " queries = conn.getInstalledQueries()\n", - " inputs = [{\"question\": question, \n", - " \"vertices\": self.conn.getVertexTypes()\n", - " }]\n", - " \n", - " chain = LLMChain(llm=self.llm, prompt=PROMPT)\n", - " generated = chain.apply(inputs)[0][\"text\"]\n", - " return \"Use in ExecuteFunction tool: \"+generated\n", - " \n", - " async def _arun(self) -> str:\n", - " \"\"\"Use the tool asynchronously.\"\"\"\n", - " raise NotImplementedError(\"custom_search does not support async\")" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "d84e29e5-ce30-4315-a7f3-63527a404fee", - "metadata": {}, - "outputs": [], - "source": [ - "tools = [MapQuestionToSchema(conn, llm), GenerateFunction(conn, llm), GenerateVertexCountFunction(conn, llm), ExecuteFunction(conn)]" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": 
"a1d2d9a9-ccc0-4df2-ab8f-7bfb0a33f331", - "metadata": {}, - "outputs": [], - "source": [ - "agent = initialize_agent(\n", - " tools,\n", - " llm,\n", - " agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,\n", - " verbose=True,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "b60c764b-a34b-4500-895c-d493998390f1", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", - "\u001b[32;1m\u001b[1;3m I need to count the number of papers with a y attribute equal to 1.\n", - "Action: MapQuestionToSchema\n", - "Action Input: How many papers have a y attribute equal to 1?\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m How many Paper Vertexes have a y attribute equal to 1?\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to generate a function to count the number of papers with a y attribute equal to 1.\n", - "Action: GenerateVertexCountFunction\n", - "Action Input: Paper Vertex, y attribute equal to 1\u001b[0m\n", - "Observation: \u001b[38;5;200m\u001b[1;3mUse in ExecuteFunction tool: getVertexCount('Paper', where='y=1')\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I need to execute the function to get the answer.\n", - "Action: ExecuteFunction\n", - "Action Input: getVertexCount('Paper', where='y=1')\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3m30902\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m I now know the final answer.\n", - "Final Answer: 30902 papers have a y attribute equal to 1.\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "'30902 papers have a y attribute equal to 1.'" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "agent.run(\"How many papers have a y attribute equal to 1?\")" - ] - }, - { - "cell_type": "markdown", - "id": "e5e1d2af-1176-472e-b93e-b79d6da4e98e", - 
"metadata": {}, - "source": [ - "# Conclusion\n", - "\n", - "Integrating TigerGraph with LangChain using pyTigerGraph opens up exciting possibilities for enhancing natural language interfaces for graph databases. By combining the power of LLMs with the flexibility of graph databases, users can interact with the data using everyday language, reducing the learning curve and enabling more intuitive data exploration. With the LangChain AI Agent, users can ask questions, receive meaningful responses, and gain insights from the graph database in a seamless and user-friendly manner. These agents can be used by business analysts for internal uses, or can serve as the foundation for chatbots to serve results to customers." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4bf6703f-dfe5-409b-9d31-301c2114f832", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3.8.9 64-bit", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.9" - }, - "vscode": { - "interpreter": { - "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/basics/data_loaders.ipynb b/basics/data_loaders.ipynb deleted file mode 100644 index 5a16c7d..0000000 --- a/basics/data_loaders.ipynb +++ /dev/null @@ -1,761 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "31a41fec-c7f0-4752-b57a-efb428327343", - "metadata": { - "tags": [] - }, - "source": [ - "# Data Loaders" - ] - }, - { - "cell_type": "markdown", - "id": "85d51c63-ed05-43d4-9e02-7d13763b5011", - "metadata": {}, - "source": [ - "This notebook demonstrates the use of **data loaders** in `pyTigerGraph`. 
The job of a data loader is to pull data from the TigerGraph database to the machine that runs the data loaders. Currently, the following data loaders are provided:\n", - "* EdgeLoader, which returns batches of edges.\n", - "* VertexLoader, which returns batches of vertices.\n", - "* GraphLoader, which returns randomly sampled (probably disconnected) subgraphs in pandas `dataframe`, `PyG` or `DGL` format.\n", - "* NeighborLoader, which returns subgraphs as sampled in [GraphSAGE](https://arxiv.org/abs/1706.02216) in `dataframe`, `PyG` or `DGL` format.\n", - "* EdgeNeighborLoader, which returns subgraphs using neighbor sampling from edges in `dataframe`, `PyG` or `DGL` format.\n", - "* HGTLoader, which works similarly as NeighborLoader but performs stratified neighbor sampling as in [Heterogeneous Graph Transformer](https://arxiv.org/abs/2003.01332).\n", - "\n", - "Every data loader above can either get all the batches as a HTTP response (default) or stream every batch through Kafka. The former mechanism is good for testing with small graphs and it is fast, but it subjects to a data size limit of 2GB. For large graphs, the HTTP channel will likely fail due to size limit and network connectivity issues. Streaming via Kafka is offered for data robustness and scalability. Also, Kafka excels at multi-consumer use cases, and is required by distributed loader groups. \n", - "\n", - "The data loaders support both homogeneous and heterogenous graphs. By default, they load from all vertex and edge types and treat the graph as a homogeneous graph. But they also allow users to specify what vertex and edge types to load from and what attributes to load from each type. This way users will get heterogeneous graph outputs.\n", - "\n", - "**NOTE**: Currently, your database needs to be activated (only once) to work with the data loaders. If you are using the ML Workbench on tgCloud, then the bundled database is activated. 
Otherwise, you can download the activator at https://act.tigergraphlabs.com. Detailed instructions are also included on that website. \n", - "\n", - "Below we will use `NeighborLoader` for illustration. The other data loaders work similarly but use slightly different parameters. Please refer to the [reference doc](https://docs.tigergraph.com/pytigergraph/current/gds/gds) on details for each data loader." - ] - }, - { - "cell_type": "markdown", - "id": "57e8dc12-390c-4830-8bbb-3e8e80763867", - "metadata": {}, - "source": [ - "## Table of Contents\n", - "* [Connection to Database](#connection) \n", - "* [Neighbor Loader](#neighborloader)\n", - " * [Homogeneous Graph](#homogeneous)\n", - " * [Heterogeneous Graph](#heterogeneous)\n", - "* [Streaming through Kafka](#kafka) " - ] - }, - { - "cell_type": "markdown", - "id": "9ff08e15-5d93-4f30-8a9d-6b101b1604e4", - "metadata": {}, - "source": [ - "## Connection to Database \n", - "\n", - "The `TigerGraphConnection` class represents a connection to the TigerGraph database. Under the hood, it stores the necessary information to communicate with the database. It is able to perform quite a few database tasks. Please see its [documentation](https://docs.tigergraph.com/pytigergraph/current/intro/) for details.\n", - "\n", - "To connect your database, modify the `config.json` file accompanying this notebook. Set the value of `getToken` based on whether token auth is enabled for your database. Token auth is always enabled for tgcloud databases. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "c2c4b1e8-a0e2-4026-9bb1-218cdc7ca4b9", - "metadata": {}, - "outputs": [], - "source": [ - "from pyTigerGraph import TigerGraphConnection\n", - "import json\n", - "\n", - "# Read in DB configs\n", - "with open('../config.json', \"r\") as config_file:\n", - " config = json.load(config_file)\n", - " \n", - "conn = TigerGraphConnection(\n", - " host=config[\"host\"],\n", - " username=config[\"username\"],\n", - " password=config[\"password\"]\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "91d0deb8-9029-494c-8175-7675fddc862a", - "metadata": {}, - "source": [ - "### Ingest Data" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "6b7d1033-06ab-4392-8baf-df27046c57af", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "A folder with name Cora already exists in ./tmp. Skip downloading.\n" - ] - } - ], - "source": [ - "from pyTigerGraph.datasets import Datasets\n", - "\n", - "dataset = Datasets(\"Cora\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "2bc82ea1-33f5-4605-a303-a108a64265cf", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "---- Checking database ----\n", - "A graph with name Cora already exists in the database. 
Skip ingestion.\n", - "Graph name is set to Cora for this connection.\n" - ] - } - ], - "source": [ - "conn.ingestDataset(dataset, getToken=config[\"getToken\"])" - ] - }, - { - "cell_type": "markdown", - "id": "b7bd1ef7-fc77-4ff3-931f-ebc05aa7f2b5", - "metadata": {}, - "source": [ - "### Visualize Schema" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "58b5073a-79fc-4156-8ebd-e20c1a2df8bb", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e6960c3d17a047379cebebf0cef81617", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "CytoscapeWidget(cytoscape_layout={'name': 'circle', 'animate': True, 'padding': 1}, cytoscape_style=[{'selecto…" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from pyTigerGraph.visualization import drawSchema\n", - "\n", - "drawSchema(conn.getSchema(force=True))" - ] - }, - { - "cell_type": "markdown", - "id": "4eb10dad-592d-4ec1-b57e-7cd61ea34637", - "metadata": {}, - "source": [ - "### Basic Statistics" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "a1dd0252-a5e5-47ee-acce-b843571c78cc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'Paper': 2708}" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Number of vertices for every vertex type\n", - "conn.getVertexCount()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "318f1998-1179-4ae2-9f1b-d1b032076c03", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'Cite': 10556}" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Number of edges for every type\n", - "conn.getEdgeCount()" - ] - }, - { - "cell_type": "markdown", - "id": "d54f29ef-6b65-4999-9363-f9dfde5e478c", - "metadata": { - "tags": [] - }, - "source": [ - "## 
Neighbor Loader " - ] - }, - { - "cell_type": "markdown", - "id": "a19aefc0-6c7c-4815-b3c7-2a2a3fd3cd3f", - "metadata": {}, - "source": [ - "### Homogeneous Graph " - ] - }, - { - "cell_type": "markdown", - "id": "49331e44-2bd6-4e9b-b061-3de410eb4dad", - "metadata": {}, - "source": [ - "`NeighborLoader` performs neighbor sampling as introduced in [Inductive Representation Learning on Large Graphs](https://arxiv.org/abs/1706.02216) and returns neighborhood subgraphs. Hence, the subgraphs from this loader are connected. \n", - "\n", - "Specifically, the loader first chooses `batch_size` number of vertices as seeds, then picks `num_neighbors` number of neighbors of each seed at random, then `num_neighbors` neighbors of each neighbor, and repeat for `num_hops`. This generates one subgraph. As you loop through this data loader, every vertex will at some point be chosen as a seed and you will get the subgraph expanded from the seed. If you want to limit seeds to certain vertices, the boolean attribute provided to `filter_by` will be used to indicate which vertices can be included as seeds. \n", - "\n", - "The loader can be created by calling the `.gds.neighborLoader()` function on the DB connection object. Key parameters to the data loader are\n", - "* batch_size, num_neighbors, num_hops: they are for the neighbor sampling process as described above.\n", - "* v_in_feats, v_out_labels, v_extra_feats: those dictate which vertex attributes will be pulled from the database. They can be omitted if no vertex attribute is needed.\n", - "* e_in_feats, e_out_labels, e_extra_feats: similar as above but for edge attributes.\n", - "* output_format: format of the output graph. 
\"PyG\", \"DGL\", \"spektral\", and \"dataframe\" are supported.\n", - "* shuffle: whether to shuffle the vertices before loading data.\n", - "* filter_by: a boolean attribute indicating which vertices can be included as seeds.\n", - "\n", - "For details on those parameters and the complete parameter list, please refer to the [doc](https://docs.tigergraph.com/pytigergraph/current/gds/gds#_neighborloader).\n", - "\n", - "**Note**: For the first time you initialize the loader on a graph in TigerGraph,\n", - "the initialization might take a minute as it installs the corresponding\n", - "query to the database and optimizes it. However, the query installation only\n", - "needs to be done once, so it will take no time when you initialize the loader\n", - "on the same graph again." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "269bf653-ea3a-4646-aa0d-ef539838a7cc", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 1.81 s, sys: 154 ms, total: 1.97 s\n", - "Wall time: 2 s\n" - ] - } - ], - "source": [ - "%%time\n", - "neighbor_loader = conn.gds.neighborLoader(\n", - " batch_size=16,\n", - " num_neighbors = 10,\n", - " num_hops =2,\n", - " v_in_feats = [\"x\"],\n", - " v_out_labels = [\"y\"],\n", - " v_extra_feats = [\"train_mask\", \"val_mask\", \"test_mask\"],\n", - " e_in_feats=[\"time\"],\n", - " e_out_labels=[],\n", - " e_extra_feats=[\"is_train\", \"is_val\"],\n", - " output_format = \"PyG\",\n", - " shuffle=True,\n", - " filter_by=\"train_mask\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "d0166555-7d60-4648-bf34-37819ed04f8a", - "metadata": {}, - "source": [ - "There are two ways to use this data loader. We will try the first one below as it is more common.\n", - "* First, it can be used as an iterable, which means you can loop through\n", - " it to get every batch of data. 
If you load all data in one batch (`num_batches=1`),\n", - " there will be only one batch (of all the data) in the iterator.\n", - "* Second, you can access the `data` property of the class directly. If there is\n", - " only one batch of data to load, it will give you the batch directly instead\n", - " of an iterator, which might make more sense in that case. If there are\n", - " multiple batches of data to load, it will return the loader itself." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "cc82350c-9e80-4ff1-8c1b-67ae60a75a91", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----Batch 0----\n", - "Data(edge_index=[2, 407], edge_feat=[407], is_train=[407], is_val=[407], x=[271, 1433], y=[271], train_mask=[271], val_mask=[271], test_mask=[271], is_seed=[271])\n", - "----Batch 1----\n", - "Data(edge_index=[2, 362], edge_feat=[362], is_train=[362], is_val=[362], x=[222, 1433], y=[222], train_mask=[222], val_mask=[222], test_mask=[222], is_seed=[222])\n", - "----Batch 2----\n", - "Data(edge_index=[2, 316], edge_feat=[316], is_train=[316], is_val=[316], x=[220, 1433], y=[220], train_mask=[220], val_mask=[220], test_mask=[220], is_seed=[220])\n", - "----Batch 3----\n", - "Data(edge_index=[2, 395], edge_feat=[395], is_train=[395], is_val=[395], x=[257, 1433], y=[257], train_mask=[257], val_mask=[257], test_mask=[257], is_seed=[257])\n", - "----Batch 4----\n", - "Data(edge_index=[2, 445], edge_feat=[445], is_train=[445], is_val=[445], x=[282, 1433], y=[282], train_mask=[282], val_mask=[282], test_mask=[282], is_seed=[282])\n", - "----Batch 5----\n", - "Data(edge_index=[2, 381], edge_feat=[381], is_train=[381], is_val=[381], x=[254, 1433], y=[254], train_mask=[254], val_mask=[254], test_mask=[254], is_seed=[254])\n", - "----Batch 6----\n", - "Data(edge_index=[2, 359], edge_feat=[359], is_train=[359], is_val=[359], x=[254, 1433], y=[254], train_mask=[254], val_mask=[254], test_mask=[254], 
is_seed=[254])\n", - "----Batch 7----\n", - "Data(edge_index=[2, 391], edge_feat=[391], is_train=[391], is_val=[391], x=[247, 1433], y=[247], train_mask=[247], val_mask=[247], test_mask=[247], is_seed=[247])\n", - "----Batch 8----\n", - "Data(edge_index=[2, 392], edge_feat=[392], is_train=[392], is_val=[392], x=[261, 1433], y=[261], train_mask=[261], val_mask=[261], test_mask=[261], is_seed=[261])\n", - "CPU times: user 1.24 s, sys: 56.9 ms, total: 1.3 s\n", - "Wall time: 30.1 s\n" - ] - } - ], - "source": [ - "%%time\n", - "for i, batch in enumerate(neighbor_loader):\n", - " print(\"----Batch {}----\".format(i))\n", - " print(batch)" - ] - }, - { - "cell_type": "markdown", - "id": "ff561ba1", - "metadata": {}, - "source": [ - "### Heterogeneous Graph " - ] - }, - { - "cell_type": "markdown", - "id": "c4dee177", - "metadata": {}, - "source": [ - "If the code above were run on heterogeneous graphs, it would also work but would ignore vertex or edge types and output homogeneous subgraphs (it also requires that the desired attributes exist on all vertices/edges or an error will be thrown). If you need the output to be heterogeneous subgraphs, use the dict input for v_in_feats, v_out_labels, v_extra_feats, e_in_feats, e_out_labels, or e_extra_feats. Keys of the dict are vertex/edge types to be selected, and values are lists of attributes to be selected for the vertex/edge types. This also gives you fine control over what types of vertices/edges to be included in the sampling process and in the output. \n", - "\n", - "Although `Cora` is a homogeneous graph, we will use it to illustrate how to specify the input to get output as heterogeneous subgraphs. It is straightforward to replace this Cora graph with your heterogeneous graph." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "29498931", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Installing and optimizing queries. 
It might take a minute or two.\n", - "Query installation finished.\n", - "CPU times: user 55.8 ms, sys: 4.95 ms, total: 60.7 ms\n", - "Wall time: 1min 11s\n" - ] - } - ], - "source": [ - "%%time\n", - "neighbor_loader = conn.gds.neighborLoader(\n", - " batch_size=16,\n", - " num_neighbors = 10,\n", - " num_hops =2,\n", - " v_in_feats = {\"Paper\": [\"x\"]},\n", - " v_out_labels = {\"Paper\": [\"y\"]},\n", - " v_extra_feats = {\"Paper\": [\"train_mask\", \"val_mask\", \"test_mask\"]},\n", - " e_in_feats={\"Cite\": [\"time\"]},\n", - " e_extra_feats={\"Cite\": [\"is_train\", \"is_val\"]},\n", - " output_format = \"PyG\",\n", - " shuffle=True,\n", - " filter_by={\"Paper\": \"train_mask\"}\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "f8044336-ba1f-425c-895a-dbf7facfd945", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----Batch 0----\n", - "HeteroData(\n", - " \u001b[1mPaper\u001b[0m={\n", - " x=[278, 1433],\n", - " y=[278],\n", - " train_mask=[278],\n", - " val_mask=[278],\n", - " test_mask=[278],\n", - " is_seed=[278]\n", - " },\n", - " \u001b[1m(Paper, Cite, Paper)\u001b[0m={\n", - " edge_index=[2, 410],\n", - " edge_feat=[410],\n", - " is_train=[410],\n", - " is_val=[410]\n", - " }\n", - ")\n", - "----Batch 1----\n", - "HeteroData(\n", - " \u001b[1mPaper\u001b[0m={\n", - " x=[267, 1433],\n", - " y=[267],\n", - " train_mask=[267],\n", - " val_mask=[267],\n", - " test_mask=[267],\n", - " is_seed=[267]\n", - " },\n", - " \u001b[1m(Paper, Cite, Paper)\u001b[0m={\n", - " edge_index=[2, 451],\n", - " edge_feat=[451],\n", - " is_train=[451],\n", - " is_val=[451]\n", - " }\n", - ")\n", - "----Batch 2----\n", - "HeteroData(\n", - " \u001b[1mPaper\u001b[0m={\n", - " x=[252, 1433],\n", - " y=[252],\n", - " train_mask=[252],\n", - " val_mask=[252],\n", - " test_mask=[252],\n", - " is_seed=[252]\n", - " },\n", - " \u001b[1m(Paper, Cite, Paper)\u001b[0m={\n", - " edge_index=[2, 382],\n", - " 
edge_feat=[382],\n", - " is_train=[382],\n", - " is_val=[382]\n", - " }\n", - ")\n", - "----Batch 3----\n", - "HeteroData(\n", - " \u001b[1mPaper\u001b[0m={\n", - " x=[249, 1433],\n", - " y=[249],\n", - " train_mask=[249],\n", - " val_mask=[249],\n", - " test_mask=[249],\n", - " is_seed=[249]\n", - " },\n", - " \u001b[1m(Paper, Cite, Paper)\u001b[0m={\n", - " edge_index=[2, 422],\n", - " edge_feat=[422],\n", - " is_train=[422],\n", - " is_val=[422]\n", - " }\n", - ")\n", - "----Batch 4----\n", - "HeteroData(\n", - " \u001b[1mPaper\u001b[0m={\n", - " x=[238, 1433],\n", - " y=[238],\n", - " train_mask=[238],\n", - " val_mask=[238],\n", - " test_mask=[238],\n", - " is_seed=[238]\n", - " },\n", - " \u001b[1m(Paper, Cite, Paper)\u001b[0m={\n", - " edge_index=[2, 369],\n", - " edge_feat=[369],\n", - " is_train=[369],\n", - " is_val=[369]\n", - " }\n", - ")\n", - "----Batch 5----\n", - "HeteroData(\n", - " \u001b[1mPaper\u001b[0m={\n", - " x=[276, 1433],\n", - " y=[276],\n", - " train_mask=[276],\n", - " val_mask=[276],\n", - " test_mask=[276],\n", - " is_seed=[276]\n", - " },\n", - " \u001b[1m(Paper, Cite, Paper)\u001b[0m={\n", - " edge_index=[2, 417],\n", - " edge_feat=[417],\n", - " is_train=[417],\n", - " is_val=[417]\n", - " }\n", - ")\n", - "----Batch 6----\n", - "HeteroData(\n", - " \u001b[1mPaper\u001b[0m={\n", - " x=[179, 1433],\n", - " y=[179],\n", - " train_mask=[179],\n", - " val_mask=[179],\n", - " test_mask=[179],\n", - " is_seed=[179]\n", - " },\n", - " \u001b[1m(Paper, Cite, Paper)\u001b[0m={\n", - " edge_index=[2, 249],\n", - " edge_feat=[249],\n", - " is_train=[249],\n", - " is_val=[249]\n", - " }\n", - ")\n", - "----Batch 7----\n", - "HeteroData(\n", - " \u001b[1mPaper\u001b[0m={\n", - " x=[249, 1433],\n", - " y=[249],\n", - " train_mask=[249],\n", - " val_mask=[249],\n", - " test_mask=[249],\n", - " is_seed=[249]\n", - " },\n", - " \u001b[1m(Paper, Cite, Paper)\u001b[0m={\n", - " edge_index=[2, 360],\n", - " edge_feat=[360],\n", - " is_train=[360],\n", 
- " is_val=[360]\n", - " }\n", - ")\n", - "----Batch 8----\n", - "HeteroData(\n", - " \u001b[1mPaper\u001b[0m={\n", - " x=[216, 1433],\n", - " y=[216],\n", - " train_mask=[216],\n", - " val_mask=[216],\n", - " test_mask=[216],\n", - " is_seed=[216]\n", - " },\n", - " \u001b[1m(Paper, Cite, Paper)\u001b[0m={\n", - " edge_index=[2, 323],\n", - " edge_feat=[323],\n", - " is_train=[323],\n", - " is_val=[323]\n", - " }\n", - ")\n", - "CPU times: user 1.27 s, sys: 1.9 ms, total: 1.27 s\n", - "Wall time: 29.2 s\n" - ] - } - ], - "source": [ - "%%time\n", - "for i, batch in enumerate(neighbor_loader):\n", - " print(\"----Batch {}----\".format(i))\n", - " print(batch)" - ] - }, - { - "cell_type": "markdown", - "id": "8a8dd1f1-9139-4f7a-b10f-3ecd60226541", - "metadata": { - "tags": [] - }, - "source": [ - "## Streaming through Kafka \n", - "\n", - "**Note**: Kafka streaming function is only available when the database is activated with the [Enterprise edition](https://docs.tigergraph.com/ml-workbench/current/editions/).\n", - "\n", - "To stream data from your DB through Kafka to the machine that runs the data loader, the only extra step is to provide the required information about your Kafka cluster and everything else works as above. \n", - "\n", - "Note that TigerGraph doesn't provide the Kafka cluster but only uses it as a channel to stream data. You can use most Kafka clusters of your choice. Each batch (subgraph) will be a message, and hence if you need relatively large batches and your Kafka cluster has replicas, the `replica.fetch.max.bytes` setting on the Kafka cluster has to be large enough to accommodate the batches. Also, if you create a topic manually for the data loader to use, the `max.message.bytes` setting of the topic has to be set large enough as well. If you let the data loader manage the topic (default), then its max message size is set to 100M by default." 
- ] - }, - { - "cell_type": "markdown", - "id": "acbb465c-5adb-455d-b9fb-a29208b4daa3", - "metadata": {}, - "source": [ - "### Configure Kafka\n", - "The Kafka cluster information is to be provided to the `.gds.configureKafka` function. Once configured, the settings will be shared with all newly created data loaders, so there is no need to set up Kafka for each loader. A few important parameters to this function are:\n", - "* `kafka_address`: the only required parameter for obvious reasons. \n", - "* `kafka_topic`: name of the topic to use. If it doesn't exist, the data loader will create it for you provided it has the permission. If it is not given, a topic with name like `tg_randomString` will be generated.\n", - "* `kafka_security_protocol`: If authentication is required, `SSL`, `SASL_PLAINTEXT`, `SASL_SSL` are supported as security protocols. \n", - "* `kafka_sasl_mechanism`: For the `SASL` protocol, mechanisms `PLAIN` and `GSSAPI` are supported. \n", - "\n", - "Please see the [doc](https://docs.tigergraph.com/pytigergraph/current/gds/gds#_configurekafka) for detailed settings." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "e472d752-f2b9-4029-93e5-f23bf21d1b4a", - "metadata": {}, - "outputs": [], - "source": [ - "conn.gds.configureKafka(\n", - " kafka_address=\"127.0.0.1:9092\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "22306c04-a113-46da-ad88-5b5d4c9deb88", - "metadata": {}, - "source": [ - "### Load subgraphs" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "ae1a869d-6b10-4f48-8fae-adc757e757cb", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:kafka.coordinator.consumer:group_id is None: disabling auto-commit.\n", - "ERROR:kafka.conn: [IPv4 ('34.168.46.139', 9092)]>: socket disconnected\n", - "WARNING:kafka.client:Node 0 connection failed -- refreshing metadata\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CPU times: user 60.1 ms, sys: 3.93 ms, total: 64 ms\n", - "Wall time: 1 s\n" - ] - } - ], - "source": [ - "%%time\n", - "neighbor_loader = conn.gds.neighborLoader(\n", - " batch_size=16,\n", - " num_neighbors = 10,\n", - " num_hops =2,\n", - " v_in_feats = [\"x\"],\n", - " v_out_labels = [\"y\"],\n", - " v_extra_feats = [\"train_mask\", \"val_mask\", \"test_mask\"],\n", - " e_in_feats=[\"time\"],\n", - " e_out_labels=[],\n", - " e_extra_feats=[\"is_train\", \"is_val\"],\n", - " output_format = \"PyG\",\n", - " shuffle=True,\n", - " filter_by=\"train_mask\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "d375539a-7b5c-4fe9-ab3e-53a78c9a2845", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----Batch 0----\n", - "Data(edge_index=[2, 380], edge_feat=[380], is_train=[380], is_val=[380], x=[257, 1433], y=[257], train_mask=[257], val_mask=[257], test_mask=[257], is_seed=[257])\n", - "----Batch 1----\n", - "Data(edge_index=[2, 413], edge_feat=[413], is_train=[413], is_val=[413], x=[243, 1433], y=[243], 
train_mask=[243], val_mask=[243], test_mask=[243], is_seed=[243])\n", - "----Batch 2----\n", - "Data(edge_index=[2, 358], edge_feat=[358], is_train=[358], is_val=[358], x=[212, 1433], y=[212], train_mask=[212], val_mask=[212], test_mask=[212], is_seed=[212])\n", - "----Batch 3----\n", - "Data(edge_index=[2, 468], edge_feat=[468], is_train=[468], is_val=[468], x=[298, 1433], y=[298], train_mask=[298], val_mask=[298], test_mask=[298], is_seed=[298])\n", - "----Batch 4----\n", - "Data(edge_index=[2, 406], edge_feat=[406], is_train=[406], is_val=[406], x=[266, 1433], y=[266], train_mask=[266], val_mask=[266], test_mask=[266], is_seed=[266])\n", - "----Batch 5----\n", - "Data(edge_index=[2, 353], edge_feat=[353], is_train=[353], is_val=[353], x=[242, 1433], y=[242], train_mask=[242], val_mask=[242], test_mask=[242], is_seed=[242])\n", - "----Batch 6----\n", - "Data(edge_index=[2, 427], edge_feat=[427], is_train=[427], is_val=[427], x=[302, 1433], y=[302], train_mask=[302], val_mask=[302], test_mask=[302], is_seed=[302])\n", - "----Batch 7----\n", - "Data(edge_index=[2, 377], edge_feat=[377], is_train=[377], is_val=[377], x=[246, 1433], y=[246], train_mask=[246], val_mask=[246], test_mask=[246], is_seed=[246])\n", - "----Batch 8----\n", - "Data(edge_index=[2, 205], edge_feat=[205], is_train=[205], is_val=[205], x=[150, 1433], y=[150], train_mask=[150], val_mask=[150], test_mask=[150], is_seed=[150])\n", - "CPU times: user 1.53 s, sys: 58 ms, total: 1.59 s\n", - "Wall time: 30.5 s\n" - ] - } - ], - "source": [ - "%%time\n", - "for i, batch in enumerate(neighbor_loader):\n", - " print(\"----Batch {}----\".format(i))\n", - " print(batch)" - ] - }, - { - "cell_type": "markdown", - "id": "6aedd4f5-fdba-4da7-88e8-0a853d40221d", - "metadata": {}, - "source": [ - "### Clean up\n", - "\n", - "When this notebook is shutdown or the neighbor_loader object is garbage collected, the Kafka topic created by this loader should be deleted automatically. 
However, you can call the `.stop()` member function to manually delete the topic. It also resets the loader and closes the background threads." - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "7ec78f44-7247-43fa-8f3b-d21a3bc495ed", - "metadata": {}, - "outputs": [], - "source": [ - "neighbor_loader.stop(remove_topics=True)" - ] - } - ], - "metadata": { - "environment": { - "name": "pytorch-gpu.1-9.m81", - "type": "gcloud", - "uri": "gcr.io/deeplearning-platform-release/pytorch-gpu.1-9:m81" - }, - "interpreter": { - "hash": "96daeecb52bbbb8e3aef04d2f9c6a1e01f271d07cea30059f3c558ef00b717d2" - }, - "kernelspec": { - "display_name": "pyTorch", - "language": "python", - "name": "pytorch" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - }, - "toc-autonumbering": false, - "toc-showtags": false - }, - "nbformat": 4, - "nbformat_minor": 5 -}