From 7a439432b79fb6c3c2646a6d268b99760bca0f97 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:42:23 -0400 Subject: [PATCH 01/53] init --- common/config.py | 9 +- common/extractors/GraphExtractor.py | 21 ++ common/extractors/__init__.py | 1 + .../louvain/louvain_2_other_passes.gsql | 217 +++++++++++++++ .../louvain/louvain_3_final_community.gsql | 44 +++ .../louvain_4_modularity_1_for_pass.gsql | 39 +++ .../louvain/louvain_4_modularity_2_final.gsql | 52 ++++ .../graphRAG/louvain/louvain_5_reset.gsql | 13 + common/gsql/supportai/Scan_For_Updates.gsql | 10 +- common/gsql/supportai/SupportAI_Schema.gsql | 18 +- common/llm_services/openai_service.py | 2 +- common/py_schemas/schemas.py | 17 +- copilot/app/routers/supportai.py | 223 +++------------ copilot/app/supportai/supportai.py | 185 +++++++++++++ copilot/docs/notebooks/graphrag.ipynb | 261 ++++++++++++++++++ .../app/eventual_consistency_checker.py | 3 +- .../app/graphrag/__init__.py | 1 + .../app/graphrag/graph_rag.py | 138 +++++++++ .../app/graphrag/util.py | 36 +++ .../app/graphrag/worker.py | 27 ++ eventual-consistency-service/app/main.py | 142 +++++++--- 21 files changed, 1226 insertions(+), 233 deletions(-) create mode 100644 common/extractors/GraphExtractor.py create mode 100644 common/gsql/graphRAG/louvain/louvain_2_other_passes.gsql create mode 100644 common/gsql/graphRAG/louvain/louvain_3_final_community.gsql create mode 100644 common/gsql/graphRAG/louvain/louvain_4_modularity_1_for_pass.gsql create mode 100644 common/gsql/graphRAG/louvain/louvain_4_modularity_2_final.gsql create mode 100644 common/gsql/graphRAG/louvain/louvain_5_reset.gsql create mode 100644 copilot/app/supportai/supportai.py create mode 100644 copilot/docs/notebooks/graphrag.ipynb create mode 100644 eventual-consistency-service/app/graphrag/__init__.py create mode 100644 eventual-consistency-service/app/graphrag/graph_rag.py create mode 100644 eventual-consistency-service/app/graphrag/util.py create mode 100644 eventual-consistency-service/app/graphrag/worker.py diff --git a/common/config.py b/common/config.py index 8eb9432a..2546e38a 100644 --- a/common/config.py +++ b/common/config.py @@ -15,14 +15,15 @@ AWSBedrock, AzureOpenAI, GoogleVertexAI, - OpenAI, Groq, + HuggingFaceEndpoint, + LLM_Model, Ollama, - HuggingFaceEndpoint + OpenAI, ) +from common.logs.logwriter import LogWriter from common.session import SessionHandler from common.status import StatusManager -from common.logs.logwriter import LogWriter security = HTTPBasic() session_handler = SessionHandler() @@ -102,7 +103,7 @@ raise Exception("Embedding service not implemented") -def get_llm_service(llm_config): +def get_llm_service(llm_config) -> LLM_Model: if llm_config["completion_service"]["llm_service"].lower() == "openai": return OpenAI(llm_config["completion_service"]) elif llm_config["completion_service"]["llm_service"].lower() == "azure": diff --git a/common/extractors/GraphExtractor.py b/common/extractors/GraphExtractor.py new file mode 100644 index 00000000..c8f24355 --- /dev/null +++ b/common/extractors/GraphExtractor.py @@ -0,0 +1,21 @@ +from langchain_community.graphs.graph_document import GraphDocument +from langchain_core.documents import Document +from langchain_experimental.graph_transformers import LLMGraphTransformer + +from common.config import get_llm_service, llm_config +from common.extractors.BaseExtractor import BaseExtractor + + +class GraphExtractor(BaseExtractor): + def __init__(self): + llm = 
get_llm_service(llm_config).llm + self.transformer = LLMGraphTransformer( + llm=llm, + node_properties=["description"], + relationship_properties=["description"], + ) + + def extract(self, text) -> list[GraphDocument]: + doc = Document(page_content=text) + graph_docs = self.transformer.convert_to_graph_documents([doc]) + return graph_docs diff --git a/common/extractors/__init__.py b/common/extractors/__init__.py index ced539e4..e2f0bcdf 100644 --- a/common/extractors/__init__.py +++ b/common/extractors/__init__.py @@ -1,3 +1,4 @@ +from common.extractors.GraphExtractor import GraphExtractor from common.extractors.LLMEntityRelationshipExtractor import ( LLMEntityRelationshipExtractor, ) diff --git a/common/gsql/graphRAG/louvain/louvain_2_other_passes.gsql b/common/gsql/graphRAG/louvain/louvain_2_other_passes.gsql new file mode 100644 index 00000000..231631d6 --- /dev/null +++ b/common/gsql/graphRAG/louvain/louvain_2_other_passes.gsql @@ -0,0 +1,217 @@ +USE GRAPH {graph_name} +DROP QUERY {query_name} +CREATE OR REPLACE DISTRIBUTED QUERY graphRAG_louvain_2( + UINT layer = 0, + UINT max_hop = 10, + UINT batch_num = 1 +) FOR GRAPH {graph_name} SYNTAX v1 {{ + TYPEDEF TUPLE community, STRING ext_vid> MyTuple; + SumAccum @@m; // the sum of the weights of all the links in the network + MinAccum> @{community_id_attribute_name}; // the community ID of the node + MinAccum @community_vid; // the community ID of the node + SumAccum @k; // the sum of the weights of the links incident to the node + SumAccum @k_in; // the sum of the weights of the links inside the previous community of the node + SumAccum @k_self_loop; // the weight of the self-loop link + MapAccum, SumAccum> @community_k_in_map; // the community of the neighbors of the nodes -> the sum of the weights of the links inside the community + MapAccum, SumAccum> @@community_sum_total_map; // community ID C -> the sum of the weights of the links incident to nodes in C + SumAccum @community_sum_total; // the sum of the weights of the links incident to nodes in the community of the node + MapAccum, SumAccum> @@community_sum_in_map; // community ID -> the sum of the weights of the links inside the community + MapAccum, MapAccum, SumAccum>> @@source_target_k_in_map; // source community ID -> (target community ID -> the sum of the weights of the links from the source community to the target community) + SumAccum @delta_Q_remove; // delta Q to remove the node from the previous community + MaxAccum @best_move; // best move of the node with the highest delta Q to move the isolated node into the new community + MaxAccum @@min_double; // used to reset the @best_move + SumAccum @@move_cnt; + OrAccum @to_change_community; + SumAccum @batch_id; + SumAccum @vid; + SumAccum @@links_to_check; + + // Initialization + LOG(TRUE, "Query started!"); + All_Nodes = {{{entity_vertex_name}.*}}; + _tmp = + SELECT s + FROM All_Nodes:s -({links_to_edge_name}:e)- :t + ACCUM + @@links_to_check += 1; + + All_Nodes = + SELECT s + FROM All_Nodes:s -({links_to_edge_name}:e)- :t + WHERE e.layer_weight_map.containsKey(layer) + ACCUM DOUBLE weight = e.layer_weight_map.get(layer), + @@m += weight / 2, + s.@k += weight, + IF s == t THEN // self-loop link + s.@k_self_loop += weight + END + POST-ACCUM + s.@{community_id_attribute_name} = s, + s.@community_vid = to_string(s.id), + s.@vid = getvid(s), + s.@batch_id = s.@vid % batch_num + ; + LOG(TRUE, All_Nodes.size()); + IF @@m < 0.00000000001 THEN + PRINT "Warning: the sum of the weights in the edges should be greater than zero!"; + 
RETURN; + END; + + // Local moving + INT hop = 0; + Candidates = All_Nodes; + WHILE Candidates.size() > 0 AND hop < max_hop DO + hop = hop + 1; + LOG(TRUE, hop); + IF hop == 1 THEN // first iteration + ChangedNodes = + SELECT s + FROM Candidates:s -({links_to_edge_name}:e)- :t + WHERE e.layer_weight_map.containsKey(layer) + AND s.@{community_id_attribute_name} != t.@{community_id_attribute_name} + ACCUM s.@best_move += MyTuple(1 - s.@k * t.@k / (2 * @@m), t.@{community_id_attribute_name}, t.@community_vid) + POST-ACCUM + IF s.@best_move.delta_Q_add > 0 THEN // the gain (delta Q) is positive + s.@to_change_community = TRUE + END + HAVING s.@to_change_community == TRUE + ; + ELSE // remaining iterations + // Calculate sum_total + Tmp = + SELECT s + FROM All_Nodes:s + POST-ACCUM + @@community_sum_total_map += (s.@{community_id_attribute_name} -> s.@k) + ; + Tmp = + SELECT s + FROM All_Nodes:s + POST-ACCUM + s.@community_sum_total = @@community_sum_total_map.get(s.@{community_id_attribute_name}) + ; + LOG(TRUE, @@community_sum_total_map.size()); + @@community_sum_total_map.clear(); + // Find the best move + ChangedNodes = {{}}; + FOREACH batch_id IN RANGE[0, batch_num-1] DO + LOG(TRUE, batch_id); + // Calculate the delta Q to remove the node from the previous community + Nodes = + SELECT s + FROM Candidates:s -({links_to_edge_name}:e)- :t + WHERE e.layer_weight_map.containsKey(layer) + AND s.@batch_id == batch_id + ACCUM DOUBLE weight = e.layer_weight_map.get(layer), + IF s.@{community_id_attribute_name} == t.@{community_id_attribute_name} THEN + s.@k_in += weight + ELSE + s.@community_k_in_map += (t.@{community_id_attribute_name} -> weight) + END + POST-ACCUM + s.@delta_Q_remove = 2 * s.@k_self_loop - 2 * s.@k_in + s.@k * (s.@community_sum_total - s.@k) / @@m, + s.@k_in = 0, + s.@best_move = MyTuple(@@min_double, s, to_string(s.id)) // reset the delta_Q_add + ; + // Find the best move + Nodes = + SELECT s + FROM Nodes:s -({links_to_edge_name}:e)- :t + WHERE e.layer_weight_map.containsKey(layer) + AND s.@{community_id_attribute_name} != t.@{community_id_attribute_name} + ACCUM DOUBLE delta_Q_add = 2 * s.@community_k_in_map.get(t.@{community_id_attribute_name}) - s.@k * t.@community_sum_total / @@m, + s.@best_move += MyTuple(delta_Q_add, t.@{community_id_attribute_name}, t.@community_vid) + POST-ACCUM + IF s.@delta_Q_remove + s.@best_move.delta_Q_add > 0 THEN // the gain (delta Q) is positive + s.@to_change_community = TRUE + END, + s.@community_k_in_map.clear() + HAVING s.@to_change_community == TRUE + ; + ChangedNodes = ChangedNodes UNION Nodes; + END; + END; + // If two nodes swap, only change the community of one of them + SwapNodes = + SELECT s + FROM ChangedNodes:s -({links_to_edge_name}:e)- :t + WHERE e.layer_weight_map.containsKey(layer) + AND s.@best_move.community == t.@{community_id_attribute_name} + AND t.@to_change_community == TRUE + AND t.@best_move.community == s.@{community_id_attribute_name} + // only change the one with larger delta Q or the one with smaller @vid if delta Q are the same + AND (s.@delta_Q_remove + s.@best_move.delta_Q_add < t.@delta_Q_remove + t.@best_move.delta_Q_add + OR (abs((s.@delta_Q_remove + s.@best_move.delta_Q_add) - (t.@delta_Q_remove + t.@best_move.delta_Q_add)) < 0.00000000001 + AND s.@vid > t.@vid)) + POST-ACCUM + s.@to_change_community = FALSE + ; + LOG(TRUE, SwapNodes.size()); + ChangedNodes = ChangedNodes MINUS SwapNodes; + LOG(TRUE, ChangedNodes.size()); + // Place each node of ChangedNodes in the community in which the gain is maximum + 
ChangedNodes = + SELECT s + FROM ChangedNodes:s + POST-ACCUM + s.@{community_id_attribute_name} = s.@best_move.community, + s.@community_vid = s.@best_move.ext_vid, + s.@to_change_community = FALSE + ; + + @@move_cnt += ChangedNodes.size(); + // Get all neighbours of the changed node that do not belong to the node’s new community + Candidates = + SELECT t + FROM ChangedNodes:s -({links_to_edge_name}:e)- :t + WHERE e.layer_weight_map.containsKey(layer) + AND t.@{community_id_attribute_name} != s.@{community_id_attribute_name} + ; + LOG(TRUE, Candidates.size()); + END; + + PRINT @@move_cnt AS Delta; + + // Coarsening + LOG(TRUE, "Coarsening"); + UINT new_layer = layer + 1; + @@community_sum_total_map.clear(); + Tmp = + SELECT s + FROM All_Nodes:s -({links_to_edge_name}:e)- :t + WHERE e.layer_weight_map.containsKey(layer) + ACCUM IF s.@{community_id_attribute_name} == t.@{community_id_attribute_name} THEN + DOUBLE weight = e.layer_weight_map.get(layer), + @@community_sum_in_map += (s.@{community_id_attribute_name} -> weight) + END + POST-ACCUM + //f_belongs_to.println(s.id, s.@{community_id_attribute_name}, new_layer), + INSERT INTO {belongs_to_edge_name} VALUES (s, str_to_int(s.@community_vid), new_layer), + IF @@community_sum_in_map.containsKey(s) THEN + //f_links_to.println(s.id, s.id, @@community_sum_in_map.get(s), new_layer) + INSERT INTO {links_to_edge_name} VALUES (s,s, (new_layer -> @@community_sum_in_map.get(s))) + END + ; + LOG(TRUE, @@community_sum_in_map.size()); + @@community_sum_in_map.clear(); + Tmp = + SELECT s + FROM All_Nodes:s -({links_to_edge_name}:e)- :t + WHERE e.layer_weight_map.containsKey(layer) + ACCUM DOUBLE weight = e.layer_weight_map.get(layer), + IF s.@{community_id_attribute_name} != t.@{community_id_attribute_name} THEN + @@source_target_k_in_map += (s.@{community_id_attribute_name} -> (t.@{community_id_attribute_name} -> weight)) + END + POST-ACCUM + IF @@source_target_k_in_map.containsKey(s) THEN + FOREACH (target_community, k_in) IN @@source_target_k_in_map.get(s) DO + //f_links_to.println(s.uniq_id, target_community, k_in, new_layer) + INSERT INTO {links_to_edge_name} VALUES (s,target_community, (new_layer -> k_in)) + END + END + ; + LOG(TRUE, @@source_target_k_in_map.size()); + @@source_target_k_in_map.clear(); + PRINT @@links_to_check; + LOG(TRUE, "Query finished!"); +}} diff --git a/common/gsql/graphRAG/louvain/louvain_3_final_community.gsql b/common/gsql/graphRAG/louvain/louvain_3_final_community.gsql new file mode 100644 index 00000000..75cbad7e --- /dev/null +++ b/common/gsql/graphRAG/louvain/louvain_3_final_community.gsql @@ -0,0 +1,44 @@ +USE GRAPH {graph_name} +DROP QUERY {query_name} +CREATE OR REPLACE DISTRIBUTED QUERY graphRAG_louvain_3( + UINT top_layer = 2 +) FOR GRAPH {graph_name} SYNTAX v1 {{ + MinAccum @{community_id_attribute_name}; // the community ID of the node + INT layer = top_layer; + + // Initialization + LOG(TRUE, "Query started!"); + All_Nodes = {{{entity_vertex_name}.*}}; + + // Top layer + Nodes = + SELECT t + FROM All_Nodes:s -(reverse_{belongs_to_edge_name}:e)- :t + WHERE layer IN e.layer_set + ACCUM t.@{community_id_attribute_name} = to_string(s.id) + ; + LOG(TRUE, layer, Nodes.size()); + + // Other layers + WHILE Nodes.size() > 0 AND layer > 0 DO + layer = layer - 1; + Nodes = + SELECT t + FROM Nodes:s -(reverse_{belongs_to_edge_name}:e)- :t + WHERE layer IN e.layer_set + ACCUM t.@{community_id_attribute_name} = s.@{community_id_attribute_name} + ; + LOG(TRUE, layer, Nodes.size()); + END; + + // Write to the file + Nodes = + 
SELECT s + FROM Nodes:s + POST-ACCUM + //f.println(s.uniq_id, s.@{community_id_attribute_name}) + s.{community_id_attribute_name} = s.@{community_id_attribute_name} + + ; + LOG(TRUE, "Query finished!"); +}} diff --git a/common/gsql/graphRAG/louvain/louvain_4_modularity_1_for_pass.gsql b/common/gsql/graphRAG/louvain/louvain_4_modularity_1_for_pass.gsql new file mode 100644 index 00000000..0058d0ee --- /dev/null +++ b/common/gsql/graphRAG/louvain/louvain_4_modularity_1_for_pass.gsql @@ -0,0 +1,39 @@ +USE GRAPH {graph_name} +DROP QUERY {query_name} +CREATE OR REPLACE DISTRIBUTED QUERY graphRAG_louvain_4a( + UINT layer=0 +) FOR GRAPH {graph_name} SYNTAX v1 {{ + SumAccum @@sum_weight; // the sum of the weights of all the links in the network + MapAccum, SumAccum> @@community_total_weight_map; // community ID C -> the sum of the weights of the links incident to nodes in C + MapAccum, SumAccum> @@community_in_weight_map; // community ID -> the sum of the weights of the links inside the community + SumAccum @@modularity; + + All_Nodes = {{{entity_vertex_name}.*}}; + All_Nodes = + SELECT s + FROM All_Nodes:s -({links_to_edge_name}:e)- :t + WHERE e.layer_weight_map.containsKey(layer) + ACCUM DOUBLE weight = e.layer_weight_map.get(layer), + IF s == t THEN + @@community_in_weight_map += (s -> weight) + END, + @@community_total_weight_map += (s -> weight), + @@sum_weight += weight + ; + LOG(TRUE, All_Nodes.size()); + @@modularity = 0; + FOREACH (community, total_weight) IN @@community_total_weight_map DO + DOUBLE in_weight = 0; + IF @@community_in_weight_map.containsKey(community) THEN + in_weight = @@community_in_weight_map.get(community); + END; + @@modularity += in_weight / @@sum_weight - pow(total_weight / @@sum_weight, 2); + END; + // PRINT @@modularity, @@community_in_weight_map, @@community_total_weight_map, @@sum_weight; + PRINT layer; + PRINT @@modularity AS modularity; + PRINT @@community_total_weight_map.size() AS community_number; + PRINT All_Nodes.size(); + @@community_in_weight_map.clear(); + @@community_total_weight_map.clear(); +}} diff --git a/common/gsql/graphRAG/louvain/louvain_4_modularity_2_final.gsql b/common/gsql/graphRAG/louvain/louvain_4_modularity_2_final.gsql new file mode 100644 index 00000000..31ba4d0b --- /dev/null +++ b/common/gsql/graphRAG/louvain/louvain_4_modularity_2_final.gsql @@ -0,0 +1,52 @@ +USE GRAPH {graph_name} +DROP QUERY {query_name} +CREATE OR REPLACE DISTRIBUTED QUERY graphRAG_louvain_4b( +) FOR GRAPH {graph_name} SYNTAX v1 {{ + SumAccum @@sum_weight; // the sum of the weights of all the links in the network + MapAccum> @@community_total_weight_map; // community ID C -> the sum of the weights of the links incident to nodes in C + MapAccum> @@community_in_weight_map; // community ID -> the sum of the weights of the links inside the community + SumAccum @@modularity; + MapAccum> @@Community_sizes; + MapAccum> @@count_of_sizes; + AvgAccum @@avg_community_size; + + DOUBLE wt = 1.0; + All_Nodes = {{{entity_vertex_name}.*}}; + Nodes = + SELECT s + FROM All_Nodes:s -({relation_edge_name}:e)- :t + ACCUM IF s.{community_id_attribute_name} == t.{community_id_attribute_name} THEN + @@community_in_weight_map += (s.{community_id_attribute_name} -> wt) + END, + @@community_total_weight_map += (s.{community_id_attribute_name} -> wt), + @@sum_weight += wt + ; + @@modularity = 0; + FOREACH (community, total_weight) IN @@community_total_weight_map DO + DOUBLE in_weight = 0; + IF @@community_in_weight_map.containsKey(community) THEN + in_weight = 
@@community_in_weight_map.get(community); + END; + @@modularity += in_weight / @@sum_weight - pow(total_weight / @@sum_weight, 2); + END; + + _tmp = + SELECT s + FROM All_Nodes:s + POST-ACCUM + @@Community_sizes += (s.{community_id_attribute_name} -> 1); + + FOREACH (comm, cnt) IN @@Community_sizes DO + @@count_of_sizes += (cnt -> 1); + @@avg_community_size += cnt; + END; + + // PRINT @@modularity, @@community_in_weight_map, @@community_total_weight_map, @@sum_weight; + PRINT @@modularity AS modularity; + PRINT @@community_total_weight_map.size() AS community_number; + PRINT @@count_of_sizes AS num_communities_by_size; + PRINT @@avg_community_size AS avg_community_size; + + @@community_in_weight_map.clear(); + @@community_total_weight_map.clear(); +}} diff --git a/common/gsql/graphRAG/louvain/louvain_5_reset.gsql b/common/gsql/graphRAG/louvain/louvain_5_reset.gsql new file mode 100644 index 00000000..7590935a --- /dev/null +++ b/common/gsql/graphRAG/louvain/louvain_5_reset.gsql @@ -0,0 +1,13 @@ +USE GRAPH {graph_name} +DROP QUERY {query_name} +CREATE OR REPLACE DISTRIBUTED QUERY graphRAG_louvain_5_reset( +) FOR GRAPH {graph_name} SYNTAX v1 {{ + + // Initialization + Nodes = {{{entity_vertex_name}.*}}; + + // Top layer + DELETE e + FROM Nodes:s -(({belongs_to_edge_name}|{links_to_edge_name}):e)- :t + ; +}} diff --git a/common/gsql/supportai/Scan_For_Updates.gsql b/common/gsql/supportai/Scan_For_Updates.gsql index 03ced2ec..7d9d1b83 100644 --- a/common/gsql/supportai/Scan_For_Updates.gsql +++ b/common/gsql/supportai/Scan_For_Updates.gsql @@ -24,10 +24,10 @@ CREATE DISTRIBUTED QUERY Scan_For_Updates(STRING v_type = "Document", res = SELECT s FROM start:s -(HAS_CONTENT)-> Content:c ACCUM @@v_and_text += (s.id -> c.text) POST-ACCUM s.epoch_processing = datetime_to_epoch(now()); - ELSE IF v_type == "Concept" THEN - res = SELECT s FROM start:s - POST-ACCUM @@v_and_text += (s.id -> s.description), - s.epoch_processing = datetime_to_epoch(now()); + // ELSE IF v_type == "Concept" THEN + // res = SELECT s FROM start:s + // POST-ACCUM @@v_and_text += (s.id -> s.description), + // s.epoch_processing = datetime_to_epoch(now()); ELSE IF v_type == "Entity" THEN res = SELECT s FROM start:s POST-ACCUM @@v_and_text += (s.id -> s.definition), @@ -42,4 +42,4 @@ CREATE DISTRIBUTED QUERY Scan_For_Updates(STRING v_type = "Document", POST-ACCUM s.epoch_processing = datetime_to_epoch(now()); END; PRINT @@v_and_text; -} \ No newline at end of file +} diff --git a/common/gsql/supportai/SupportAI_Schema.gsql b/common/gsql/supportai/SupportAI_Schema.gsql index 061993bb..0998affe 100644 --- a/common/gsql/supportai/SupportAI_Schema.gsql +++ b/common/gsql/supportai/SupportAI_Schema.gsql @@ -2,7 +2,7 @@ CREATE SCHEMA_CHANGE JOB add_supportai_schema { ADD VERTEX DocumentChunk(PRIMARY_ID id STRING, idx INT, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; ADD VERTEX Document(PRIMARY_ID id STRING, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; ADD VERTEX Concept(PRIMARY_ID id STRING, description STRING, concept_type STRING, human_curated BOOL, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; - ADD VERTEX Entity(PRIMARY_ID id STRING, definition STRING, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + 
ADD VERTEX Entity(PRIMARY_ID id STRING, definition STRING, description STRING, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; ADD VERTEX Relationship(PRIMARY_ID id STRING, definition STRING, short_name STRING, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; ADD VERTEX DocumentCollection(PRIMARY_ID id STRING, epoch_added UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; ADD VERTEX Content(PRIMARY_ID id STRING, text STRING, epoch_added UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; @@ -18,4 +18,18 @@ CREATE SCHEMA_CHANGE JOB add_supportai_schema { ADD DIRECTED EDGE HAS_CHILD(FROM Document, TO DocumentChunk) WITH REVERSE_EDGE="reverse_HAS_CHILD"; ADD DIRECTED EDGE HAS_RELATIONSHIP(FROM Concept, TO Concept, relation_type STRING) WITH REVERSE_EDGE="reverse_HAS_RELATIONSHIP"; ADD DIRECTED EDGE CONTAINS_DOCUMENT(FROM DocumentCollection, TO Document) WITH REVERSE_EDGE="reverse_CONTAINS_DOCUMENT"; -} \ No newline at end of file + + // GraphRAG + ADD VERTEX Community(PRIMARY_ID id STRING, description INT) WITH PRIMARY_ID_AS_ATTRIBUTE="true"; + ADD VERTEX ResolvedEntity(PRIMARY_ID id STRING, description STRING) WITH PRIMARY_ID_AS_ATTRIBUTE="true"; + + ADD DIRECTED EDGE KNN(FROM Entity, TO Entity); // TODO: check where knn algo writes results + ADD DIRECTED EDGE RESOLVES_TO(FROM Entity, TO ResolvedEntity); // Connect ResolvedEntities with their children entities + ADD DIRECTED EDGE RESOLVED_RELATIONSHIP(FROM ResolvedEntity, TO ResolvedEntity, weight UINT); // store edges between entities after they're resolved + ADD DIRECTED EDGE IN_COMMUNITY(FROM ResolvedEntity, TO Community); + + // TODO: louvain will be run on resolved entities, but stored in community then on communities until louvain runs out + // Hierarchical communities (Louvain/Leiden) + // ADD UNDIRECTED EDGE LINKS_TO(FROM Community, TO Community); + // ADD DIRECTED EDGE BELONGS_TO(FROM Community, TO Community); +} diff --git a/common/llm_services/openai_service.py b/common/llm_services/openai_service.py index 914f6364..c7274720 100644 --- a/common/llm_services/openai_service.py +++ b/common/llm_services/openai_service.py @@ -1,5 +1,6 @@ import logging import os +from langchain_openai import ChatOpenAI from common.llm_services import LLM_Model from common.logs.log import req_id_cv @@ -16,7 +17,6 @@ def __init__(self, config): auth_detail ] - from langchain.chat_models import ChatOpenAI model_name = config["llm_model"] self.llm = ChatOpenAI( diff --git a/common/py_schemas/schemas.py b/common/py_schemas/schemas.py index e5dd1faf..07a2113f 100644 --- a/common/py_schemas/schemas.py +++ b/common/py_schemas/schemas.py @@ -15,11 +15,13 @@ class SupportAIQuestion(BaseModel): method_params: dict = {} -class SupportAIInitConfig(BaseModel): - chunker: str - chunker_params: dict - extractor: str - extractor_params: dict +class SupportAIMethod(enum.StrEnum): + SUPPORTAI = enum.auto() + GRAPHRAG = enum.auto() + + +class EccConfig(BaseModel): + method: SupportAIMethod = SupportAIMethod.SUPPORTAI class GSQLQueryInfo(BaseModel): @@ -126,15 +128,18 @@ class QueryUpsertRequest(BaseModel): id: Optional[str] query_info: Optional[GSQLQueryInfo] + class MessageContext(BaseModel): # TODO: fix this to contain proper message context user: str content: str + class ReportQuestions(BaseModel): question: str reasoning: str + class ReportSection(BaseModel): 
section_name: str description: str @@ -142,6 +147,7 @@ class ReportSection(BaseModel): copilot_fortify: bool = True actions: Optional[List[str]] = None + class ReportCreationRequest(BaseModel): topic: str sections: Union[List[ReportSection], str] = None @@ -150,6 +156,7 @@ class ReportCreationRequest(BaseModel): conversation_id: Optional[str] = None message_context: Optional[List[MessageContext]] = None + class Role(enum.StrEnum): SYSTEM = enum.auto() USER = enum.auto() diff --git a/copilot/app/routers/supportai.py b/copilot/app/routers/supportai.py index a3c94951..a829d3a4 100644 --- a/copilot/app/routers/supportai.py +++ b/copilot/app/routers/supportai.py @@ -1,22 +1,38 @@ import json import logging -import uuid from typing import Annotated -from fastapi import APIRouter, BackgroundTasks, Depends, Request +from fastapi import APIRouter, BackgroundTasks, Depends, Request, Response, status from fastapi.security.http import HTTPBase +from supportai import supportai from supportai.concept_management.create_concepts import ( - CommunityConceptCreator, EntityConceptCreator, HigherLevelConceptCreator, - RelationshipConceptCreator) -from supportai.retrievers import (EntityRelationshipRetriever, - HNSWOverlapRetriever, HNSWRetriever, - HNSWSiblingRetriever) - -from common.config import (db_config, embedding_service, embedding_store, - get_llm_service, llm_config) + CommunityConceptCreator, + EntityConceptCreator, + HigherLevelConceptCreator, + RelationshipConceptCreator, +) +from supportai.retrievers import ( + EntityRelationshipRetriever, + HNSWOverlapRetriever, + HNSWRetriever, + HNSWSiblingRetriever, +) + +from common.config import ( + db_config, + embedding_service, + embedding_store, + get_llm_service, + llm_config, +) from common.logs.logwriter import LogWriter -from common.py_schemas.schemas import (CoPilotResponse, CreateIngestConfig, - LoadingInfo, SupportAIQuestion) +from common.py_schemas.schemas import ( # SupportAIInitConfig,; SupportAIMethod, + CoPilotResponse, + CreateIngestConfig, + LoadingInfo, + SupportAIMethod, + SupportAIQuestion, +) logger = logging.getLogger(__name__) router = APIRouter(tags=["SupportAI"]) @@ -26,50 +42,14 @@ @router.post("/{graphname}/supportai/initialize") def initialize( - graphname, conn: Request, credentials: Annotated[HTTPBase, Depends(security)] + graphname, + conn: Request, + credentials: Annotated[HTTPBase, Depends(security)], ): conn = conn.state.conn - # need to open the file using the absolute path - file_path = "common/gsql/supportai/SupportAI_Schema.gsql" - with open(file_path, "r") as f: - schema = f.read() - schema_res = conn.gsql( - """USE GRAPH {}\n{}\nRUN SCHEMA_CHANGE JOB add_supportai_schema""".format( - graphname, schema - ) - ) - - file_path = "common/gsql/supportai/SupportAI_IndexCreation.gsql" - with open(file_path) as f: - index = f.read() - index_res = conn.gsql( - """USE GRAPH {}\n{}\nRUN SCHEMA_CHANGE JOB add_supportai_indexes""".format( - graphname, index - ) - ) - - file_path = "common/gsql/supportai/Scan_For_Updates.gsql" - with open(file_path) as f: - scan_for_updates = f.read() - res = conn.gsql( - "USE GRAPH " - + conn.graphname - + "\n" - + scan_for_updates - + "\n INSTALL QUERY Scan_For_Updates" - ) - - file_path = "common/gsql/supportai/Update_Vertices_Processing_Status.gsql" - with open(file_path) as f: - update_vertices = f.read() - res = conn.gsql( - "USE GRAPH " - + conn.graphname - + "\n" - + update_vertices - + "\n INSTALL QUERY Update_Vertices_Processing_Status" - ) + resp = supportai.init_supportai(conn, 
graphname) + schema_res, index_res = resp[0], resp[1] return { "host_name": conn._tg_connection.host, # include host_name for debugging from client. Their pyTG conn might not have the same host as what's configured in copilot "schema_creation_status": json.dumps(schema_res), @@ -80,132 +60,13 @@ def initialize( @router.post("/{graphname}/supportai/create_ingest") def create_ingest( graphname, - ingest_config: CreateIngestConfig, + cfg: CreateIngestConfig, conn: Request, credentials: Annotated[HTTPBase, Depends(security)], ): conn = conn.state.conn - if ingest_config.file_format.lower() == "json": - file_path = "common/gsql/supportai/SupportAI_InitialLoadJSON.gsql" - - with open(file_path) as f: - ingest_template = f.read() - ingest_template = ingest_template.replace("@uuid@", str(uuid.uuid4().hex)) - doc_id = ingest_config.loader_config.get("doc_id_field", "doc_id") - doc_text = ingest_config.loader_config.get("content_field", "content") - ingest_template = ingest_template.replace('"doc_id"', '"{}"'.format(doc_id)) - ingest_template = ingest_template.replace('"content"', '"{}"'.format(doc_text)) - - if ingest_config.file_format.lower() == "csv": - file_path = "common/gsql/supportai/SupportAI_InitialLoadCSV.gsql" - - with open(file_path) as f: - ingest_template = f.read() - ingest_template = ingest_template.replace("@uuid@", str(uuid.uuid4().hex)) - separator = ingest_config.get("separator", "|") - header = ingest_config.get("header", "true") - eol = ingest_config.get("eol", "\n") - quote = ingest_config.get("quote", "double") - ingest_template = ingest_template.replace('"|"', '"{}"'.format(separator)) - ingest_template = ingest_template.replace('"true"', '"{}"'.format(header)) - ingest_template = ingest_template.replace('"\\n"', '"{}"'.format(eol)) - ingest_template = ingest_template.replace('"double"', '"{}"'.format(quote)) - - file_path = "common/gsql/supportai/SupportAI_DataSourceCreation.gsql" - - with open(file_path) as f: - data_stream_conn = f.read() - - # assign unique identifier to the data stream connection - - data_stream_conn = data_stream_conn.replace( - "@source_name@", "SupportAI_" + graphname + "_" + str(uuid.uuid4().hex) - ) - - # check the data source and create the appropriate connection - if ingest_config.data_source.lower() == "s3": - data_conn = ingest_config.data_source_config - if ( - data_conn.get("aws_access_key") is None - or data_conn.get("aws_secret_key") is None - ): - raise Exception("AWS credentials not provided") - connector = { - "type": "s3", - "access.key": data_conn["aws_access_key"], - "secret.key": data_conn["aws_secret_key"], - } - - data_stream_conn = data_stream_conn.replace( - "@source_config@", json.dumps(connector) - ) - - elif ingest_config.data_source.lower() == "azure": - if ingest_config.data_source_config.get("account_key") is not None: - connector = { - "type": "abs", - "account.key": ingest_config.data_source_config["account_key"], - } - elif ingest_config.data_source_config.get("client_id") is not None: - # verify that the client secret is also provided - if ingest_config.data_source_config.get("client_secret") is None: - raise Exception("Client secret not provided") - # verify that the tenant id is also provided - if ingest_config.data_source_config.get("tenant_id") is None: - raise Exception("Tenant id not provided") - connector = { - "type": "abs", - "client.id": ingest_config.data_source_config["client_id"], - "client.secret": ingest_config.data_source_config["client_secret"], - "tenant.id": 
ingest_config.data_source_config["tenant_id"], - } - else: - raise Exception("Azure credentials not provided") - data_stream_conn = data_stream_conn.replace( - "@source_config@", json.dumps(connector) - ) - elif ingest_config.data_source.lower() == "gcs": - # verify that the correct fields are provided - if ingest_config.data_source_config.get("project_id") is None: - raise Exception("Project id not provided") - if ingest_config.data_source_config.get("private_key_id") is None: - raise Exception("Private key id not provided") - if ingest_config.data_source_config.get("private_key") is None: - raise Exception("Private key not provided") - if ingest_config.data_source_config.get("client_email") is None: - raise Exception("Client email not provided") - connector = { - "type": "gcs", - "project_id": ingest_config.data_source_config["project_id"], - "private_key_id": ingest_config.data_source_config["private_key_id"], - "private_key": ingest_config.data_source_config["private_key"], - "client_email": ingest_config.data_source_config["client_email"], - } - data_stream_conn = data_stream_conn.replace( - "@source_config@", json.dumps(connector) - ) - else: - raise Exception("Data source not implemented") - - load_job_created = conn.gsql("USE GRAPH {}\n".format(graphname) + ingest_template) - - data_source_created = conn.gsql( - "USE GRAPH {}\n".format(graphname) + data_stream_conn - ) - - return { - "load_job_id": load_job_created.split(":")[1] - .strip(" [") - .strip(" ") - .strip(".") - .strip("]"), - "data_source_id": data_source_created.split(":")[1] - .strip(" [") - .strip(" ") - .strip(".") - .strip("]"), - } + return supportai.create_ingest(graphname, cfg, conn) @router.post("/{graphname}/supportai/ingest") @@ -387,18 +248,24 @@ def build_concepts( return {"status": "success"} -@router.get("/{graphname}/supportai/forceupdate") -def ecc( - graphname, +@router.get("/{graphname}/{method}/forceupdate") +def supportai_update( + graphname: str, + method: str, conn: Request, credentials: Annotated[HTTPBase, Depends(security)], bg_tasks: BackgroundTasks, + response: Response, ): + if method != SupportAIMethod.SUPPORTAI and method != SupportAIMethod.GRAPHRAG: + response.status_code = status.HTTP_404_NOT_FOUND + return f"{method} is not a valid method. 
{SupportAIMethod.SUPPORTAI} or {SupportAIMethod.GRAPHRAG}" + from httpx import get as http_get ecc = ( db_config.get("ecc", "http://localhost:8001") - + f"/{graphname}/consistency_status" + + f"/{graphname}/consistency_status/{method}" ) LogWriter.info(f"Sending ECC request to: {ecc}") bg_tasks.add_task( diff --git a/copilot/app/supportai/supportai.py b/copilot/app/supportai/supportai.py new file mode 100644 index 00000000..e96663a3 --- /dev/null +++ b/copilot/app/supportai/supportai.py @@ -0,0 +1,185 @@ +import json +import uuid + +from pyTigerGraph import TigerGraphConnection + +from common.py_schemas.schemas import ( + # CoPilotResponse, + CreateIngestConfig, + # LoadingInfo, + # SupportAIInitConfig, + # SupportAIMethod, + # SupportAIQuestion, +) + + +def init_supportai(conn: TigerGraphConnection, graphname: str) -> tuple[dict, dict]: + # need to open the file using the absolute path + file_path = "common/gsql/supportai/SupportAI_Schema.gsql" + with open(file_path, "r") as f: + schema = f.read() + schema_res = conn.gsql( + """USE GRAPH {}\n{}\nRUN SCHEMA_CHANGE JOB add_supportai_schema""".format( + graphname, schema + ) + ) + + file_path = "common/gsql/supportai/SupportAI_IndexCreation.gsql" + with open(file_path) as f: + index = f.read() + index_res = conn.gsql( + """USE GRAPH {}\n{}\nRUN SCHEMA_CHANGE JOB add_supportai_indexes""".format( + graphname, index + ) + ) + + file_path = "common/gsql/supportai/Scan_For_Updates.gsql" + with open(file_path) as f: + scan_for_updates = f.read() + res = conn.gsql( + "USE GRAPH " + + conn.graphname + + "\n" + + scan_for_updates + + "\n INSTALL QUERY Scan_For_Updates" + ) + + file_path = "common/gsql/supportai/Update_Vertices_Processing_Status.gsql" + with open(file_path) as f: + update_vertices = f.read() + res = conn.gsql( + "USE GRAPH " + + conn.graphname + + "\n" + + update_vertices + + "\n INSTALL QUERY Update_Vertices_Processing_Status" + ) + + return schema_res, index_res + + +def create_ingest( + graphname: str, + ingest_config: CreateIngestConfig, + conn: TigerGraphConnection, +): + if ingest_config.file_format.lower() == "json": + file_path = "common/gsql/supportai/SupportAI_InitialLoadJSON.gsql" + + with open(file_path) as f: + ingest_template = f.read() + ingest_template = ingest_template.replace("@uuid@", str(uuid.uuid4().hex)) + doc_id = ingest_config.loader_config.get("doc_id_field", "doc_id") + doc_text = ingest_config.loader_config.get("content_field", "content") + ingest_template = ingest_template.replace('"doc_id"', '"{}"'.format(doc_id)) + ingest_template = ingest_template.replace('"content"', '"{}"'.format(doc_text)) + + if ingest_config.file_format.lower() == "csv": + file_path = "common/gsql/supportai/SupportAI_InitialLoadCSV.gsql" + + with open(file_path) as f: + ingest_template = f.read() + ingest_template = ingest_template.replace("@uuid@", str(uuid.uuid4().hex)) + separator = ingest_config.get("separator", "|") + header = ingest_config.get("header", "true") + eol = ingest_config.get("eol", "\n") + quote = ingest_config.get("quote", "double") + ingest_template = ingest_template.replace('"|"', '"{}"'.format(separator)) + ingest_template = ingest_template.replace('"true"', '"{}"'.format(header)) + ingest_template = ingest_template.replace('"\\n"', '"{}"'.format(eol)) + ingest_template = ingest_template.replace('"double"', '"{}"'.format(quote)) + + file_path = "common/gsql/supportai/SupportAI_DataSourceCreation.gsql" + + with open(file_path) as f: + data_stream_conn = f.read() + + # assign unique identifier to the data stream 
connection + + data_stream_conn = data_stream_conn.replace( + "@source_name@", "SupportAI_" + graphname + "_" + str(uuid.uuid4().hex) + ) + + # check the data source and create the appropriate connection + if ingest_config.data_source.lower() == "s3": + data_conn = ingest_config.data_source_config + if ( + data_conn.get("aws_access_key") is None + or data_conn.get("aws_secret_key") is None + ): + raise Exception("AWS credentials not provided") + connector = { + "type": "s3", + "access.key": data_conn["aws_access_key"], + "secret.key": data_conn["aws_secret_key"], + } + + data_stream_conn = data_stream_conn.replace( + "@source_config@", json.dumps(connector) + ) + + elif ingest_config.data_source.lower() == "azure": + if ingest_config.data_source_config.get("account_key") is not None: + connector = { + "type": "abs", + "account.key": ingest_config.data_source_config["account_key"], + } + elif ingest_config.data_source_config.get("client_id") is not None: + # verify that the client secret is also provided + if ingest_config.data_source_config.get("client_secret") is None: + raise Exception("Client secret not provided") + # verify that the tenant id is also provided + if ingest_config.data_source_config.get("tenant_id") is None: + raise Exception("Tenant id not provided") + connector = { + "type": "abs", + "client.id": ingest_config.data_source_config["client_id"], + "client.secret": ingest_config.data_source_config["client_secret"], + "tenant.id": ingest_config.data_source_config["tenant_id"], + } + else: + raise Exception("Azure credentials not provided") + data_stream_conn = data_stream_conn.replace( + "@source_config@", json.dumps(connector) + ) + elif ingest_config.data_source.lower() == "gcs": + # verify that the correct fields are provided + if ingest_config.data_source_config.get("project_id") is None: + raise Exception("Project id not provided") + if ingest_config.data_source_config.get("private_key_id") is None: + raise Exception("Private key id not provided") + if ingest_config.data_source_config.get("private_key") is None: + raise Exception("Private key not provided") + if ingest_config.data_source_config.get("client_email") is None: + raise Exception("Client email not provided") + connector = { + "type": "gcs", + "project_id": ingest_config.data_source_config["project_id"], + "private_key_id": ingest_config.data_source_config["private_key_id"], + "private_key": ingest_config.data_source_config["private_key"], + "client_email": ingest_config.data_source_config["client_email"], + } + data_stream_conn = data_stream_conn.replace( + "@source_config@", json.dumps(connector) + ) + else: + raise Exception("Data source not implemented") + + load_job_created = conn.gsql("USE GRAPH {}\n".format(graphname) + ingest_template) + + data_source_created = conn.gsql( + "USE GRAPH {}\n".format(graphname) + data_stream_conn + ) + + return { + "load_job_id": load_job_created.split(":")[1] + .strip(" [") + .strip(" ") + .strip(".") + .strip("]"), + "data_source_id": data_source_created.split(":")[1] + .strip(" [") + .strip(" ") + .strip(".") + .strip("]"), + } diff --git a/copilot/docs/notebooks/graphrag.ipynb b/copilot/docs/notebooks/graphrag.ipynb new file mode 100644 index 00000000..3b1200af --- /dev/null +++ b/copilot/docs/notebooks/graphrag.ipynb @@ -0,0 +1,261 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from pyTigerGraph import TigerGraphConnection\n", + "from dotenv import load_dotenv\n", + "\n", + 
"load_dotenv()\n", + "# We first create a connection to the database\n", + "host = os.environ[\"HOST\"]\n", + "username = os.getenv(\"USERNAME\", \"tigergraph\")\n", + "password = os.getenv(\"PASS\", \"tigergraph\")\n", + "conn = TigerGraphConnection(\n", + " host=host, username=username, password=password, graphname=\"GraphRAG_pytgdocs\"\n", + ")\n", + "\n", + "conn.getToken()\n", + "\n", + "# And then add CoPilot's address to the connection. This address\n", + "# is the host's address where the CoPilot container is running.\n", + "conn.ai.configureCoPilotHost(\"http://localhost:8000\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "conn.graphname = \"GraphRAG_pytgdocs\"\n", + "# conn.gsql(\"\"\"CREATE GRAPH pyTigerGraphRAG()\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'host_name': 'https://algotesting.i.tgcloud.io',\n", + " 'schema_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: [add_supportai_schema].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_schema\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 0)\\\\nTrying to add local vertex \\'DocumentChunk\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Document\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Concept\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Entity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Relationship\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'DocumentCollection\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Content\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Community\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'ResolvedEntity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CONTENT\\' and its reverse edge \\'reverse_HAS_CONTENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_CHILD_OF\\' and its reverse edge \\'reverse_IS_CHILD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_HEAD_OF\\' and its reverse edge \\'reverse_IS_HEAD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_TAIL\\' and its reverse edge \\'reverse_HAS_TAIL\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_RELATIONSHIP\\' and its reverse edge \\'reverse_DESCRIBES_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_ENTITY\\' and its reverse edge \\'reverse_DESCRIBES_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_ENTITY\\' and its reverse edge \\'reverse_CONTAINS_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'MENTIONS_RELATIONSHIP\\' and its reverse edge \\'reverse_MENTIONS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_AFTER\\' and its reverse edge 
\\'reverse_IS_AFTER\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CHILD\\' and its reverse edge \\'reverse_HAS_CHILD\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_RELATIONSHIP\\' and its reverse edge \\'reverse_HAS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_DOCUMENT\\' and its reverse edge \\'reverse_CONTAINS_DOCUMENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'KNN\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVES_TO\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVED_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IN_COMMUNITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 1\\\\nThe job add_supportai_schema completes in 2.829 seconds!\\\\nLocal schema change succeeded.\"',\n", + " 'index_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: [add_supportai_indexes].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_indexes\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 1)\\\\nTrying to add index \\'doc_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_indexepoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 2\\\\nThe job add_supportai_indexes completes in 2.002 seconds!\\\\nLocal schema change succeeded.\"'}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "conn.ai.initializeSupportAI()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "access = os.environ[\"AWS_ACCESS_KEY_ID\"]\n", 
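+    "# Both AWS keys are read from the environment (for example via the .env file loaded earlier); a missing variable raises KeyError here.\n",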
+ "sec = os.environ[\"AWS_SECRET_ACCESS_KEY\"]\n", + "res = conn.ai.createDocumentIngest(\n", + " data_source=\"s3\",\n", + " data_source_config={\"aws_access_key\": access, \"aws_secret_key\": sec},\n", + " loader_config={\"doc_id_field\": \"url\", \"content_field\": \"content\"},\n", + " file_format=\"json\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'job_name': 'load_documents_content_json_75b43aab4f714888b2be3f30441e745a',\n", + " 'job_id': 'GraphRAG_pytgdocs.load_documents_content_json_75b43aab4f714888b2be3f30441e745a.stream.SupportAI_GraphRAG_pytgdocs_f0e175af264a4a18b1aa3bf8f4063d0e.1721674044503',\n", + " 'log_location': '/home/tigergraph/tigergraph/log/kafkaLoader/GraphRAG_pytgdocs.load_documents_content_json_75b43aab4f714888b2be3f30441e745a.stream.SupportAI_GraphRAG_pytgdocs_f0e175af264a4a18b1aa3bf8f4063d0e.1721674044503'}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "conn.ai.runDocumentIngest(\n", + " res[\"load_job_id\"],\n", + " res[\"data_source_id\"],\n", + " \"s3://tg-documentation/pytg_current/pytg_current.jsonl\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import httpx\n", + "import base64\n", + "\n", + "# conn.ai.forceConsistencyUpdate()\n", + "# url = self.nlqs_host+\"/\"+self.conn.graphname+\"/supportai/forceupdate\"\n", + "# return self.conn._req(\"GET\", url, authMode=\"pwd\", resKey=None)\n", + "httpx.get(f\"http://localhost:8000/{conn.graphname}/supportai/forceupdate\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "content='Hello! How can I assist you today?' 
response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 8, 'total_tokens': 17}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-2a50fab6-62fc-433c-98b4-221346ca41c6-0' usage_metadata={'input_tokens': 8, 'output_tokens': 9, 'total_tokens': 17}\n" + ] + }, + { + "data": { + "text/plain": [ + "Joke(setup='Why was the cat sitting on the computer?', punchline='To keep an eye on the mouse!')" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain_core.pydantic_v1 import BaseModel, Field\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "\n", + "class Joke(BaseModel):\n", + " setup: str = Field(description=\"The setup of the joke\")\n", + " punchline: str = Field(description=\"The punchline to the joke\")\n", + "\n", + "\n", + "model = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)\n", + "print(model.invoke('hi'))\n", + "structured_llm = model.with_structured_output(Joke)\n", + "structured_llm.invoke(\"Tell me a joke about cats\")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.documents import Document\n", + "from langchain_experimental.graph_transformers import LLMGraphTransformer\n", + "from langchain_openai import ChatOpenAI\n", + "import os\n", + "# from langchain_core.pydantic_v1 import BaseModel\n", + "from pydantic import BaseModel\n", + "\n", + "\n", + "class AnswerWithJustification(BaseModel):\n", + " \"\"\"An answer to the user question along with justification for the answer.\"\"\"\n", + " answer: str\n", + " justification: str\n", + "\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = \"\"\n", + "model_name = \"gpt-4o-mini\"\n", + "llm = ChatOpenAI(model=model_name, temperature=0)\n", + "# sllm = llm.with_structured_output(AnswerWithJustification)\n", + "# print(sllm.invoke(\"What weighs more a pound of bricks or a pound of feathers\"))\n", + "\n", + "class GraphExtractor:\n", + " def __init__(self):\n", + " self.transformer = LLMGraphTransformer(\n", + " llm=llm,\n", + " node_properties=[\"description\"],\n", + " relationship_properties=[\"description\"],\n", + " )\n", + "\n", + " def extract(self, text):\n", + " doc = Document(page_content=text)\n", + " graph_docs = self.transformer.convert_to_graph_documents([doc])\n", + " return graph_docs" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id='Marie Curie' type='Person' properties={'description': 'A Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.'}\n", + "id='Pierre Curie' type='Person' properties={'description': 'Husband of Marie Curie and co-winner of her first Nobel Prize.'}\n", + "id='University Of Paris' type='Institution' properties={'description': 'The institution where Marie Curie became the first woman professor in 1906.'}\n", + "id='Nobel Prize' type='Award' properties={'description': 'An award won by Marie Curie, first woman to win it and first person to win it twice.'}\n", + "source=Node(id='Marie Curie', type='Person') target=Node(id='Pierre Curie', type='Person') type='HUSBAND' properties={'description': \"Marie Curie's husband and co-winner of her first Nobel Prize.\"}\n", + "source=Node(id='Marie Curie', type='Person') target=Node(id='Nobel Prize', type='Award') type='WINNER' 
properties={'description': 'First woman to win a Nobel Prize.'}\n", + "source=Node(id='Marie Curie', type='Person') target=Node(id='Nobel Prize', type='Award') type='WINNER' properties={'description': 'First person to win a Nobel Prize twice.'}\n", + "source=Node(id='Marie Curie', type='Person') target=Node(id='Nobel Prize', type='Award') type='WINNER' properties={'description': 'Only person to win a Nobel Prize in two scientific fields.'}\n", + "source=Node(id='Marie Curie', type='Person') target=Node(id='University Of Paris', type='Institution') type='PROFESSOR' properties={'description': 'First woman to become a professor at the University of Paris in 1906.'}\n" + ] + } + ], + "source": [ + "text = \"\"\"\n", + "Marie Curie, born in 1867, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.\n", + "She was the first woman to win a Nobel Prize, the first person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two scientific fields.\n", + "Her husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first-ever married couple to win the Nobel Prize and launching the Curie family legacy of five Nobel Prizes.\n", + "She was, in 1906, the first woman to become a professor at the University of Paris.\n", + "\"\"\"\n", + "ge = GraphExtractor()\n", + "\n", + "docs = ge.extract(text)\n", + "for d in docs:\n", + " for n in d.nodes:\n", + " print(n)\n", + " for r in d.relationships:\n", + " print(r)\n", + "# print(f\"Nodes:{docs[0].nodes}\")\n", + "# print(f\"Relationships:{docs[0].relationships}\")\n", + "# docs" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ml", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/eventual-consistency-service/app/eventual_consistency_checker.py b/eventual-consistency-service/app/eventual_consistency_checker.py index 007330bd..fa16694e 100644 --- a/eventual-consistency-service/app/eventual_consistency_checker.py +++ b/eventual-consistency-service/app/eventual_consistency_checker.py @@ -1,4 +1,3 @@ -import json import logging import time from typing import Dict, List @@ -367,4 +366,4 @@ def get_status(self): )[0] LogWriter.info(f"ECC_Status for graphname {self.graphname}: {status}") statuses[v_type] = status - return statuses \ No newline at end of file + return statuses diff --git a/eventual-consistency-service/app/graphrag/__init__.py b/eventual-consistency-service/app/graphrag/__init__.py new file mode 100644 index 00000000..953b2a0b --- /dev/null +++ b/eventual-consistency-service/app/graphrag/__init__.py @@ -0,0 +1 @@ +from .graph_rag import * diff --git a/eventual-consistency-service/app/graphrag/graph_rag.py b/eventual-consistency-service/app/graphrag/graph_rag.py new file mode 100644 index 00000000..637546d6 --- /dev/null +++ b/eventual-consistency-service/app/graphrag/graph_rag.py @@ -0,0 +1,138 @@ +import asyncio +import logging + +from graphrag.util import install_query +from graphrag.worker import worker +from pyTigerGraph import TigerGraphConnection + +from common.chunkers import character_chunker, regex_chunker, semantic_chunker +from common.chunkers.base_chunker import BaseChunker +from common.config import 
(doc_processing_config, embedding_service, + get_llm_service, llm_config, milvus_config) +from common.embeddings.milvus_embedding_store import MilvusEmbeddingStore +from common.extractors import GraphExtractor, LLMEntityRelationshipExtractor +from common.extractors.BaseExtractor import BaseExtractor + +logger = logging.getLogger(__name__) +consistency_checkers = {} + + +def get_chunker(): + if doc_processing_config.get("chunker") == "semantic": + chunker = semantic_chunker.SemanticChunker( + embedding_service, + doc_processing_config["chunker_config"].get("method", "percentile"), + doc_processing_config["chunker_config"].get("threshold", 0.95), + ) + elif doc_processing_config.get("chunker") == "regex": + chunker = regex_chunker.RegexChunker( + pattern=doc_processing_config["chunker_config"].get("pattern", "\\r?\\n") + ) + elif doc_processing_config.get("chunker") == "character": + chunker = character_chunker.CharacterChunker( + chunk_size=doc_processing_config["chunker_config"].get("chunk_size", 1024), + overlap_size=doc_processing_config["chunker_config"].get("overlap_size", 0), + ) + else: + raise ValueError("Invalid chunker type") + + return chunker + + +async def install_queries( + requried_queries: list[str], conn: TigerGraphConnection, n_workers=8 +): + loop = asyncio.get_event_loop() + tasks: list[asyncio.Task] = [] + + # queries that are currently installed + installed_queries = [q.split("/")[-1] for q in conn.getEndpoints(dynamic=True)] + + # add queries to be installed into the queue + tq = asyncio.Queue() + for q in requried_queries: + if q not in installed_queries: + tq.put_nowait((install_query, (conn, q))) + # break + + print("starting workers") + # start workers + for n in range(min(tq.qsize(), n_workers)): + task = loop.create_task(worker(n, tq)) + tasks.append(task) + + # wait for workers to finish jobs + await tq.join() + for t in tasks: + print(t.result()) + return "", "", "" + + +async def init( + graphname: str, conn: TigerGraphConnection +) -> tuple[BaseChunker, dict[str, MilvusEmbeddingStore], BaseExtractor]: + # install requried queries + requried_queries = [ + "Scan_For_Updates", + "Update_Vertices_Processing_Status", + "ECC_Status", + "Check_Nonexistent_Vertices", + ] + await install_queries(requried_queries, conn) + + # init processing tools + chunker = get_chunker() + vector_indices = {} + vertex_field = milvus_config.get("vertex_field", "vertex_id") + index_names = milvus_config.get( + "indexes", + ["Document", "DocumentChunk", "Entity", "Relationship"], + ) + for index_name in index_names: + vector_indices[graphname + "_" + index_name] = MilvusEmbeddingStore( + embedding_service, + host=milvus_config["host"], + port=milvus_config["port"], + support_ai_instance=True, + collection_name=graphname + "_" + index_name, + username=milvus_config.get("username", ""), + password=milvus_config.get("password", ""), + vector_field=milvus_config.get("vector_field", "document_vector"), + text_field=milvus_config.get("text_field", "document_content"), + vertex_field=vertex_field, + ) + + if doc_processing_config.get("extractor") == "llm": + extractor = GraphExtractor() + elif doc_processing_config.get("extractor") == "llm": + extractor = LLMEntityRelationshipExtractor(get_llm_service(llm_config)) + else: + raise ValueError("Invalid extractor type") + + if vertex_field is None: + raise ValueError( + "vertex_field is not defined. Ensure Milvus is enabled in the configuration." 
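        # Editor's note on the extractor dispatch above: both the `if` and the
        # `elif` branch compare doc_processing_config.get("extractor") to "llm",
        # so GraphExtractor is always selected and the
        # LLMEntityRelationshipExtractor branch is unreachable. A minimal sketch
        # of a disambiguated dispatch (the "graphrag" key is a hypothetical
        # config value, not taken from the source):
        #
        #   extractor_type = doc_processing_config.get("extractor")
        #   if extractor_type == "graphrag":      # hypothetical value
        #       extractor = GraphExtractor()
        #   elif extractor_type == "llm":
        #       extractor = LLMEntityRelationshipExtractor(get_llm_service(llm_config))
        #   else:
        #       raise ValueError("Invalid extractor type")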
+ ) + + return chunker, vector_indices, extractor + + +async def run(graphname: str, conn: TigerGraphConnection): + """ + ecc flow + + initialize_eventual_consistency_checker + instantiates ecc object + writes checker to checker dict + runs ecc_obj.initialize() + + ECC.initialize + loops and calls fetch and process + + """ + + chunker, vector_indices, extractor = await init(graphname, conn) + + # process docs + + return f"hi from graph rag ecc: {conn.graphname} ({graphname})" diff --git a/eventual-consistency-service/app/graphrag/util.py b/eventual-consistency-service/app/graphrag/util.py new file mode 100644 index 00000000..ae6fbcf7 --- /dev/null +++ b/eventual-consistency-service/app/graphrag/util.py @@ -0,0 +1,36 @@ +import base64 +from urllib.parse import quote_plus + +import httpx +from pyTigerGraph import TigerGraphConnection + +from common.logs.logwriter import LogWriter + + +async def install_query( + conn: TigerGraphConnection, query_name: str +) -> dict[str, httpx.Response | str | None]: + print("install --", query_name) + LogWriter.info(f"Installing query {query_name}") + with open(f"common/gsql/supportai/{query_name}.gsql", "r") as f: + query = f.read() + + query = f"""\ +USE GRAPH {conn.graphname} +{query} +INSTALL QUERY {query_name}""" + tkn = base64.b64encode(f"{conn.username}:{conn.password}".encode()).decode() + headers = {"Authorization": f"Basic {tkn}"} + + async with httpx.AsyncClient(timeout=None) as client: + res = await client.post( + conn.gsUrl + "/gsqlserver/gsql/file", + data=quote_plus(query.encode("utf-8")), + headers=headers, + ) + + if "error" in res.text.lower(): + LogWriter.error(res.text) + return {"result": None, "error": f"Failed to install query {query_name}"} + + return {"result": res, "error": False} diff --git a/eventual-consistency-service/app/graphrag/worker.py b/eventual-consistency-service/app/graphrag/worker.py new file mode 100644 index 00000000..4edd561a --- /dev/null +++ b/eventual-consistency-service/app/graphrag/worker.py @@ -0,0 +1,27 @@ +import asyncio + + +async def worker( + n: int, + task_queue: asyncio.Queue, +): + worker_name = f"worker-{n+1}" + worker_name += " " if n + 1 < 10 else "" + responses = [] + i = 0 + + while not task_queue.empty(): + # get the next task + func, args = await task_queue.get() + response = await func(*args) + + responses.append(response) + i += 1 + task_queue.task_done() + + # collate results + results = [] + for r in responses: + results.append(r) + + return results diff --git a/eventual-consistency-service/app/main.py b/eventual-consistency-service/app/main.py index 4ca26c2c..4c486bc0 100644 --- a/eventual-consistency-service/app/main.py +++ b/eventual-consistency-service/app/main.py @@ -1,54 +1,79 @@ +import asyncio +import json import logging -from typing import Annotated +from contextlib import asynccontextmanager +from threading import Thread +from typing import Annotated, Callable -from fastapi import Depends, FastAPI, BackgroundTasks +import graphrag +from eventual_consistency_checker import EventualConsistencyChecker +from fastapi import BackgroundTasks, Depends, FastAPI, Response, status from fastapi.security.http import HTTPBase from common.config import ( db_config, + doc_processing_config, embedding_service, get_llm_service, llm_config, milvus_config, security, - doc_processing_config, ) +from common.db.connections import elevate_db_connection_to_token from common.embeddings.milvus_embedding_store import MilvusEmbeddingStore from common.logs.logwriter import LogWriter from common.metrics.tg_proxy 
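# Editor's sketch (illustrative, not part of the patch): how the (func, args)
# task queue consumed by worker() above is meant to be used -- callers enqueue
# coroutine functions with their arguments and one or more consumers drain the
# queue until it is empty. Fully self-contained; names below are placeholders.
import asyncio

async def greet(name: str) -> str:
    return f"hello {name}"

async def drain(q: asyncio.Queue) -> list:
    out = []
    while not q.empty():
        func, args = await q.get()
        out.append(await func(*args))
        q.task_done()
    return out

async def demo():
    q = asyncio.Queue()
    for name in ("Scan_For_Updates", "ECC_Status"):
        q.put_nowait((greet, (name,)))
    print(await drain(q))  # several drain() tasks could share the same queue

asyncio.run(demo())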
import TigerGraphConnectionProxy -from common.db.connections import elevate_db_connection_to_token -from eventual_consistency_checker import EventualConsistencyChecker -import json -from threading import Thread +from common.py_schemas.schemas import SupportAIMethod logger = logging.getLogger(__name__) consistency_checkers = {} -app = FastAPI() -@app.on_event("startup") -def startup_event(): - if not db_config.get("enable_consistency_checker", True): - LogWriter.info("Eventual consistency checker disabled") - return +@asynccontextmanager +async def lifespan(_: FastAPI): + if not db_config.get("enable_consistency_checker", False): + LogWriter.info("Eventual Consistency Checker not run on startup") + + else: + startup_checkers = db_config.get("graph_names", []) + for graphname in startup_checkers: + conn = elevate_db_connection_to_token( + db_config["hostname"], + db_config["username"], + db_config["password"], + graphname, + ) + start_ecc_in_thread(graphname, conn) + yield + LogWriter.info("ECC Shutdown") + + +app = FastAPI(lifespan=lifespan) - startup_checkers = db_config.get("graph_names", []) - for graphname in startup_checkers: - conn = elevate_db_connection_to_token(db_config["hostname"], db_config["username"], db_config["password"], graphname) - start_ecc_in_thread(graphname, conn) def start_ecc_in_thread(graphname: str, conn: TigerGraphConnectionProxy): - thread = Thread(target=initialize_eventual_consistency_checker, args=(graphname, conn), daemon=True) + thread = Thread( + target=initialize_eventual_consistency_checker, + args=(graphname, conn), + daemon=True, + ) thread.start() LogWriter.info(f"Eventual consistency checker started for graph {graphname}") -def initialize_eventual_consistency_checker(graphname: str, conn: TigerGraphConnectionProxy): + +def initialize_eventual_consistency_checker( + graphname: str, conn: TigerGraphConnectionProxy +): if graphname in consistency_checkers: return consistency_checkers[graphname] try: - process_interval_seconds = milvus_config.get("process_interval_seconds", 1800) # default 30 minutes - cleanup_interval_seconds = milvus_config.get("cleanup_interval_seconds", 86400) # default 30 days, + process_interval_seconds = milvus_config.get( + "process_interval_seconds", 1800 + ) # default 30 minutes + cleanup_interval_seconds = milvus_config.get( + "cleanup_interval_seconds", 86400 + ) # default 30 days, batch_size = milvus_config.get("batch_size", 10) vector_indices = {} vertex_field = None @@ -70,7 +95,7 @@ def initialize_eventual_consistency_checker(graphname: str, conn: TigerGraphConn password=milvus_config.get("password", ""), vector_field=milvus_config.get("vector_field", "document_vector"), text_field=milvus_config.get("text_field", "document_content"), - vertex_field=vertex_field + vertex_field=vertex_field, ) if doc_processing_config.get("chunker") == "semantic": @@ -111,7 +136,9 @@ def initialize_eventual_consistency_checker(graphname: str, conn: TigerGraphConn raise ValueError("Invalid extractor type") if vertex_field is None: - raise ValueError("vertex_field is not defined. Ensure Milvus is enabled in the configuration.") + raise ValueError( + "vertex_field is not defined. Ensure Milvus is enabled in the configuration." 
+ ) checker = EventualConsistencyChecker( process_interval_seconds, @@ -124,7 +151,7 @@ def initialize_eventual_consistency_checker(graphname: str, conn: TigerGraphConn conn, chunker, extractor, - batch_size + batch_size, ) consistency_checkers[graphname] = checker @@ -138,22 +165,65 @@ def initialize_eventual_consistency_checker(graphname: str, conn: TigerGraphConn return checker except Exception as e: - LogWriter.error(f"Failed to start eventual consistency checker for graph {graphname}: {e}") + LogWriter.error( + f"Failed to start eventual consistency checker for graph {graphname}: {e}" + ) + + +def start_func_in_thread(f: Callable, *args, **kwargs): + thread = Thread( + target=f, + args=args, + kwargs=kwargs, + daemon=True, + ) + thread.start() + LogWriter.info(f'Thread started for function: "{f.__name__}"') + + +# def start_async_func(f: Callable, *args, **kwargs): +# asyncio.run(f(args, kwargs)) +# LogWriter.info(f'Thread started for function: "{f.__name__}"') + @app.get("/") def root(): LogWriter.info(f"Healthcheck") return {"status": "ok"} -@app.get("/{graphname}/consistency_status") -def consistency_status(graphname: str, credentials: Annotated[HTTPBase, Depends(security)]): - if graphname in consistency_checkers: - ecc = consistency_checkers[graphname] - status = json.dumps(ecc.get_status()) - else: - conn = elevate_db_connection_to_token(db_config["hostname"], credentials.username, credentials.password, graphname) - start_ecc_in_thread(graphname, conn) - status = f"Eventual consistency checker started for graph {graphname}" - LogWriter.info(f"Returning consistency status for {graphname}: {status}") - return status +@app.get("/{graphname}/consistency_status/{ecc_method}") +def consistency_status( + graphname: str, + ecc_method: str, + background: BackgroundTasks, + credentials: Annotated[HTTPBase, Depends(security)], + response: Response, +): + conn = elevate_db_connection_to_token( + db_config["hostname"], + credentials.username, + credentials.password, + graphname, + ) + match ecc_method: + case SupportAIMethod.SUPPORTAI: + if graphname in consistency_checkers: + ecc = consistency_checkers[graphname] + ecc_status = json.dumps(ecc.get_status()) + else: + start_ecc_in_thread(graphname, conn) + ecc_status = ( + f"Eventual consistency checker started for graph {graphname}" + ) + + LogWriter.info(f"Returning consistency status for {graphname}: {status}") + case SupportAIMethod.GRAPHRAG: + background.add_task(graphrag.run, graphname, conn) + # asyncio.run(graphrag.run(graphname, conn)) + ecc_status = f"hi from graph rag ecc: {conn.graphname} ({graphname})" + case _: + response.status_code = status.HTTP_404_NOT_FOUND + return f"Method unsupported, must be {SupportAIMethod.SUPPORTAI}, {SupportAIMethod.GRAPHRAG}" + + return ecc_status From 8e0ed554c8041c5cc9b070f51636a8c636275b2f Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 23 Jul 2024 17:30:08 -0400 Subject: [PATCH 02/53] save: docs handled concurrently -- writing upsert_edge --- docker-compose.yml | 154 +++++++++--------- .../app/graphrag/graph_rag.py | 95 +++++++---- .../app/graphrag/util.py | 151 ++++++++++++++++- .../app/graphrag/worker.py | 33 +++- eventual-consistency-service/app/main.py | 37 +---- 5 files changed, 309 insertions(+), 161 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 4da38a25..f0a80154 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,30 +1,30 @@ services: - copilot: - image: tigergraphml/copilot:latest - 
container_name: copilot - build: - context: . - dockerfile: copilot/Dockerfile - ports: - - 8000:8000 - depends_on: - - eventual-consistency-service - - chat-history - environment: - LLM_CONFIG: "/code/configs/llm_config.json" - DB_CONFIG: "/code/configs/db_config.json" - MILVUS_CONFIG: "/code/configs/milvus_config.json" - LOGLEVEL: "INFO" - USE_CYPHER: "true" - volumes: - - ./configs/:/code/configs - - ./common:/code/common - networks: - - copilot_local - +# copilot: +# image: tigergraphml/copilot:latest +# container_name: copilot +# build: +# context: . +# dockerfile: copilot/Dockerfile +# ports: +# - 8000:8000 +# depends_on: +# - eventual-consistency-service +# - chat-history +# environment: +# LLM_CONFIG: "/code/configs/llm_config.json" +# DB_CONFIG: "/code/configs/db_config.json" +# MILVUS_CONFIG: "/code/configs/milvus_config.json" +# LOGLEVEL: "INFO" +# USE_CYPHER: "true" +# volumes: +# - ./configs/:/code/configs +# - ./common:/code/common +# networks: +# - copilot_local +# eventual-consistency-service: image: tigergraphml/ecc:latest - container_name: eventual-consistency-service + # container_name: eventual-consistency-service build: context: . dockerfile: eventual-consistency-service/Dockerfile @@ -40,64 +40,64 @@ services: - ./common:/code/common networks: - copilot_local - - chat-history: - image: tigergraphml/chat-history:latest - container_name: chat-history - build: - context: chat-history/ - dockerfile: Dockerfile - ports: - - 8002:8002 - environment: - CONFIG: "/configs/config.json" - LOGLEVEL: "INFO" - volumes: - - ./chat-history/:/configs - networks: - - copilot_local - # report-service: - # image: tigergraphml/report-service:latest - # container_name: report-service +# + # chat-history: + # image: tigergraphml/chat-history:latest + # container_name: chat-history # build: - # context: . - # dockerfile: report-service/Dockerfile + # context: chat-history/ + # dockerfile: Dockerfile # ports: # - 8002:8002 # environment: - # LLM_CONFIG: "/code/configs/llm_config.json" - # DB_CONFIG: "/code/configs/db_config.json" - # MILVUS_CONFIG: "/code/configs/milvus_config.json" + # CONFIG: "/configs/config.json" # LOGLEVEL: "INFO" # volumes: - # - ./configs/:/code/configs - # - ./common:/code/common - # - ui: - image: tigergraphml/copilot-ui:latest - container_name: ui - build: - context: copilot-ui - dockerfile: Dockerfile - ports: - - 3000:3000 - depends_on: - - copilot - networks: - - copilot_local - - nginx: - container_name: nginx - image: nginx - volumes: - - ./nginx/nginx.conf:/etc/nginx/conf.d/default.conf - ports: - - "80:80" - depends_on: - - ui - - copilot - networks: - - copilot_local - + # - ./configs/:/configs + # networks: + # - copilot_local +# # report-service: +# # image: tigergraphml/report-service:latest +# # container_name: report-service +# # build: +# # context: . 
+# # dockerfile: report-service/Dockerfile +# # ports: +# # - 8002:8002 +# # environment: +# # LLM_CONFIG: "/code/configs/llm_config.json" +# # DB_CONFIG: "/code/configs/db_config.json" +# # MILVUS_CONFIG: "/code/configs/milvus_config.json" +# # LOGLEVEL: "INFO" +# # volumes: +# # - ./configs/:/code/configs +# # - ./common:/code/common +# # +# ui: +# image: tigergraphml/copilot-ui:latest +# container_name: ui +# build: +# context: copilot-ui +# dockerfile: Dockerfile +# ports: +# - 3000:3000 +# depends_on: +# - copilot +# networks: +# - copilot_local +# +# nginx: +# container_name: nginx +# image: nginx +# volumes: +# - ./nginx/nginx.conf:/etc/nginx/conf.d/default.conf +# ports: +# - "80:80" +# depends_on: +# - ui +# - copilot +# networks: +# - copilot_local +# networks: copilot_local: diff --git a/eventual-consistency-service/app/graphrag/graph_rag.py b/eventual-consistency-service/app/graphrag/graph_rag.py index 637546d6..1477d9e0 100644 --- a/eventual-consistency-service/app/graphrag/graph_rag.py +++ b/eventual-consistency-service/app/graphrag/graph_rag.py @@ -1,14 +1,19 @@ import asyncio import logging -from graphrag.util import install_query +import ecc_util +from graphrag.util import install_query, stream_docs, upsert_chunk from graphrag.worker import worker from pyTigerGraph import TigerGraphConnection -from common.chunkers import character_chunker, regex_chunker, semantic_chunker from common.chunkers.base_chunker import BaseChunker -from common.config import (doc_processing_config, embedding_service, - get_llm_service, llm_config, milvus_config) +from common.config import ( + doc_processing_config, + embedding_service, + get_llm_service, + llm_config, + milvus_config, +) from common.embeddings.milvus_embedding_store import MilvusEmbeddingStore from common.extractors import GraphExtractor, LLMEntityRelationshipExtractor from common.extractors.BaseExtractor import BaseExtractor @@ -17,28 +22,6 @@ consistency_checkers = {} -def get_chunker(): - if doc_processing_config.get("chunker") == "semantic": - chunker = semantic_chunker.SemanticChunker( - embedding_service, - doc_processing_config["chunker_config"].get("method", "percentile"), - doc_processing_config["chunker_config"].get("threshold", 0.95), - ) - elif doc_processing_config.get("chunker") == "regex": - chunker = regex_chunker.RegexChunker( - pattern=doc_processing_config["chunker_config"].get("pattern", "\\r?\\n") - ) - elif doc_processing_config.get("chunker") == "character": - chunker = character_chunker.CharacterChunker( - chunk_size=doc_processing_config["chunker_config"].get("chunk_size", 1024), - overlap_size=doc_processing_config["chunker_config"].get("overlap_size", 0), - ) - else: - raise ValueError("Invalid chunker type") - - return chunker - - async def install_queries( requried_queries: list[str], conn: TigerGraphConnection, n_workers=8 ): @@ -51,11 +34,10 @@ async def install_queries( # add queries to be installed into the queue tq = asyncio.Queue() for q in requried_queries: - if q not in installed_queries: + q_name = q.split("/")[-1] + if q_name not in installed_queries: tq.put_nowait((install_query, (conn, q))) - # break - print("starting workers") # start workers for n in range(min(tq.qsize(), n_workers)): task = loop.create_task(worker(n, tq)) @@ -65,23 +47,48 @@ async def install_queries( await tq.join() for t in tasks: print(t.result()) + # TODO: Check if anything had an error return "", "", "" +async def process_doc( + conn: TigerGraphConnection, doc: dict[str, str], sem: asyncio.Semaphore +): + # TODO: 
Embed document and chunks + chunker = ecc_util.get_chunker() + try: + print(">>>>>", doc["v_id"], len(doc["attributes"]["text"])) + # await asyncio.sleep(5) + chunks = chunker.chunk(doc["attributes"]["text"]) + v_id = doc["v_id"] + # TODO: n chunks at a time + for i, chunk in enumerate(chunks): + await upsert_chunk(conn, v_id, f"{v_id}_chunk_{i}", chunk) + # break # single chunk FIXME: delete + finally: + sem.release() + + return doc["v_id"] + + async def init( graphname: str, conn: TigerGraphConnection ) -> tuple[BaseChunker, dict[str, MilvusEmbeddingStore], BaseExtractor]: # install requried queries requried_queries = [ - "Scan_For_Updates", - "Update_Vertices_Processing_Status", - "ECC_Status", - "Check_Nonexistent_Vertices", + # "common/gsql/supportai/Scan_For_Updates", + # "common/gsql/supportai/Update_Vertices_Processing_Status", + # "common/gsql/supportai/ECC_Status", + # "common/gsql/supportai/Check_Nonexistent_Vertices", + "common/gsql/graphRAG/StreamDocIds", + "common/gsql/graphRAG/StreamDocContent", ] - await install_queries(requried_queries, conn) + # await install_queries(requried_queries, conn) + return await install_queries(requried_queries, conn) # init processing tools - chunker = get_chunker() + chunker = ecc_util.get_chunker() + vector_indices = {} vertex_field = milvus_config.get("vertex_field", "vertex_id") index_names = milvus_config.get( @@ -131,8 +138,26 @@ async def run(graphname: str, conn: TigerGraphConnection): """ + # init configurable objects chunker, vector_indices, extractor = await init(graphname, conn) # process docs + doc_workers = 48 # TODO: make configurable + doc_tasks = [] + doc_sem = asyncio.Semaphore(doc_workers) + + async with asyncio.TaskGroup() as tg: + async for content in stream_docs(conn): + # only n workers at a time -- held up by semaphore + print(">>>>>>>>>>>>>>>>>>>>>>>>\n", len(doc_tasks), "<<<<<<<<<") + await doc_sem.acquire() + task = tg.create_task(process_doc(conn, content, doc_sem)) + doc_tasks.append(task) + break + + # do something with doc_tasks + for t in doc_tasks: + print(t.result()) + print("DONE") return f"hi from graph rag ecc: {conn.graphname} ({graphname})" diff --git a/eventual-consistency-service/app/graphrag/util.py b/eventual-consistency-service/app/graphrag/util.py index ae6fbcf7..ce2efe52 100644 --- a/eventual-consistency-service/app/graphrag/util.py +++ b/eventual-consistency-service/app/graphrag/util.py @@ -1,4 +1,7 @@ import base64 +import json +import time +import traceback from urllib.parse import quote_plus import httpx @@ -7,14 +10,24 @@ from common.logs.logwriter import LogWriter +def make_headers(conn: TigerGraphConnection): + if conn.apiToken is None or conn.apiToken == "": + tkn = base64.b64encode(f"{conn.username}:{conn.password}".encode()).decode() + headers = {"Authorization": f"Basic {tkn}"} + else: + headers = {"Authorization": f"Bearer {conn.apiToken}"} + + return headers + + async def install_query( - conn: TigerGraphConnection, query_name: str + conn: TigerGraphConnection, query_path: str ) -> dict[str, httpx.Response | str | None]: - print("install --", query_name) - LogWriter.info(f"Installing query {query_name}") - with open(f"common/gsql/supportai/{query_name}.gsql", "r") as f: + LogWriter.info(f"Installing query {query_path}") + with open(f"{query_path}.gsql", "r") as f: query = f.read() + query_name = query_path.split("/")[-1] query = f"""\ USE GRAPH {conn.graphname} {query} @@ -31,6 +44,134 @@ async def install_query( if "error" in res.text.lower(): LogWriter.error(res.text) - return 
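# Editor's sketch (illustrative, not part of the patch): the string that
# install_query composes before URL-encoding it with quote_plus and POSTing it
# to <gsUrl>/gsqlserver/gsql/file with basic-auth headers. The graph name,
# query name, and query body below are placeholders.
graphname = "GraphRAG_pytgdocs"   # placeholder graph name
query_name = "StreamDocIds"       # placeholder query name
gsql_body = "CREATE QUERY StreamDocIds(INT current_batch, INT ttl_batches) { PRINT 1; }"
payload = f"USE GRAPH {graphname}\n{gsql_body}\nINSTALL QUERY {query_name}"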
{"result": None, "error": f"Failed to install query {query_name}"} + return { + "result": None, + "error": True, + "message": f"Failed to install query {query_name}", + } return {"result": res, "error": False} + + +async def stream_doc_ids( + conn: TigerGraphConnection, current_batch: int, ttl_batches: int +) -> dict[str, str | list[str]]: + headers = make_headers(conn) + + try: + async with httpx.AsyncClient(timeout=None) as client: + res = await client.post( + f"{conn.restppUrl}/query/{conn.graphname}/StreamDocIds", + params={ + "current_batch": current_batch, + "ttl_batches": ttl_batches, + }, + headers=headers, + ) + ids = res.json()["results"][0]["@@doc_ids"] + return {"error": False, "ids": ids} + + except Exception as e: + exc = traceback.format_exc() + LogWriter.error( + f"/{conn.graphname}/query/StreamDocIds\nException Trace:\n{exc}" + ) + + return {"error": True, "message": str(e)} + + +async def stream_docs(conn: TigerGraphConnection, ttl_batches: int = 10): + headers = make_headers(conn) + for i in range(ttl_batches): + doc_ids = await stream_doc_ids(conn, i, ttl_batches) + if doc_ids["error"]: + print(doc_ids) + break # TODO: handle error + + print("*******") + print(doc_ids) + print("*******") + for d in doc_ids["ids"]: + async with httpx.AsyncClient(timeout=None) as client: + res = await client.get( + f"{conn.restppUrl}/query/{conn.graphname}/StreamDocContent/", + params={"doc": d}, + headers=headers, + ) + + # TODO: check for errors + yield res.json()["results"][0]["DocContent"][0] + return # single doc test FIXME: delete + # return # single batch test FIXME: delete + + +def map_attrs(attributes: dict): + # map attrs + attrs = {} + for k, v in attributes.items(): + if isinstance(v, tuple): + attrs[k] = {"value": v[0], "op": v[1]} + elif isinstance(v, dict): + attrs[k] = { + "value": {"keylist": list(v.keys()), "valuelist": list(v.values())} + } + else: + attrs[k] = {"value": v} + return attrs + + +async def upsert_vertex( + conn: TigerGraphConnection, + vertex_type: str, + vertex_id: str, + attributes: dict = None, +): + attrs = map_attrs(attributes) + data = json.dumps({"vertices": {vertex_type: {vertex_id: attrs}}}) + headers = make_headers(conn) + async with httpx.AsyncClient(timeout=None) as client: + res = await client.post( + f"{conn.restppUrl}/graph/{conn.graphname}", data=data, headers=headers + ) + print(res) + +async def upsert_edge( + conn: TigerGraphConnection, + vertex_type: str, + vertex_id: str, + attributes: dict = None, +): + TODO + attrs = map_attrs(attributes) + data = json.dumps({"vertices": {vertex_type: {vertex_id: attrs}}}) + headers = make_headers(conn) + async with httpx.AsyncClient(timeout=None) as client: + res = await client.post( + f"{conn.restppUrl}/graph/{conn.graphname}", data=data, headers=headers + ) + print(res) + +async def upsert_chunk(conn: TigerGraphConnection, doc_id, chunk_id, chunk): + date_added = int(time.time()) + await upsert_vertex( + conn, + "DocumentChunk", + chunk_id, + attributes={"epoch_added": date_added, "idx": int(chunk_id.split("_")[-1])}, + ) + await upsert_vertex( + conn, + "Content", + chunk_id, + attributes={"text": chunk, "epoch_added": date_added}, + ) + conn.upsertEdge("DocumentChunk", chunk_id, "HAS_CONTENT", "Content", chunk_id) + # self.conn.upsertEdge("Document", doc_id, "HAS_CHILD", "DocumentChunk", chunk_id) + # if int(chunk_id.split("_")[-1]) > 0: + # self.conn.upsertEdge( + # "DocumentChunk", + # chunk_id, + # "IS_AFTER", + # "DocumentChunk", + # doc_id + "_chunk_" + str(int(chunk_id.split("_")[-1]) - 
1), + # ) diff --git a/eventual-consistency-service/app/graphrag/worker.py b/eventual-consistency-service/app/graphrag/worker.py index 4edd561a..a2c7bbb6 100644 --- a/eventual-consistency-service/app/graphrag/worker.py +++ b/eventual-consistency-service/app/graphrag/worker.py @@ -1,27 +1,42 @@ import asyncio +# class Channel(asyncio.Queue): +# def __init__(self, maxsize=0): +# self.is_open = True +# super().__init__(maxsize) +# +# def close(self): +# self.is_open = False + + async def worker( n: int, task_queue: asyncio.Queue, ): + # init worker logging/reporting (TODO) worker_name = f"worker-{n+1}" worker_name += " " if n + 1 < 10 else "" - responses = [] - i = 0 + while task_queue.empty(): + print(f"{worker_name} waiting") + await asyncio.sleep(1) + + # consume task queue + print(f"{worker_name} started") + responses = [] while not task_queue.empty(): # get the next task func, args = await task_queue.get() + + # execute the task response = await func(*args) + # append task results to worker results/response responses.append(response) - i += 1 - task_queue.task_done() - # collate results - results = [] - for r in responses: - results.append(r) + # mark task as done + task_queue.task_done() - return results + print(f"{worker_name} done") + return responses diff --git a/eventual-consistency-service/app/main.py b/eventual-consistency-service/app/main.py index 4c486bc0..0277a272 100644 --- a/eventual-consistency-service/app/main.py +++ b/eventual-consistency-service/app/main.py @@ -1,10 +1,10 @@ -import asyncio import json import logging from contextlib import asynccontextmanager from threading import Thread from typing import Annotated, Callable +import ecc_util import graphrag from eventual_consistency_checker import EventualConsistencyChecker from fastapi import BackgroundTasks, Depends, FastAPI, Response, status @@ -98,35 +98,7 @@ def initialize_eventual_consistency_checker( vertex_field=vertex_field, ) - if doc_processing_config.get("chunker") == "semantic": - from common.chunkers.semantic_chunker import SemanticChunker - - chunker = SemanticChunker( - embedding_service, - doc_processing_config["chunker_config"].get("method", "percentile"), - doc_processing_config["chunker_config"].get("threshold", 0.95), - ) - elif doc_processing_config.get("chunker") == "regex": - from common.chunkers.regex_chunker import RegexChunker - - chunker = RegexChunker( - pattern=doc_processing_config["chunker_config"].get( - "pattern", "\\r?\\n" - ) - ) - elif doc_processing_config.get("chunker") == "character": - from common.chunkers.character_chunker import CharacterChunker - - chunker = CharacterChunker( - chunk_size=doc_processing_config["chunker_config"].get( - "chunk_size", 1024 - ), - overlap_size=doc_processing_config["chunker_config"].get( - "overlap_size", 0 - ), - ) - else: - raise ValueError("Invalid chunker type") + chunker = ecc_util.get_chunker() if doc_processing_config.get("extractor") == "llm": from common.extractors import LLMEntityRelationshipExtractor @@ -181,11 +153,6 @@ def start_func_in_thread(f: Callable, *args, **kwargs): LogWriter.info(f'Thread started for function: "{f.__name__}"') -# def start_async_func(f: Callable, *args, **kwargs): -# asyncio.run(f(args, kwargs)) -# LogWriter.info(f'Thread started for function: "{f.__name__}"') - - @app.get("/") def root(): LogWriter.info(f"Healthcheck") From ec299a27f84121f9e8e85666847dc517f80e2291 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 23 Jul 2024 17:39:04 -0400 
Subject: [PATCH 03/53] save: docs handled concurrently -- writing upsert_edge --- common/gsql/graphRAG/StreamDocContent.gsql | 5 + common/gsql/graphRAG/StreamDocIds.gsql | 10 + .../louvain/louvain_1_first_pass.gsql | 176 ++++++++++++++++++ eventual-consistency-service/app/ecc_util.py | 24 +++ 4 files changed, 215 insertions(+) create mode 100644 common/gsql/graphRAG/StreamDocContent.gsql create mode 100644 common/gsql/graphRAG/StreamDocIds.gsql create mode 100644 common/gsql/graphRAG/louvain/louvain_1_first_pass.gsql create mode 100644 eventual-consistency-service/app/ecc_util.py diff --git a/common/gsql/graphRAG/StreamDocContent.gsql b/common/gsql/graphRAG/StreamDocContent.gsql new file mode 100644 index 00000000..fb7338b7 --- /dev/null +++ b/common/gsql/graphRAG/StreamDocContent.gsql @@ -0,0 +1,5 @@ +CREATE QUERY StreamDocContent(Vertex doc) { + Doc = {doc}; + DocContent = SELECT c FROM Doc:d -(HAS_CONTENT)-> Content:c; + PRINT DocContent; +} diff --git a/common/gsql/graphRAG/StreamDocIds.gsql b/common/gsql/graphRAG/StreamDocIds.gsql new file mode 100644 index 00000000..fb373490 --- /dev/null +++ b/common/gsql/graphRAG/StreamDocIds.gsql @@ -0,0 +1,10 @@ +CREATE QUERY StreamDocIds(INT current_batch, INT ttl_batches) { + ListAccum @@doc_ids; + Docs = {Document.*}; + + Docs = SELECT d FROM Docs:d + WHERE vertex_to_int(d) % ttl_batches == current_batch + ACCUM @@doc_ids += d.id; + + PRINT @@doc_ids; +} diff --git a/common/gsql/graphRAG/louvain/louvain_1_first_pass.gsql b/common/gsql/graphRAG/louvain/louvain_1_first_pass.gsql new file mode 100644 index 00000000..4ca06029 --- /dev/null +++ b/common/gsql/graphRAG/louvain/louvain_1_first_pass.gsql @@ -0,0 +1,176 @@ +CREATE OR REPLACE DISTRIBUTED QUERY graphRAG_louvain_1( + UINT max_hop = 10, + UINT batch_num = 12, + UINT sample_edge_num = 100 +) FOR GRAPH {graph_name} SYNTAX v1 { + + TYPEDEF TUPLE community, STRING ext_vid> MyTuple; --> this should be Community, I think + SumAccum @@m; // the sum of the weights of all the links in the network + MinAccum> @{community_id_attribute_name}; // the community ID of the node + MinAccum @community_vid; // the community ID of the node + SumAccum @k; // the sum of the weights of the links incident to the node + SumAccum @k_in; // the sum of the weights of the links inside the previous community of the node + SumAccum @k_self_loop; // the weight of the self-loop link + MapAccum, SumAccum> @community_k_in_map; // the community of the neighbors of the nodes -> the sum of the weights of the links inside the community + MapAccum, SumAccum> @@community_sum_total_map; // community ID C -> the sum of the weights of the links incident to nodes in C + SumAccum @community_sum_total; // the sum of the weights of the links incident to nodes in the community of the node + MapAccum, SumAccum> @@community_sum_in_map; // community ID -> the sum of the weights of the links inside the community + MapAccum, MapAccum, SumAccum>> @@source_target_k_in_map; // source community ID -> (target community ID -> the sum of the weights of the links from the source community to the target community) + SumAccum @delta_Q_remove; // delta Q to remove the node from the previous community + MaxAccum @best_move; // best move of the node with the highest delta Q to move the isolated node into the new community + MaxAccum @@min_double; // used to reset the @best_move + SumAccum @@move_cnt; + OrAccum @to_change_community; + SumAccum @batch_id; + SumAccum @vid; + + DOUBLE wt = 1.0; + + // Initialization + All_Nodes = {{{entity_vertex_name}.*}}; + 
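    // Editor's note (descriptive only, not part of the patch): the pass below
    // implements the standard Louvain local-moving step. With @@m the total
    // edge weight, s.@k the weighted degree of s, and s.@k_in the weight of
    // links from s into a community, moves are scored as
    //   delta_Q_remove = 2*@k_self_loop - 2*@k_in + @k*(@community_sum_total - @k)/@@m
    //   delta_Q_add    = 2*@community_k_in_map.get(C') - @k*C'.sum_total/@@m
    // and a node changes community only when delta_Q_remove + delta_Q_add > 0.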
All_Nodes = SELECT s FROM All_Nodes:s -({relation_edge_name}:e)- :t + ACCUM @@m += wt / 2, + s.@k += wt, + IF s == t THEN // self-loop link + js.@k_self_loop += wt + END + POST-ACCUM + s.@{community_id_attribute_name} = s, + s.@community_vid = to_string(s.id), + s.@vid = getvid(s), + s.@batch_id = s.@vid % batch_num; + + IF @@m < 0.00000000001 THEN + PRINT "Warning: the sum of the weights in the edges should be greater than zero!"; + RETURN; + END; + + // Local moving + INT hop = 0; + Candidates = All_Nodes; + WHILE Candidates.size() > 0 AND hop < max_hop DO + hop = hop + 1; + LOG(TRUE, hop); + IF hop == 1 THEN // first iteration + ChangedNodes = SELECT s FROM Candidates:s -({relation_edge_name}:e)- :t + WHERE s.@{community_id_attribute_name} != t.@{community_id_attribute_name} + ACCUM s.@best_move += MyTuple(1 - s.@k * t.@k / (2 * @@m), t.@{community_id_attribute_name}, t.@community_vid) + POST-ACCUM + IF s.@best_move.delta_Q_add > 0 THEN // the gain (delta Q) is positive + s.@to_change_community = TRUE + END + HAVING s.@to_change_community == TRUE; + + ELSE // remaining iterations + // Calculate sum_total + Tmp = SELECT s FROM All_Nodes:s + POST-ACCUM + @@community_sum_total_map += (s.@{community_id_attribute_name} -> s.@k); + Tmp = SELECT s FROM All_Nodes:s + POST-ACCUM + s.@community_sum_total = @@community_sum_total_map.get(s.@{community_id_attribute_name}); + + @@community_sum_total_map.clear(); + // Find the best move + ChangedNodes = {{}}; + FOREACH batch_id IN RANGE[0, batch_num-1] DO + LOG(TRUE, batch_id); + // Calculate the delta Q to remove the node from the previous community + Nodes = SELECT s FROM Candidates:s -({relation_edge_name}:e)- :t + WHERE s.@batch_id == batch_id + ACCUM + IF s.@{community_id_attribute_name} == t.@{community_id_attribute_name} THEN + s.@k_in += wt + ELSE + s.@community_k_in_map += (t.@{community_id_attribute_name} -> wt) + END + POST-ACCUM + s.@delta_Q_remove = 2 * s.@k_self_loop - 2 * s.@k_in + s.@k * (s.@community_sum_total - s.@k) / @@m, + s.@k_in = 0, + s.@best_move = MyTuple(@@min_double, s, to_string(s.id)) // reset the delta_Q_add + ; + + // Find the best move + Nodes = SELECT s FROM Nodes:s -({relation_edge_name}:e)- :t + //SAMPLE sample_edge_num EDGE WHEN s.outdegree("{relation_edge_name}") > sample_edge_num + WHERE s.@{community_id_attribute_name} != t.@{community_id_attribute_name} + ACCUM DOUBLE delta_Q_add = 2 * s.@community_k_in_map.get(t.@{community_id_attribute_name}) - s.@k * t.@community_sum_total / @@m, + s.@best_move += MyTuple(delta_Q_add, t.@{community_id_attribute_name}, t.@community_vid) + POST-ACCUM + IF s.@delta_Q_remove + s.@best_move.delta_Q_add > 0 THEN // the gain (delta Q) is positive + s.@to_change_community = TRUE + END, + s.@community_k_in_map.clear() + HAVING s.@to_change_community == TRUE; + + ChangedNodes = ChangedNodes UNION Nodes; + END; + END; + // If two nodes swap, only change the community of one of them + SwapNodes = SELECT s FROM ChangedNodes:s -({relation_edge_name}:e)- :t + WHERE s.@best_move.community == t.@{community_id_attribute_name} + AND t.@to_change_community == TRUE + AND t.@best_move.community == s.@{community_id_attribute_name} + // only change the one with larger delta Q or the one with smaller @vid if delta Q are the same + AND ( + s.@delta_Q_remove + s.@best_move.delta_Q_add < t.@delta_Q_remove + t.@best_move.delta_Q_add + OR ( + abs((s.@delta_Q_remove + s.@best_move.delta_Q_add) - (t.@delta_Q_remove + t.@best_move.delta_Q_add)) < 0.00000000001 + AND s.@vid > t.@vid + ) + ) + POST-ACCUM + 
s.@to_change_community = FALSE; + + ChangedNodes = ChangedNodes MINUS SwapNodes; + + // Place each node of ChangedNodes in the community in which the gain is maximum + ChangedNodes = SELECT s FROM ChangedNodes:s + POST-ACCUM + s.@{community_id_attribute_name} = s.@best_move.community, + s.@community_vid = s.@best_move.ext_vid, + s.@to_change_community = FALSE; + + @@move_cnt += ChangedNodes.size(); + + // Get all neighbours of the changed node that do not belong to the node’s new community + Candidates = SELECT t FROM ChangedNodes:s -({relation_edge_name}:e)- :t + WHERE t.@{community_id_attribute_name} != s.@{community_id_attribute_name}; + END; + + PRINT @@move_cnt AS Delta; + + // Coarsening + UINT new_layer = 0; + @@community_sum_total_map.clear(); + Tmp = + SELECT s FROM All_Nodes:s -({relation_edge_name}:e)- :t + ACCUM + IF s.@{community_id_attribute_name} == t.@{community_id_attribute_name} THEN + @@community_sum_in_map += (s.@{community_id_attribute_name} -> wt) + END + POST-ACCUM + //f_belongs_to.println(s.id, s.@{community_id_attribute_name}, new_layer), + INSERT INTO {belongs_to_edge_name} VALUES (s, str_to_int(s.@community_vid), new_layer), + IF @@community_sum_in_map.containsKey(s) THEN + //f_links_to.println(s.id, s.id, @@community_sum_in_map.get(s), new_layer) + INSERT INTO {links_to_edge_name} VALUES (s,s, (new_layer -> @@community_sum_in_map.get(s))) + END; + + @@community_sum_in_map.clear(); + + Tmp = SELECT s FROM All_Nodes:s -({relation_edge_name}:e)- :t + ACCUM + IF s.@{community_id_attribute_name} != t.@{community_id_attribute_name} THEN + @@source_target_k_in_map += (s.@{community_id_attribute_name} -> (t.@{community_id_attribute_name} -> wt)) + END + POST-ACCUM + IF @@source_target_k_in_map.containsKey(s) THEN + FOREACH (target_community, k_in) IN @@source_target_k_in_map.get(s) DO + //f_links_to.println(s.id, target_community, k_in, new_layer) + INSERT INTO {links_to_edge_name} VALUES (s,target_community, (new_layer -> k_in)) + END + END; + + @@source_target_k_in_map.clear(); +} diff --git a/eventual-consistency-service/app/ecc_util.py b/eventual-consistency-service/app/ecc_util.py new file mode 100644 index 00000000..5656e219 --- /dev/null +++ b/eventual-consistency-service/app/ecc_util.py @@ -0,0 +1,24 @@ +from common.chunkers import character_chunker, regex_chunker, semantic_chunker +from common.config import doc_processing_config, embedding_service + + +def get_chunker(): + if doc_processing_config.get("chunker") == "semantic": + chunker = semantic_chunker.SemanticChunker( + embedding_service, + doc_processing_config["chunker_config"].get("method", "percentile"), + doc_processing_config["chunker_config"].get("threshold", 0.95), + ) + elif doc_processing_config.get("chunker") == "regex": + chunker = regex_chunker.RegexChunker( + pattern=doc_processing_config["chunker_config"].get("pattern", "\\r?\\n") + ) + elif doc_processing_config.get("chunker") == "character": + chunker = character_chunker.CharacterChunker( + chunk_size=doc_processing_config["chunker_config"].get("chunk_size", 1024), + overlap_size=doc_processing_config["chunker_config"].get("overlap_size", 0), + ) + else: + raise ValueError("Invalid chunker type") + + return chunker From fce72c43c73aa425d859b8120bf5ccb94e6c995f Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Wed, 24 Jul 2024 15:24:47 -0400 Subject: [PATCH 04/53] changing queues for channels --- .../app/graphrag/graph_rag.py | 154 +++++++++++------- .../app/graphrag/util.py | 99 
++++++++--- .../app/graphrag/worker.py | 11 +- eventual-consistency-service/requirements.txt | 1 + 4 files changed, 173 insertions(+), 92 deletions(-) diff --git a/eventual-consistency-service/app/graphrag/graph_rag.py b/eventual-consistency-service/app/graphrag/graph_rag.py index 1477d9e0..0b5265b1 100644 --- a/eventual-consistency-service/app/graphrag/graph_rag.py +++ b/eventual-consistency-service/app/graphrag/graph_rag.py @@ -1,9 +1,10 @@ import asyncio import logging +import time import ecc_util -from graphrag.util import install_query, stream_docs, upsert_chunk -from graphrag.worker import worker +from aiochannel import Channel +from graphrag.util import chunk_doc, install_query, stream_docs from pyTigerGraph import TigerGraphConnection from common.chunkers.base_chunker import BaseChunker @@ -25,52 +26,25 @@ async def install_queries( requried_queries: list[str], conn: TigerGraphConnection, n_workers=8 ): - loop = asyncio.get_event_loop() - tasks: list[asyncio.Task] = [] - # queries that are currently installed installed_queries = [q.split("/")[-1] for q in conn.getEndpoints(dynamic=True)] - # add queries to be installed into the queue - tq = asyncio.Queue() - for q in requried_queries: - q_name = q.split("/")[-1] - if q_name not in installed_queries: - tq.put_nowait((install_query, (conn, q))) - - # start workers - for n in range(min(tq.qsize(), n_workers)): - task = loop.create_task(worker(n, tq)) - tasks.append(task) + tasks = [] + async with asyncio.TaskGroup() as grp: + for q in requried_queries: + async with asyncio.Semaphore(n_workers): + q_name = q.split("/")[-1] + # if the query is not installed, install it + if q_name not in installed_queries: + task = grp.create_task(install_query(conn, q)) + tasks.append(task) - # wait for workers to finish jobs - await tq.join() for t in tasks: print(t.result()) # TODO: Check if anything had an error return "", "", "" -async def process_doc( - conn: TigerGraphConnection, doc: dict[str, str], sem: asyncio.Semaphore -): - # TODO: Embed document and chunks - chunker = ecc_util.get_chunker() - try: - print(">>>>>", doc["v_id"], len(doc["attributes"]["text"])) - # await asyncio.sleep(5) - chunks = chunker.chunk(doc["attributes"]["text"]) - v_id = doc["v_id"] - # TODO: n chunks at a time - for i, chunk in enumerate(chunks): - await upsert_chunk(conn, v_id, f"{v_id}_chunk_{i}", chunk) - # break # single chunk FIXME: delete - finally: - sem.release() - - return doc["v_id"] - - async def init( graphname: str, conn: TigerGraphConnection ) -> tuple[BaseChunker, dict[str, MilvusEmbeddingStore], BaseExtractor]: @@ -124,6 +98,62 @@ async def init( return chunker, vector_indices, extractor +async def process_docs( + conn: TigerGraphConnection, + docs_chan: Channel, + embed_q: Channel, + chunk_q: Channel, +): + doc_tasks = [] + async with asyncio.TaskGroup() as grp: + async for content in stream_docs(conn): + # only n workers at a time -- held up by semaphore size + async with asyncio.Semaphore(doc_workers): + task = grp.create_task(chunk_doc(conn, content, chunk_q, embed_q)) + doc_tasks.append(task) + break # single doc FIXME: delete + + # do something with doc_tasks? 
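            # Editor's note: `async with asyncio.Semaphore(doc_workers)` creates a
            # fresh semaphore on every loop iteration, so it never actually limits
            # how many chunk_doc tasks are in flight (install_queries above has the
            # same pattern). A minimal sketch of the usual fix -- one shared
            # semaphore, released when each task completes (doc_workers is assumed
            # to be defined in this scope):
            #
            #   sem = asyncio.Semaphore(doc_workers)
            #   async with asyncio.TaskGroup() as grp:
            #       async for content in stream_docs(conn):
            #           await sem.acquire()
            #           t = grp.create_task(chunk_doc(conn, content, chunk_q, embed_q))
            #           t.add_done_callback(lambda _: sem.release())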
+ for t in doc_tasks: + print(t.result()) + + +async def embed(embed_q: Channel): + pass + + +async def upsert(upsert_q: Channel): + """ + queue expects: + (func, args) <- q.get() + """ + while upsert_q.empty(): + await asyncio.sleep(1) + + # consume task queue + print("upsert started") + responses = [] + while not upsert_q.empty(): + # get the next task + func, args = await upsert_q.get() + + # execute the task + response = await func(*args) + + # append task results to worker results/response + responses.append(response) + + # mark task as done + upsert_q.task_done() + + print(f"upsert done") + return responses + + +async def extract(extract_q: Channel): + pass + + async def run(graphname: str, conn: TigerGraphConnection): """ ecc flow @@ -139,25 +169,33 @@ async def run(graphname: str, conn: TigerGraphConnection): """ # init configurable objects - chunker, vector_indices, extractor = await init(graphname, conn) - - # process docs - doc_workers = 48 # TODO: make configurable - doc_tasks = [] - doc_sem = asyncio.Semaphore(doc_workers) - - async with asyncio.TaskGroup() as tg: - async for content in stream_docs(conn): - # only n workers at a time -- held up by semaphore - print(">>>>>>>>>>>>>>>>>>>>>>>>\n", len(doc_tasks), "<<<<<<<<<") - await doc_sem.acquire() - task = tg.create_task(process_doc(conn, content, doc_sem)) - doc_tasks.append(task) - break - - # do something with doc_tasks - for t in doc_tasks: - print(t.result()) + await init(graphname, conn) + # return + start = time.perf_counter() + + # TODO: make configurable + tasks = [] + docs_chan = Channel(48) # process n chunks at a time max + chunk_chan = Channel(100) # process 100 chunks at a time max + embed_chan = Channel(100) + upsert_chan = Channel(100) + async with asyncio.TaskGroup() as grp: + # get docs + t = grp.create_task(stream_docs(conn, docs_chan,10)) + tasks.append(t) + # process docs + t = grp.create_task(process_docs(conn, docs_chan, embed_chan, chunk_chan)) + tasks.append(t) + # embed + t = grp.create_task(embed(conn, doc_workers, embed_chan, chunk_chan)) + tasks.append(t) + # upsert chunks + t = grp.create_task(upsert(conn, doc_workers, embed_chan, chunk_chan)) + tasks.append(t) + # extract entities + t = grp.create_task(extract(conn, doc_workers, embed_chan, chunk_chan)) + tasks.append(t) + end = time.perf_counter() print("DONE") - return f"hi from graph rag ecc: {conn.graphname} ({graphname})" + print(end - start) diff --git a/eventual-consistency-service/app/graphrag/util.py b/eventual-consistency-service/app/graphrag/util.py index ce2efe52..c18ec86a 100644 --- a/eventual-consistency-service/app/graphrag/util.py +++ b/eventual-consistency-service/app/graphrag/util.py @@ -5,6 +5,8 @@ from urllib.parse import quote_plus import httpx +from aiochannel import Channel +from app import ecc_util from pyTigerGraph import TigerGraphConnection from common.logs.logwriter import LogWriter @@ -80,7 +82,11 @@ async def stream_doc_ids( return {"error": True, "message": str(e)} -async def stream_docs(conn: TigerGraphConnection, ttl_batches: int = 10): +async def stream_docs( + conn: TigerGraphConnection, + docs_chan: Channel, + ttl_batches: int = 10, +): headers = make_headers(conn) for i in range(ttl_batches): doc_ids = await stream_doc_ids(conn, i, ttl_batches) @@ -88,9 +94,6 @@ async def stream_docs(conn: TigerGraphConnection, ttl_batches: int = 10): print(doc_ids) break # TODO: handle error - print("*******") - print(doc_ids) - print("*******") for d in doc_ids["ids"]: async with httpx.AsyncClient(timeout=None) as client: 
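            # Editor's note: stream_docs is the producer end of docs_chan. It pulls
            # ids in ttl_batches batches via StreamDocIds, then issues one
            # StreamDocContent request per document id and puts each result on the
            # channel; Channel.put blocks whenever the channel is already full.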
res = await client.get( @@ -98,13 +101,35 @@ async def stream_docs(conn: TigerGraphConnection, ttl_batches: int = 10): params={"doc": d}, headers=headers, ) - # TODO: check for errors - yield res.json()["results"][0]["DocContent"][0] - return # single doc test FIXME: delete + # this will block and wait if the channel is full + await docs_chan.put(res.json()["results"][0]["DocContent"][0]) + # return # single doc test FIXME: delete # return # single batch test FIXME: delete +async def chunk_doc( + conn: TigerGraphConnection, + doc: dict[str, str], + chunk_chan: Channel, + embed_chan: Channel, +): + # TODO: Embed document and chunks + chunker = ecc_util.get_chunker() + chunks = chunker.chunk(doc["attributes"]["text"]) + v_id = doc["v_id"] + # TODO: n chunks at a time + for i, chunk in enumerate(chunks): + # send chunks to be upserted (func, args) + await chunk_chan.put((upsert_chunk, (conn, v_id, f"{v_id}_chunk_{i}", chunk))) + + # send chunks to be embedded + + # break # single chunk FIXME: delete + + return doc["v_id"] + + def map_attrs(attributes: dict): # map attrs attrs = {} @@ -124,7 +149,7 @@ async def upsert_vertex( conn: TigerGraphConnection, vertex_type: str, vertex_id: str, - attributes: dict = None, + attributes: dict, ): attrs = map_attrs(attributes) data = json.dumps({"vertices": {vertex_type: {vertex_id: attrs}}}) @@ -133,23 +158,44 @@ async def upsert_vertex( res = await client.post( f"{conn.restppUrl}/graph/{conn.graphname}", data=data, headers=headers ) - print(res) + print(res.json()) + async def upsert_edge( conn: TigerGraphConnection, - vertex_type: str, - vertex_id: str, + src_v_type: str, + src_v_id: str, + edge_type: str, + tgt_v_type: str, + tgt_v_id: str, attributes: dict = None, ): - TODO - attrs = map_attrs(attributes) - data = json.dumps({"vertices": {vertex_type: {vertex_id: attrs}}}) + if attributes is None: + attrs = {} + else: + attrs = map_attrs(attributes) + data = json.dumps( + { + "edges": { + src_v_type: { + src_v_id: { + edge_type: { + tgt_v_type: { + tgt_v_id: attrs, + } + } + }, + } + } + } + ) headers = make_headers(conn) async with httpx.AsyncClient(timeout=None) as client: res = await client.post( f"{conn.restppUrl}/graph/{conn.graphname}", data=data, headers=headers ) - print(res) + print(res.json()) + async def upsert_chunk(conn: TigerGraphConnection, doc_id, chunk_id, chunk): date_added = int(time.time()) @@ -165,13 +211,16 @@ async def upsert_chunk(conn: TigerGraphConnection, doc_id, chunk_id, chunk): chunk_id, attributes={"text": chunk, "epoch_added": date_added}, ) - conn.upsertEdge("DocumentChunk", chunk_id, "HAS_CONTENT", "Content", chunk_id) - # self.conn.upsertEdge("Document", doc_id, "HAS_CHILD", "DocumentChunk", chunk_id) - # if int(chunk_id.split("_")[-1]) > 0: - # self.conn.upsertEdge( - # "DocumentChunk", - # chunk_id, - # "IS_AFTER", - # "DocumentChunk", - # doc_id + "_chunk_" + str(int(chunk_id.split("_")[-1]) - 1), - # ) + await upsert_edge( + conn, "DocumentChunk", chunk_id, "HAS_CONTENT", "Content", chunk_id + ) + await upsert_edge(conn, "Document", doc_id, "HAS_CHILD", "DocumentChunk", chunk_id) + if int(chunk_id.split("_")[-1]) > 0: + await upsert_edge( + conn, + "DocumentChunk", + chunk_id, + "IS_AFTER", + "DocumentChunk", + doc_id + "_chunk_" + str(int(chunk_id.split("_")[-1]) - 1), + ) diff --git a/eventual-consistency-service/app/graphrag/worker.py b/eventual-consistency-service/app/graphrag/worker.py index a2c7bbb6..40720deb 100644 --- a/eventual-consistency-service/app/graphrag/worker.py +++ 
b/eventual-consistency-service/app/graphrag/worker.py @@ -1,18 +1,11 @@ import asyncio - -# class Channel(asyncio.Queue): -# def __init__(self, maxsize=0): -# self.is_open = True -# super().__init__(maxsize) -# -# def close(self): -# self.is_open = False +from aiochannel import Channel async def worker( n: int, - task_queue: asyncio.Queue, + task_queue: Channel, ): # init worker logging/reporting (TODO) worker_name = f"worker-{n+1}" diff --git a/eventual-consistency-service/requirements.txt b/eventual-consistency-service/requirements.txt index 90cc7f2c..3bc0dae0 100644 --- a/eventual-consistency-service/requirements.txt +++ b/eventual-consistency-service/requirements.txt @@ -1,3 +1,4 @@ +aiochannel==1.2.1 aiohttp==3.9.3 aiosignal==1.3.1 annotated-types==0.5.0 From 46d73dc039ef005c4680c525c2e417225f1d2951 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Wed, 24 Jul 2024 16:45:18 -0400 Subject: [PATCH 05/53] graphrag etl with channels --- copilot/docs/notebooks/graphrag.ipynb | 154 +++++++++++------- .../app/graphrag/graph_rag.py | 125 ++++++++++---- .../app/graphrag/util.py | 35 +++- 3 files changed, 207 insertions(+), 107 deletions(-) diff --git a/copilot/docs/notebooks/graphrag.ipynb b/copilot/docs/notebooks/graphrag.ipynb index 3b1200af..57ea4b48 100644 --- a/copilot/docs/notebooks/graphrag.ipynb +++ b/copilot/docs/notebooks/graphrag.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 8, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -16,51 +16,70 @@ "username = os.getenv(\"USERNAME\", \"tigergraph\")\n", "password = os.getenv(\"PASS\", \"tigergraph\")\n", "conn = TigerGraphConnection(\n", - " host=host, username=username, password=password, graphname=\"GraphRAG_pytgdocs\"\n", - ")\n", - "\n", - "conn.getToken()\n", - "\n", - "# And then add CoPilot's address to the connection. This address\n", - "# is the host's address where the CoPilot container is running.\n", - "conn.ai.configureCoPilotHost(\"http://localhost:8000\")" + " host=host,\n", + " username=username,\n", + " password=password,\n", + ")" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'The graph GraphRAG_pytgdocs is created.'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "conn.graphname = \"GraphRAG_pytgdocs\"\n", - "# conn.gsql(\"\"\"CREATE GRAPH pyTigerGraphRAG()\"\"\")" + "conn.gsql(\"\"\"CREATE GRAPH GraphRAG_pytgdocs()\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "_ = conn.getToken()" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'host_name': 'https://algotesting.i.tgcloud.io',\n", - " 'schema_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: [add_supportai_schema].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. 
To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_schema\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 0)\\\\nTrying to add local vertex \\'DocumentChunk\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Document\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Concept\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Entity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Relationship\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'DocumentCollection\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Content\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Community\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'ResolvedEntity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CONTENT\\' and its reverse edge \\'reverse_HAS_CONTENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_CHILD_OF\\' and its reverse edge \\'reverse_IS_CHILD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_HEAD_OF\\' and its reverse edge \\'reverse_IS_HEAD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_TAIL\\' and its reverse edge \\'reverse_HAS_TAIL\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_RELATIONSHIP\\' and its reverse edge \\'reverse_DESCRIBES_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_ENTITY\\' and its reverse edge \\'reverse_DESCRIBES_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_ENTITY\\' and its reverse edge \\'reverse_CONTAINS_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'MENTIONS_RELATIONSHIP\\' and its reverse edge \\'reverse_MENTIONS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_AFTER\\' and its reverse edge \\'reverse_IS_AFTER\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CHILD\\' and its reverse edge \\'reverse_HAS_CHILD\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_RELATIONSHIP\\' and its reverse edge \\'reverse_HAS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_DOCUMENT\\' and its reverse edge \\'reverse_CONTAINS_DOCUMENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'KNN\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVES_TO\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVED_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IN_COMMUNITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 1\\\\nThe job add_supportai_schema completes in 2.829 seconds!\\\\nLocal schema change succeeded.\"',\n", - " 'index_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: [add_supportai_indexes].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. 
To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_indexes\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 1)\\\\nTrying to add index \\'doc_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_indexepoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 2\\\\nThe job add_supportai_indexes completes in 2.002 seconds!\\\\nLocal schema change succeeded.\"'}" + " 'schema_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: [add_supportai_schema].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. 
To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_schema\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 0)\\\\nTrying to add local vertex \\'DocumentChunk\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Document\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Concept\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Entity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Relationship\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'DocumentCollection\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Content\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Community\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'ResolvedEntity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CONTENT\\' and its reverse edge \\'reverse_HAS_CONTENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_CHILD_OF\\' and its reverse edge \\'reverse_IS_CHILD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_HEAD_OF\\' and its reverse edge \\'reverse_IS_HEAD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_TAIL\\' and its reverse edge \\'reverse_HAS_TAIL\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_RELATIONSHIP\\' and its reverse edge \\'reverse_DESCRIBES_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_ENTITY\\' and its reverse edge \\'reverse_DESCRIBES_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_ENTITY\\' and its reverse edge \\'reverse_CONTAINS_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'MENTIONS_RELATIONSHIP\\' and its reverse edge \\'reverse_MENTIONS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_AFTER\\' and its reverse edge \\'reverse_IS_AFTER\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CHILD\\' and its reverse edge \\'reverse_HAS_CHILD\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_RELATIONSHIP\\' and its reverse edge \\'reverse_HAS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_DOCUMENT\\' and its reverse edge \\'reverse_CONTAINS_DOCUMENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'KNN\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVES_TO\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVED_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IN_COMMUNITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 1\\\\nThe job add_supportai_schema completes in 2.434 seconds!\\\\nLocal schema change succeeded.\"',\n", + " 'index_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: [add_supportai_indexes].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. 
To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_indexes\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 1)\\\\nTrying to add index \\'doc_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_indexepoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 2\\\\nThe job add_supportai_indexes completes in 1.932 seconds!\\\\nLocal schema change succeeded.\"'}" ] }, - "execution_count": 10, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# And then add CoPilot's address to the connection. 
This address\n", + "# is the host's address where the CoPilot container is running.\n", + "conn.ai.configureCoPilotHost(\"http://localhost:8000\")\n", "conn.ai.initializeSupportAI()" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -76,18 +95,18 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'job_name': 'load_documents_content_json_75b43aab4f714888b2be3f30441e745a',\n", - " 'job_id': 'GraphRAG_pytgdocs.load_documents_content_json_75b43aab4f714888b2be3f30441e745a.stream.SupportAI_GraphRAG_pytgdocs_f0e175af264a4a18b1aa3bf8f4063d0e.1721674044503',\n", - " 'log_location': '/home/tigergraph/tigergraph/log/kafkaLoader/GraphRAG_pytgdocs.load_documents_content_json_75b43aab4f714888b2be3f30441e745a.stream.SupportAI_GraphRAG_pytgdocs_f0e175af264a4a18b1aa3bf8f4063d0e.1721674044503'}" + "{'job_name': 'load_documents_content_json_203b064024e3499ea41b876cc67a85cf',\n", + " 'job_id': 'GraphRAG_pytgdocs.load_documents_content_json_203b064024e3499ea41b876cc67a85cf.stream.SupportAI_GraphRAG_pytgdocs_5b098715edbd4c878f7425918eb553c0.1721853566538',\n", + " 'log_location': '/home/tigergraph/tigergraph/log/kafkaLoader/GraphRAG_pytgdocs.load_documents_content_json_203b064024e3499ea41b876cc67a85cf.stream.SupportAI_GraphRAG_pytgdocs_5b098715edbd4c878f7425918eb553c0.1721853566538'}" ] }, - "execution_count": 12, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -102,42 +121,67 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "import httpx\n", - "import base64\n", - "\n", - "# conn.ai.forceConsistencyUpdate()\n", - "# url = self.nlqs_host+\"/\"+self.conn.graphname+\"/supportai/forceupdate\"\n", - "# return self.conn._req(\"GET\", url, authMode=\"pwd\", resKey=None)\n", - "httpx.get(f\"http://localhost:8000/{conn.graphname}/supportai/forceupdate\")" + "asdf" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 16, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "content='Hello! How can I assist you today?' 
response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 8, 'total_tokens': 17}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-2a50fab6-62fc-433c-98b4-221346ca41c6-0' usage_metadata={'input_tokens': 8, 'output_tokens': 9, 'total_tokens': 17}\n" - ] - }, { "data": { "text/plain": [ - "Joke(setup='Why was the cat sitting on the computer?', punchline='To keep an eye on the mouse!')" + "{'job_name': 'load_documents_content_json_203b064024e3499ea41b876cc67a85cf',\n", + " 'job_id': 'GraphRAG_pytgdocs.load_documents_content_json_203b064024e3499ea41b876cc67a85cf.stream.SupportAI_GraphRAG_pytgdocs_5b098715edbd4c878f7425918eb553c0.1721853623658',\n", + " 'log_location': '/home/tigergraph/tigergraph/log/kafkaLoader/GraphRAG_pytgdocs.load_documents_content_json_203b064024e3499ea41b876cc67a85cf.stream.SupportAI_GraphRAG_pytgdocs_5b098715edbd4c878f7425918eb553c0.1721853623658'}" ] }, - "execution_count": 13, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], + "source": [ + "for v in [\"Document\", \"Content\", \"DocumentChunk\"]:\n", + " try:\n", + " conn.delVertices(v)\n", + " except:\n", + " pass\n", + "\n", + "import time\n", + "time.sleep(3)\n", + "conn.ai.runDocumentIngest(\n", + " res[\"load_job_id\"],\n", + " res[\"data_source_id\"],\n", + " \"s3://tg-documentation/pytg_current/pytg_current.jsonl\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import httpx\n", + "import base64\n", + "\n", + "# conn.ai.forceConsistencyUpdate()\n", + "# url = self.nlqs_host+\"/\"+self.conn.graphname+\"/supportai/forceupdate\"\n", + "# return self.conn._req(\"GET\", url, authMode=\"pwd\", resKey=None)\n", + "httpx.get(f\"http://localhost:8000/{conn.graphname}/supportai/forceupdate\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from langchain_core.pydantic_v1 import BaseModel, Field\n", "from langchain_openai import ChatOpenAI\n", @@ -149,15 +193,14 @@ "\n", "\n", "model = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)\n", - "print(model.invoke('hi'))\n", + "print(model.invoke(\"hi\"))\n", "structured_llm = model.with_structured_output(Joke)\n", - "structured_llm.invoke(\"Tell me a joke about cats\")\n", - "\n" + "structured_llm.invoke(\"Tell me a joke about cats\")" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -165,12 +208,14 @@ "from langchain_experimental.graph_transformers import LLMGraphTransformer\n", "from langchain_openai import ChatOpenAI\n", "import os\n", + "\n", "# from langchain_core.pydantic_v1 import BaseModel\n", "from pydantic import BaseModel\n", "\n", "\n", "class AnswerWithJustification(BaseModel):\n", " \"\"\"An answer to the user question along with justification for the answer.\"\"\"\n", + "\n", " answer: str\n", " justification: str\n", "\n", @@ -181,6 +226,7 @@ "# sllm = llm.with_structured_output(AnswerWithJustification)\n", "# print(sllm.invoke(\"What weighs more a pound of bricks or a pound of feathers\"))\n", "\n", + "\n", "class GraphExtractor:\n", " def __init__(self):\n", " self.transformer = LLMGraphTransformer(\n", @@ -197,25 +243,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "id='Marie Curie' type='Person' 
properties={'description': 'A Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.'}\n", - "id='Pierre Curie' type='Person' properties={'description': 'Husband of Marie Curie and co-winner of her first Nobel Prize.'}\n", - "id='University Of Paris' type='Institution' properties={'description': 'The institution where Marie Curie became the first woman professor in 1906.'}\n", - "id='Nobel Prize' type='Award' properties={'description': 'An award won by Marie Curie, first woman to win it and first person to win it twice.'}\n", - "source=Node(id='Marie Curie', type='Person') target=Node(id='Pierre Curie', type='Person') type='HUSBAND' properties={'description': \"Marie Curie's husband and co-winner of her first Nobel Prize.\"}\n", - "source=Node(id='Marie Curie', type='Person') target=Node(id='Nobel Prize', type='Award') type='WINNER' properties={'description': 'First woman to win a Nobel Prize.'}\n", - "source=Node(id='Marie Curie', type='Person') target=Node(id='Nobel Prize', type='Award') type='WINNER' properties={'description': 'First person to win a Nobel Prize twice.'}\n", - "source=Node(id='Marie Curie', type='Person') target=Node(id='Nobel Prize', type='Award') type='WINNER' properties={'description': 'Only person to win a Nobel Prize in two scientific fields.'}\n", - "source=Node(id='Marie Curie', type='Person') target=Node(id='University Of Paris', type='Institution') type='PROFESSOR' properties={'description': 'First woman to become a professor at the University of Paris in 1906.'}\n" - ] - } - ], + "outputs": [], "source": [ "text = \"\"\"\n", "Marie Curie, born in 1867, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.\n", diff --git a/eventual-consistency-service/app/graphrag/graph_rag.py b/eventual-consistency-service/app/graphrag/graph_rag.py index 0b5265b1..96a591bc 100644 --- a/eventual-consistency-service/app/graphrag/graph_rag.py +++ b/eventual-consistency-service/app/graphrag/graph_rag.py @@ -98,60 +98,111 @@ async def init( return chunker, vector_indices, extractor -async def process_docs( +async def chunk_docs( conn: TigerGraphConnection, docs_chan: Channel, - embed_q: Channel, - chunk_q: Channel, + embed_chan: Channel, + upsert_chan: Channel, + extract_chan: Channel, ): + """ + Creates and starts one worker for each document + in the docs channel. + """ doc_tasks = [] async with asyncio.TaskGroup() as grp: - async for content in stream_docs(conn): - # only n workers at a time -- held up by semaphore size - async with asyncio.Semaphore(doc_workers): - task = grp.create_task(chunk_doc(conn, content, chunk_q, embed_q)) - doc_tasks.append(task) - break # single doc FIXME: delete + async for content in docs_chan: + await embed_chan.put(content) # send the document to be embedded + task = grp.create_task( + chunk_doc(conn, content, upsert_chan, embed_chan, extract_chan) + ) + doc_tasks.append(task) + # break # single doc FIXME: delete # do something with doc_tasks? 
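# A minimal, self-contained sketch of the channel fan-out pattern this hunk
# introduces: a producer fills an aiochannel Channel, a consumer iterates it
# with `async for` and spawns one TaskGroup task per item, and the sole sender
# closes the channel so the consumer loop can terminate. Assumes Python 3.11+
# (asyncio.TaskGroup) and the aiochannel package; all names below are
# illustrative only, not part of this patch.
import asyncio

from aiochannel import Channel

async def produce(chan: Channel):
    for i in range(3):
        await chan.put(f"doc-{i}")  # blocks while the channel is full
    chan.close()  # only the sender closes the channel

async def handle(item: str):
    await asyncio.sleep(0)  # stand-in for chunk/embed/extract work
    print("handled", item)

async def consume(chan: Channel):
    async with asyncio.TaskGroup() as grp:
        async for item in chan:  # ends once the channel is closed and drained
            grp.create_task(handle(item))

async def main():
    chan = Channel(10)
    async with asyncio.TaskGroup() as grp:
        grp.create_task(produce(chan))
        grp.create_task(consume(chan))

# asyncio.run(main())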
for t in doc_tasks: print(t.result()) + # FIXME: don't close these there, other functions will send to them + upsert_chan.close() + embed_chan.close() -async def embed(embed_q: Channel): - pass + # close the extract chan -- chunk_doc is the only sender + # and chunk_doc calls are kicked off from here (this is technically the sender) + extract_chan.close() -async def upsert(upsert_q: Channel): +async def upsert(upsert_chan: Channel): """ + Creates and starts one worker for each upsert job queue expects: (func, args) <- q.get() """ - while upsert_q.empty(): - await asyncio.sleep(1) # consume task queue - print("upsert started") + upsert_tasks = [] + async with asyncio.TaskGroup() as grp: + async for func, args in upsert_chan: + # print("func name >>>>>", func.__name__, args) + # grp.create_task(todo()) + # continue + + # execute the task + t = grp.create_task(func(*args)) + upsert_tasks.append(t) + + print(f"upsert done") + # do something with doc_tasks? + for t in upsert_tasks: + print(t.result()) + + +async def embed(embed_chan: Channel): + """ + Creates and starts one worker for each embed job + """ + + # consume task queue responses = [] - while not upsert_q.empty(): - # get the next task - func, args = await upsert_q.get() + async with asyncio.TaskGroup() as grp: + async for item in embed_chan: + print("embed item>>>>>", type(item)) + grp.create_task(todo()) + continue + # execute the task + # response = await func(*args) - # execute the task - response = await func(*args) + # append task results to worker results/response + # responses.append(response) - # append task results to worker results/response - responses.append(response) + print(f"embed done") + return responses - # mark task as done - upsert_q.task_done() - print(f"upsert done") +async def extract(extract_chan: Channel): + """ + Creates and starts one worker for each extract job + """ + + # consume task queue + responses = [] + async with asyncio.TaskGroup() as grp: + async for item in extract_chan: + print("extract item>>>>>", type(item)) + grp.create_task(todo()) + continue + # execute the task + # response = await func(*args) + + # append task results to worker results/response + # responses.append(response) + + print(f"embed done") return responses -async def extract(extract_q: Channel): - pass +async def todo(): + await asyncio.sleep(1) async def run(graphname: str, conn: TigerGraphConnection): @@ -175,25 +226,27 @@ async def run(graphname: str, conn: TigerGraphConnection): # TODO: make configurable tasks = [] - docs_chan = Channel(48) # process n chunks at a time max - chunk_chan = Channel(100) # process 100 chunks at a time max + docs_chan = Channel(15) # process n chunks at a time max embed_chan = Channel(100) upsert_chan = Channel(100) + extract_chan = Channel(100) async with asyncio.TaskGroup() as grp: # get docs - t = grp.create_task(stream_docs(conn, docs_chan,10)) + t = grp.create_task(stream_docs(conn, docs_chan, 10)) tasks.append(t) # process docs - t = grp.create_task(process_docs(conn, docs_chan, embed_chan, chunk_chan)) - tasks.append(t) - # embed - t = grp.create_task(embed(conn, doc_workers, embed_chan, chunk_chan)) + t = grp.create_task( + chunk_docs(conn, docs_chan, embed_chan, upsert_chan, extract_chan) + ) tasks.append(t) # upsert chunks - t = grp.create_task(upsert(conn, doc_workers, embed_chan, chunk_chan)) + t = grp.create_task(upsert(upsert_chan)) + tasks.append(t) + # # embed + t = grp.create_task(embed(embed_chan)) tasks.append(t) # extract entities - t = grp.create_task(extract(conn, 
doc_workers, embed_chan, chunk_chan)) + t = grp.create_task(extract(extract_chan)) tasks.append(t) end = time.perf_counter() diff --git a/eventual-consistency-service/app/graphrag/util.py b/eventual-consistency-service/app/graphrag/util.py index c18ec86a..cfb84e5a 100644 --- a/eventual-consistency-service/app/graphrag/util.py +++ b/eventual-consistency-service/app/graphrag/util.py @@ -4,9 +4,9 @@ import traceback from urllib.parse import quote_plus +import ecc_util import httpx from aiochannel import Channel -from app import ecc_util from pyTigerGraph import TigerGraphConnection from common.logs.logwriter import LogWriter @@ -87,13 +87,18 @@ async def stream_docs( docs_chan: Channel, ttl_batches: int = 10, ): + """ + Streams the document contents into the docs_chan + """ headers = make_headers(conn) for i in range(ttl_batches): doc_ids = await stream_doc_ids(conn, i, ttl_batches) if doc_ids["error"]: - print(doc_ids) break # TODO: handle error + print("********") + print(doc_ids) + print("********") for d in doc_ids["ids"]: async with httpx.AsyncClient(timeout=None) as client: res = await client.get( @@ -104,26 +109,38 @@ async def stream_docs( # TODO: check for errors # this will block and wait if the channel is full await docs_chan.put(res.json()["results"][0]["DocContent"][0]) - # return # single doc test FIXME: delete - # return # single batch test FIXME: delete + # break # single doc test FIXME: delete + # break # single batch test FIXME: delete + + # close the docs chan -- this function is the only sender + docs_chan.close() async def chunk_doc( conn: TigerGraphConnection, doc: dict[str, str], - chunk_chan: Channel, + upsert_chan: Channel, embed_chan: Channel, + extract_chan: Channel, ): - # TODO: Embed document and chunks + """ + Chunks a document. 
+ Places the resulting chunks into the upsert channel (to be upserted to TG) + and the embed channel (to be embedded and written to the vector store) + """ chunker = ecc_util.get_chunker() chunks = chunker.chunk(doc["attributes"]["text"]) v_id = doc["v_id"] # TODO: n chunks at a time for i, chunk in enumerate(chunks): # send chunks to be upserted (func, args) - await chunk_chan.put((upsert_chunk, (conn, v_id, f"{v_id}_chunk_{i}", chunk))) + await upsert_chan.put((upsert_chunk, (conn, v_id, f"{v_id}_chunk_{i}", chunk))) # send chunks to be embedded + await embed_chan.put(chunk) + + # send chunks to have entities extracted + await extract_chan.put(chunk) # break # single chunk FIXME: delete @@ -158,7 +175,7 @@ async def upsert_vertex( res = await client.post( f"{conn.restppUrl}/graph/{conn.graphname}", data=data, headers=headers ) - print(res.json()) + print("upsert vertex>>>", res.json()) async def upsert_edge( @@ -194,7 +211,7 @@ async def upsert_edge( res = await client.post( f"{conn.restppUrl}/graph/{conn.graphname}", data=data, headers=headers ) - print(res.json()) + print("upsert edge >>>", res.json()) async def upsert_chunk(conn: TigerGraphConnection, doc_id, chunk_id, chunk): From 7501a37b400eff0334c11aa8adc264ded66d66ca Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Mon, 29 Jul 2024 16:31:24 -0400 Subject: [PATCH 06/53] pytg in 175 seconds --- common/config.py | 2 +- common/embeddings/embedding_services.py | 37 ++- common/embeddings/milvus_embedding_store.py | 87 +++++- common/extractors/BaseExtractor.py | 13 +- common/extractors/GraphExtractor.py | 50 ++++ .../LLMEntityRelationshipExtractor.py | 35 ++- common/gsql/graphRAG/StreamDocContent.gsql | 5 +- common/gsql/graphRAG/StreamDocIds.gsql | 7 +- common/gsql/supportai/SupportAI_Schema.gsql | 12 +- common/logs/logwriter.py | 2 +- common/py_schemas/tool_io_schemas.py | 2 +- .../app/graphrag/graph_rag.py | 252 ++++++++---------- .../app/graphrag/util.py | 240 ++++++++--------- .../app/graphrag/worker.py | 35 --- .../app/graphrag/workers.py | 226 ++++++++++++++++ eventual-consistency-service/app/main.py | 3 +- 16 files changed, 668 insertions(+), 340 deletions(-) delete mode 100644 eventual-consistency-service/app/graphrag/worker.py create mode 100644 eventual-consistency-service/app/graphrag/workers.py diff --git a/common/config.py b/common/config.py index 2546e38a..ec72455d 100644 --- a/common/config.py +++ b/common/config.py @@ -167,7 +167,7 @@ def get_llm_service(llm_config) -> LLM_Model: doc_processing_config = { "chunker": "semantic", "chunker_config": {"method": "percentile", "threshold": 0.95}, - "extractor": "llm", + "extractor": "graphrag", "extractor_config": {}, } elif DOC_PROCESSING_CONFIG.endswith(".json"): diff --git a/common/embeddings/embedding_services.py b/common/embeddings/embedding_services.py index c76bf46d..dd506670 100644 --- a/common/embeddings/embedding_services.py +++ b/common/embeddings/embedding_services.py @@ -1,11 +1,13 @@ +import logging import os +import time from typing import List + from langchain.schema.embeddings import Embeddings -import logging -import time + from common.logs.log import req_id_cv -from common.metrics.prometheus_metrics import metrics from common.logs.logwriter import LogWriter +from common.metrics.prometheus_metrics import metrics logger = logging.getLogger(__name__) @@ -87,6 +89,33 @@ def embed_query(self, question: str) -> List[float]: duration ) + async def aembed_query(self, question: str) -> List[float]: + """Embed 
Query Async. + Embed a string. + + Args: + question (str): + A string to embed. + """ + # start_time = time.time() + # metrics.llm_inprogress_requests.labels(self.model_name).inc() + + # try: + logger.debug_pii(f"aembed_query() embedding question={question}") + query_embedding = await self.embeddings.aembed_query(question) + # metrics.llm_success_response_total.labels(self.model_name).inc() + return query_embedding + # except Exception as e: + # # metrics.llm_query_error_total.labels(self.model_name).inc() + # raise e + # finally: + # metrics.llm_request_total.labels(self.model_name).inc() + # metrics.llm_inprogress_requests.labels(self.model_name).dec() + # duration = time.time() - start_time + # metrics.llm_request_duration_seconds.labels(self.model_name).observe( + # duration + # ) + class AzureOpenAI_Ada002(EmbeddingModel): """Azure OpenAI Ada-002 Embedding Model""" @@ -124,8 +153,8 @@ class AWS_Bedrock_Embedding(EmbeddingModel): """AWS Bedrock Embedding Model""" def __init__(self, config): - from langchain_community.embeddings import BedrockEmbeddings import boto3 + from langchain_community.embeddings import BedrockEmbeddings super().__init__(config=config, model_name=config["embedding_model"]) diff --git a/common/embeddings/milvus_embedding_store.py b/common/embeddings/milvus_embedding_store.py index 8a52d05f..ac9c5389 100644 --- a/common/embeddings/milvus_embedding_store.py +++ b/common/embeddings/milvus_embedding_store.py @@ -1,18 +1,17 @@ import logging +import traceback from time import sleep, time from typing import Iterable, List, Optional, Tuple from langchain_community.vectorstores import Milvus from langchain_core.documents.base import Document -from pymilvus import connections, utility -from pymilvus.exceptions import MilvusException +from pymilvus import MilvusException, connections, utility from common.embeddings.base_embedding_store import EmbeddingStore from common.embeddings.embedding_services import EmbeddingModel from common.logs.log import req_id_cv -from common.metrics.prometheus_metrics import metrics from common.logs.logwriter import LogWriter -from pymilvus import MilvusException +from common.metrics.prometheus_metrics import metrics logger = logging.getLogger(__name__) @@ -77,7 +76,7 @@ def connect_to_milvus(self): while retry_attempt < self.max_retry_attempts: try: connections.connect(**self.milvus_connection) - metrics.milvus_active_connections.labels(self.collection_name).inc + # metrics.milvus_active_connections.labels(self.collection_name).inc LogWriter.info( f"""Initializing Milvus with host={self.milvus_connection.get("host", self.milvus_connection.get("uri", "unknown host"))}, port={self.milvus_connection.get('port', 'unknown')}, username={self.milvus_connection.get('user', 'unknown')}, collection={self.collection_name}""" @@ -213,6 +212,76 @@ def add_embeddings( error_message = f"An error occurred while registering document: {str(e)}" LogWriter.error(error_message) + async def aadd_embeddings( + self, + embeddings: Iterable[Tuple[str, List[float]]], + metadatas: List[dict] = None, + ): + """Async Add Embeddings. + Add embeddings to the Embedding store. + Args: + embeddings (Iterable[Tuple[str, List[float]]]): + Iterable of content and embedding of the document. + metadatas (List[Dict]): + List of dictionaries containing the metadata for each document. + The embeddings and metadatas list need to have identical indexing. 
+ """ + try: + if metadatas is None: + metadatas = [] + + # add fields required by Milvus if they do not exist + if self.support_ai_instance: + for metadata in metadatas: + if self.vertex_field not in metadata: + metadata[self.vertex_field] = "" + else: + for metadata in metadatas: + if "seq_num" not in metadata: + metadata["seq_num"] = 1 + if "source" not in metadata: + metadata["source"] = "" + + LogWriter.info( + f"request_id={req_id_cv.get()} Milvus ENTRY aadd_embeddings()" + ) + texts = [text for text, _ in embeddings] + + # operation_type = "add_texts" + # metrics.milvus_query_total.labels( + # self.collection_name, operation_type + # ).inc() + # start_time = time() + + added = await self.milvus.aadd_texts(texts=texts, metadatas=metadatas) + + # duration = time() - start_time + # metrics.milvus_query_duration_seconds.labels( + # self.collection_name, operation_type + # ).observe(duration) + + LogWriter.info( + f"request_id={req_id_cv.get()} Milvus EXIT aadd_embeddings()" + ) + + # Check if registration was successful + if added: + success_message = f"Document registered with id: {added[0]}" + LogWriter.info(success_message) + return success_message + else: + error_message = f"Failed to register document {added}" + LogWriter.error(error_message) + raise Exception(error_message) + + except Exception as e: + error_message = f"An error occurred while registering document:{metadatas} ({len(texts)},{len(metadatas)})\nErr: {str(e)}" + LogWriter.error(error_message) + exc = traceback.format_exc() + LogWriter.error(exc) + LogWriter.error(f"{texts}") + raise e + def get_pks( self, expr: str, @@ -506,11 +575,11 @@ def query(self, expr: str, output_fields: List[str]): return None try: - query_result = self.milvus.col.query( - expr=expr, output_fields=output_fields - ) + query_result = self.milvus.col.query(expr=expr, output_fields=output_fields) except MilvusException as exc: - LogWriter.error(f"Failed to get outputs: {self.milvus.collection_name} error: {exc}") + LogWriter.error( + f"Failed to get outputs: {self.milvus.collection_name} error: {exc}" + ) raise exc return query_result diff --git a/common/extractors/BaseExtractor.py b/common/extractors/BaseExtractor.py index 3f1ec92b..e8638665 100644 --- a/common/extractors/BaseExtractor.py +++ b/common/extractors/BaseExtractor.py @@ -1,6 +1,13 @@ -class BaseExtractor: - def __init__(): +from abc import ABC, abstractmethod + +from langchain_community.graphs.graph_document import GraphDocument + + +class BaseExtractor(ABC): + @abstractmethod + def extract(self, text:str): pass - def extract(self, text): + @abstractmethod + async def aextract(self, text:str) -> list[GraphDocument]: pass diff --git a/common/extractors/GraphExtractor.py b/common/extractors/GraphExtractor.py index c8f24355..282729a4 100644 --- a/common/extractors/GraphExtractor.py +++ b/common/extractors/GraphExtractor.py @@ -16,6 +16,56 @@ def __init__(self): ) def extract(self, text) -> list[GraphDocument]: + """ + returns a list of GraphDocument: + Each doc is: + nodes=[ + Node( + id='Marie Curie', + type='Person', + properties={ + 'description': 'A Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.' + } + ), + ... + ], + relationships=[ + Relationship( + source=Node(id='Marie Curie', type='Person'), + target=Node(id='Pierre Curie', type='Person'), + type='SPOUSE' + ), + ... 
+ ] + """ doc = Document(page_content=text) graph_docs = self.transformer.convert_to_graph_documents([doc]) + translated_docs = self.translate(graph_docs) + return translated_docs + + async def aextract(self, text:str) -> list[GraphDocument]: + """ + returns a list of GraphDocument: + Each doc is: + nodes=[ + Node( + id='Marie Curie', + type='Person', + properties={ + 'description': 'A Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.' + } + ), + ... + ], + relationships=[ + Relationship( + source=Node(id='Marie Curie', type='Person'), + target=Node(id='Pierre Curie', type='Person'), + type='SPOUSE' + ), + ... + ] + """ + doc = Document(page_content=text) + graph_docs = await self.transformer.aconvert_to_graph_documents([doc]) return graph_docs diff --git a/common/extractors/LLMEntityRelationshipExtractor.py b/common/extractors/LLMEntityRelationshipExtractor.py index d5a0a970..415c3235 100644 --- a/common/extractors/LLMEntityRelationshipExtractor.py +++ b/common/extractors/LLMEntityRelationshipExtractor.py @@ -1,8 +1,9 @@ -from common.llm_services import LLM_Model +import json +from typing import List + from common.extractors.BaseExtractor import BaseExtractor +from common.llm_services import LLM_Model from common.py_schemas import KnowledgeGraph -from typing import List -import json class LLMEntityRelationshipExtractor(BaseExtractor): @@ -19,6 +20,34 @@ def __init__( self.strict_mode = strict_mode def _extract_kg_from_doc(self, doc, chain, parser): + """ + returns: + { + "nodes": [ + { + "id": "str", + "type": "string", + "definition": "string" + } + ], + "rels": [ + { + "source":{ + "id": "str", + "type": "string", + "definition": "string" + } + "target":{ + "id": "str", + "type": "string", + "definition": "string" + } + "definition" + } + ] + } + """ + try: out = chain.invoke( {"input": doc, "format_instructions": parser.get_format_instructions()} diff --git a/common/gsql/graphRAG/StreamDocContent.gsql b/common/gsql/graphRAG/StreamDocContent.gsql index fb7338b7..87f12566 100644 --- a/common/gsql/graphRAG/StreamDocContent.gsql +++ b/common/gsql/graphRAG/StreamDocContent.gsql @@ -1,5 +1,6 @@ -CREATE QUERY StreamDocContent(Vertex doc) { +CREATE DISTRIBUTED QUERY StreamDocContent(Vertex doc) { Doc = {doc}; - DocContent = SELECT c FROM Doc:d -(HAS_CONTENT)-> Content:c; + DocContent = SELECT c FROM Doc:d -(HAS_CONTENT)-> Content:c + POST-ACCUM d.epoch_processed = datetime_to_epoch(now()); PRINT DocContent; } diff --git a/common/gsql/graphRAG/StreamDocIds.gsql b/common/gsql/graphRAG/StreamDocIds.gsql index fb373490..d5ec982e 100644 --- a/common/gsql/graphRAG/StreamDocIds.gsql +++ b/common/gsql/graphRAG/StreamDocIds.gsql @@ -1,10 +1,13 @@ -CREATE QUERY StreamDocIds(INT current_batch, INT ttl_batches) { +CREATE DISTRIBUTED QUERY StreamDocIds(INT current_batch, INT ttl_batches) { ListAccum @@doc_ids; Docs = {Document.*}; Docs = SELECT d FROM Docs:d WHERE vertex_to_int(d) % ttl_batches == current_batch - ACCUM @@doc_ids += d.id; + AND d.epoch_processed == 0 + AND d.epoch_processing == 0 + ACCUM @@doc_ids += d.id + POST-ACCUM d.epoch_processing = datetime_to_epoch(now()); PRINT @@doc_ids; } diff --git a/common/gsql/supportai/SupportAI_Schema.gsql b/common/gsql/supportai/SupportAI_Schema.gsql index 0998affe..0e3cf6c3 100644 --- a/common/gsql/supportai/SupportAI_Schema.gsql +++ b/common/gsql/supportai/SupportAI_Schema.gsql @@ -2,7 +2,7 @@ CREATE SCHEMA_CHANGE JOB add_supportai_schema { ADD VERTEX DocumentChunk(PRIMARY_ID id STRING, idx INT, 
epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; ADD VERTEX Document(PRIMARY_ID id STRING, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; ADD VERTEX Concept(PRIMARY_ID id STRING, description STRING, concept_type STRING, human_curated BOOL, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; - ADD VERTEX Entity(PRIMARY_ID id STRING, definition STRING, description STRING, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + ADD VERTEX Entity(PRIMARY_ID id STRING, definition STRING, description STRING, entity_type STRING, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; ADD VERTEX Relationship(PRIMARY_ID id STRING, definition STRING, short_name STRING, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; ADD VERTEX DocumentCollection(PRIMARY_ID id STRING, epoch_added UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; ADD VERTEX Content(PRIMARY_ID id STRING, text STRING, epoch_added UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; @@ -21,12 +21,12 @@ CREATE SCHEMA_CHANGE JOB add_supportai_schema { // GraphRAG ADD VERTEX Community(PRIMARY_ID id STRING, description INT) WITH PRIMARY_ID_AS_ATTRIBUTE="true"; - ADD VERTEX ResolvedEntity(PRIMARY_ID id STRING, description STRING) WITH PRIMARY_ID_AS_ATTRIBUTE="true"; + ADD VERTEX ResolvedEntity(PRIMARY_ID id STRING, description STRING, entity_type STRING) WITH PRIMARY_ID_AS_ATTRIBUTE="true"; - ADD DIRECTED EDGE KNN(FROM Entity, TO Entity); // TODO: check where knn algo writes results - ADD DIRECTED EDGE RESOLVES_TO(FROM Entity, TO ResolvedEntity); // Connect ResolvedEntities with their children entities - ADD DIRECTED EDGE RESOLVED_RELATIONSHIP(FROM ResolvedEntity, TO ResolvedEntity, weight UINT); // store edges between entities after they're resolved - ADD DIRECTED EDGE IN_COMMUNITY(FROM ResolvedEntity, TO Community); + ADD DIRECTED EDGE RELATIONSHIP(FROM Entity, TO Entity, relation_type STRING) WITH REVERSE_EDGE="reverse_RELATIONSHIP"; // TODO: check where knn algo writes results + ADD DIRECTED EDGE RESOLVES_TO(FROM Entity, TO ResolvedEntity, relation_type STRING) WITH REVERSE_EDGE="reverse_RESOLVES_TO"; // Connect ResolvedEntities with their children entities + ADD DIRECTED EDGE RESOLVED_RELATIONSHIP(FROM ResolvedEntity, TO ResolvedEntity) WITH REVERSE_EDGE="reverse_RESOLVED_RELATIONSHIP"; // store edges between entities after they're resolved + ADD DIRECTED EDGE IN_COMMUNITY(FROM ResolvedEntity, TO Community) WITH REVERSE_EDGE="reverse_IN_COMMUNITY"; // TODO: louvain will be run on resolved entities, but stored in community then on communities until louvain runs out // Hierarchical communities (Louvain/Leiden) diff --git a/common/logs/logwriter.py b/common/logs/logwriter.py index ff13feed..f75be00c 100644 --- a/common/logs/logwriter.py +++ b/common/logs/logwriter.py @@ -142,7 +142,7 @@ def log(level, message, mask_pii=True, **kwargs): LogWriter.general_logger.info(message) @staticmethod - def info(message, mask_pii=True, **kwargs): + def info(message, mask_pii=False, **kwargs): LogWriter.log("info", message, mask_pii, 
**kwargs) @staticmethod diff --git a/common/py_schemas/tool_io_schemas.py b/common/py_schemas/tool_io_schemas.py index 1fe16de4..1ea6ed3e 100644 --- a/common/py_schemas/tool_io_schemas.py +++ b/common/py_schemas/tool_io_schemas.py @@ -91,4 +91,4 @@ class ReportSection(BaseModel): questions: List[ReportQuestion] = Field("List of questions and reasoning for the section") class ReportSections(BaseModel): - sections: List[ReportSection] = Field("List of sections for the report") \ No newline at end of file + sections: List[ReportSection] = Field("List of sections for the report") diff --git a/eventual-consistency-service/app/graphrag/graph_rag.py b/eventual-consistency-service/app/graphrag/graph_rag.py index 96a591bc..e248510c 100644 --- a/eventual-consistency-service/app/graphrag/graph_rag.py +++ b/eventual-consistency-service/app/graphrag/graph_rag.py @@ -2,100 +2,59 @@ import logging import time -import ecc_util +import httpx from aiochannel import Channel -from graphrag.util import chunk_doc, install_query, stream_docs +from graphrag import workers +from graphrag.util import init, make_headers, stream_doc_ids,http_timeout from pyTigerGraph import TigerGraphConnection -from common.chunkers.base_chunker import BaseChunker -from common.config import ( - doc_processing_config, - embedding_service, - get_llm_service, - llm_config, - milvus_config, -) +from common.config import embedding_service from common.embeddings.milvus_embedding_store import MilvusEmbeddingStore -from common.extractors import GraphExtractor, LLMEntityRelationshipExtractor from common.extractors.BaseExtractor import BaseExtractor +http_logs = logging.getLogger("httpx") +http_logs.setLevel(logging.WARNING) logger = logging.getLogger(__name__) + consistency_checkers = {} -async def install_queries( - requried_queries: list[str], conn: TigerGraphConnection, n_workers=8 +async def stream_docs( + conn: TigerGraphConnection, + docs_chan: Channel, + ttl_batches: int = 10, ): - # queries that are currently installed - installed_queries = [q.split("/")[-1] for q in conn.getEndpoints(dynamic=True)] - - tasks = [] - async with asyncio.TaskGroup() as grp: - for q in requried_queries: - async with asyncio.Semaphore(n_workers): - q_name = q.split("/")[-1] - # if the query is not installed, install it - if q_name not in installed_queries: - task = grp.create_task(install_query(conn, q)) - tasks.append(task) - - for t in tasks: - print(t.result()) - # TODO: Check if anything had an error - return "", "", "" - - -async def init( - graphname: str, conn: TigerGraphConnection -) -> tuple[BaseChunker, dict[str, MilvusEmbeddingStore], BaseExtractor]: - # install requried queries - requried_queries = [ - # "common/gsql/supportai/Scan_For_Updates", - # "common/gsql/supportai/Update_Vertices_Processing_Status", - # "common/gsql/supportai/ECC_Status", - # "common/gsql/supportai/Check_Nonexistent_Vertices", - "common/gsql/graphRAG/StreamDocIds", - "common/gsql/graphRAG/StreamDocContent", - ] - # await install_queries(requried_queries, conn) - return await install_queries(requried_queries, conn) - - # init processing tools - chunker = ecc_util.get_chunker() - - vector_indices = {} - vertex_field = milvus_config.get("vertex_field", "vertex_id") - index_names = milvus_config.get( - "indexes", - ["Document", "DocumentChunk", "Entity", "Relationship"], - ) - for index_name in index_names: - vector_indices[graphname + "_" + index_name] = MilvusEmbeddingStore( - embedding_service, - host=milvus_config["host"], - port=milvus_config["port"], - 
support_ai_instance=True, - collection_name=graphname + "_" + index_name, - username=milvus_config.get("username", ""), - password=milvus_config.get("password", ""), - vector_field=milvus_config.get("vector_field", "document_vector"), - text_field=milvus_config.get("text_field", "document_content"), - vertex_field=vertex_field, - ) - - if doc_processing_config.get("extractor") == "llm": - extractor = GraphExtractor() - elif doc_processing_config.get("extractor") == "llm": - extractor = LLMEntityRelationshipExtractor(get_llm_service(llm_config)) - else: - raise ValueError("Invalid extractor type") - - if vertex_field is None: - raise ValueError( - "vertex_field is not defined. Ensure Milvus is enabled in the configuration." - ) - - return chunker, vector_indices, extractor + """ + Streams the document contents into the docs_chan + """ + logger.info("streaming docs") + headers = make_headers(conn) + for i in range(ttl_batches): + doc_ids = await stream_doc_ids(conn, i, ttl_batches) + if doc_ids["error"]: + continue # TODO: handle error + + logger.info("********doc_ids") + logger.info(doc_ids) + logger.info("********") + for d in doc_ids["ids"]: + async with httpx.AsyncClient(timeout=http_timeout) as client: + res = await client.get( + f"{conn.restppUrl}/query/{conn.graphname}/StreamDocContent/", + params={"doc": d}, + headers=headers, + ) + # TODO: check for errors + # this will block and wait if the channel is full + logger.info("steam_docs writes to docs") + await docs_chan.put(res.json()["results"][0]["DocContent"][0]) + # break # single doc test FIXME: delete + # break # single batch test FIXME: delete + + logger.info("stream_docs done") + # close the docs chan -- this function is the only sender + logger.info("****** closing docs chan") + docs_chan.close() async def chunk_docs( @@ -109,100 +68,120 @@ async def chunk_docs( Creates and starts one worker for each document in the docs channel. """ + logger.info("Reading from docs channel") doc_tasks = [] async with asyncio.TaskGroup() as grp: async for content in docs_chan: - await embed_chan.put(content) # send the document to be embedded + logger.info("*********reading from docs chan") + # continue + v_id = content["v_id"] + txt = content["attributes"]["text"] + # send the document to be embedded + logger.info("chunk writes to extract") + await embed_chan.put((v_id, txt, "Document")) + task = grp.create_task( - chunk_doc(conn, content, upsert_chan, embed_chan, extract_chan) + workers.chunk_doc(conn, content, upsert_chan, embed_chan, extract_chan) ) doc_tasks.append(task) # break # single doc FIXME: delete + logger.info("*********done reading from docs chan") + logger.info("chunk_docs done") # do something with doc_tasks? 
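# A minimal sketch of the (func, args) dispatch convention used by the upsert
# channel: senders enqueue a coroutine function together with its positional
# arguments, and the upsert worker simply awaits whatever callable it receives.
# Assumes Python 3.11+ and the aiochannel package; the names below are
# hypothetical stand-ins, not part of this patch.
import asyncio

from aiochannel import Channel

async def upsert_vertex_stub(v_type: str, v_id: str):
    print("upserting", v_type, v_id)  # stand-in for the RESTPP upsert call

async def producer(chan: Channel):
    # enqueue the callable plus its args, mirroring upsert_chan.put((func, args))
    await chan.put((upsert_vertex_stub, ("DocumentChunk", "doc1_chunk_0")))
    chan.close()  # sole sender closes the channel

async def upsert_worker(chan: Channel):
    async with asyncio.TaskGroup() as grp:
        async for func, args in chan:  # unpack (func, args) and run it
            grp.create_task(func(*args))

async def main():
    chan = Channel(100)
    async with asyncio.TaskGroup() as grp:
        grp.create_task(producer(chan))
        grp.create_task(upsert_worker(chan))

# asyncio.run(main())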
- for t in doc_tasks: - print(t.result()) - - # FIXME: don't close these there, other functions will send to them - upsert_chan.close() - embed_chan.close() + # for t in doc_tasks: + # logger.info(t.result()) # close the extract chan -- chunk_doc is the only sender - # and chunk_doc calls are kicked off from here (this is technically the sender) + # and chunk_doc calls are kicked off from here + logger.info("********closing extract chan") extract_chan.close() async def upsert(upsert_chan: Channel): """ Creates and starts one worker for each upsert job - queue expects: + chan expects: (func, args) <- q.get() """ + logger.info("Reading from upsert channel") # consume task queue upsert_tasks = [] async with asyncio.TaskGroup() as grp: async for func, args in upsert_chan: - # print("func name >>>>>", func.__name__, args) - # grp.create_task(todo()) + logger.info("*********reading from upsert chan") + logger.info(f"{func.__name__}, {args[1]}") # continue - # execute the task t = grp.create_task(func(*args)) upsert_tasks.append(t) + logger.info("*********done reading from upsert chan") - print(f"upsert done") + logger.info(f"upsert done") # do something with doc_tasks? - for t in upsert_tasks: - print(t.result()) + # for t in upsert_tasks: + # logger.info(t.result()) -async def embed(embed_chan: Channel): +async def embed( + embed_chan: Channel, index_stores: dict[str, MilvusEmbeddingStore], graphname: str +): """ Creates and starts one worker for each embed job + chan expects: + (v_id, content, index_name) <- q.get() """ - - # consume task queue - responses = [] + logger.info("Reading from embed channel") async with asyncio.TaskGroup() as grp: - async for item in embed_chan: - print("embed item>>>>>", type(item)) - grp.create_task(todo()) - continue - # execute the task - # response = await func(*args) - - # append task results to worker results/response - # responses.append(response) + # consume task queue + async for v_id, content, index_name in embed_chan: + logger.info("*********reading from embed chan") + # continue + embedding_store = index_stores[f"{graphname}_{index_name}"] + logger.info(f"Embed to {graphname}_{index_name}: {v_id}") + grp.create_task( + workers.embed( + embedding_service, + embedding_store, + v_id, + content, + ) + ) + logger.info("*********done reading from embed chan") - print(f"embed done") - return responses + logger.info(f"embed done") -async def extract(extract_chan: Channel): +async def extract( + extract_chan: Channel, + upsert_chan: Channel, + embed_chan: Channel, + extractor: BaseExtractor, + conn: TigerGraphConnection, +): """ Creates and starts one worker for each extract job + chan expects: + (chunk , chunk_id) <- q.get() """ - + logger.info("Reading from extract channel") # consume task queue - responses = [] async with asyncio.TaskGroup() as grp: async for item in extract_chan: - print("extract item>>>>>", type(item)) - grp.create_task(todo()) - continue - # execute the task - # response = await func(*args) - + logger.info("*********reading from extract chan") + logger.info("*********done reading from extract chan") + grp.create_task( + workers.extract(upsert_chan, embed_chan, extractor, conn, *item) + ) # append task results to worker results/response - # responses.append(response) + logger.info("*********done reading from extract chan") - print(f"embed done") - return responses + logger.info(f"extract done") - -async def todo(): - await asyncio.sleep(1) + logger.info("****closing upsert and embed chan") + upsert_chan.close() + embed_chan.close() async 
def run(graphname: str, conn: TigerGraphConnection): @@ -219,14 +198,13 @@ async def run(graphname: str, conn: TigerGraphConnection): """ - # init configurable objects - await init(graphname, conn) + extractor, index_stores = await init(conn) # return start = time.perf_counter() # TODO: make configurable tasks = [] - docs_chan = Channel(15) # process n chunks at a time max + docs_chan = Channel(1) # process n chunks at a time max embed_chan = Channel(100) upsert_chan = Channel(100) extract_chan = Channel(100) @@ -243,12 +221,14 @@ async def run(graphname: str, conn: TigerGraphConnection): t = grp.create_task(upsert(upsert_chan)) tasks.append(t) # # embed - t = grp.create_task(embed(embed_chan)) + t = grp.create_task(embed(embed_chan, index_stores, graphname)) tasks.append(t) # extract entities - t = grp.create_task(extract(extract_chan)) + t = grp.create_task( + extract(extract_chan, upsert_chan, embed_chan, extractor, conn) + ) tasks.append(t) end = time.perf_counter() - print("DONE") - print(end - start) + logger.info("DONE") + logger.info(end - start) diff --git a/eventual-consistency-service/app/graphrag/util.py b/eventual-consistency-service/app/graphrag/util.py index cfb84e5a..3fb8f916 100644 --- a/eventual-consistency-service/app/graphrag/util.py +++ b/eventual-consistency-service/app/graphrag/util.py @@ -1,58 +1,117 @@ +import asyncio import base64 import json -import time +import logging import traceback -from urllib.parse import quote_plus -import ecc_util import httpx -from aiochannel import Channel +from graphrag import workers from pyTigerGraph import TigerGraphConnection +from common.config import ( + doc_processing_config, + embedding_service, + get_llm_service, + llm_config, + milvus_config, +) +from common.embeddings.milvus_embedding_store import MilvusEmbeddingStore +from common.extractors import GraphExtractor, LLMEntityRelationshipExtractor +from common.extractors.BaseExtractor import BaseExtractor from common.logs.logwriter import LogWriter +logger = logging.getLogger(__name__) +http_timeout = httpx.Timeout(15.0) -def make_headers(conn: TigerGraphConnection): - if conn.apiToken is None or conn.apiToken == "": - tkn = base64.b64encode(f"{conn.username}:{conn.password}".encode()).decode() - headers = {"Authorization": f"Basic {tkn}"} - else: - headers = {"Authorization": f"Bearer {conn.apiToken}"} - return headers +async def install_queries( + requried_queries: list[str], conn: TigerGraphConnection, n_workers=8 +): + # queries that are currently installed + installed_queries = [q.split("/")[-1] for q in conn.getEndpoints(dynamic=True)] + tasks = [] + async with asyncio.TaskGroup() as grp: + for q in requried_queries: + async with asyncio.Semaphore(n_workers): + q_name = q.split("/")[-1] + # if the query is not installed, install it + if q_name not in installed_queries: + task = grp.create_task(workers.install_query(conn, q)) + tasks.append(task) -async def install_query( - conn: TigerGraphConnection, query_path: str -) -> dict[str, httpx.Response | str | None]: - LogWriter.info(f"Installing query {query_path}") - with open(f"{query_path}.gsql", "r") as f: - query = f.read() + for t in tasks: + logger.info(t.result()) + # TODO: Check if anything had an error - query_name = query_path.split("/")[-1] - query = f"""\ -USE GRAPH {conn.graphname} -{query} -INSTALL QUERY {query_name}""" - tkn = base64.b64encode(f"{conn.username}:{conn.password}".encode()).decode() - headers = {"Authorization": f"Basic {tkn}"} - async with httpx.AsyncClient(timeout=None) as client: - res = await 
client.post( - conn.gsUrl + "/gsqlserver/gsql/file", - data=quote_plus(query.encode("utf-8")), - headers=headers, +async def init( + conn: TigerGraphConnection, +) -> tuple[BaseExtractor, dict[str, MilvusEmbeddingStore]]: + # install requried queries + requried_queries = [ + # "common/gsql/supportai/Scan_For_Updates", + # "common/gsql/supportai/Update_Vertices_Processing_Status", + # "common/gsql/supportai/ECC_Status", + # "common/gsql/supportai/Check_Nonexistent_Vertices", + "common/gsql/graphRAG/StreamDocIds", + "common/gsql/graphRAG/StreamDocContent", + ] + await install_queries(requried_queries, conn) + + # extractor + if doc_processing_config.get("extractor") == "graphrag": + extractor = GraphExtractor() + elif doc_processing_config.get("extractor") == "llm": + extractor = LLMEntityRelationshipExtractor(get_llm_service(llm_config)) + else: + raise ValueError("Invalid extractor type") + vertex_field = milvus_config.get("vertex_field", "vertex_id") + index_names = milvus_config.get( + "indexes", + [ + "Document", + "DocumentChunk", + "Entity", + "Relationship", + # "Concept", + ], + ) + index_stores = {} + content = "init" + # TODO:do concurrently + for index_name in index_names: + name = conn.graphname + "_" + index_name + s = MilvusEmbeddingStore( + embedding_service, + host=milvus_config["host"], + port=milvus_config["port"], + support_ai_instance=True, + collection_name=name, + username=milvus_config.get("username", ""), + password=milvus_config.get("password", ""), + vector_field=milvus_config.get("vector_field", "document_vector"), + text_field=milvus_config.get("text_field", "document_content"), + vertex_field=vertex_field, ) + # TODO: only do this if collection doesn't exist + vec = embedding_service.embed_query(content) + LogWriter.info(f"Initializing {name}") + s.add_embeddings([(content, vec)], [{vertex_field: content}]) + s.remove_embeddings(expr=f"{vertex_field} in ['{content}']") + index_stores[name] = s - if "error" in res.text.lower(): - LogWriter.error(res.text) - return { - "result": None, - "error": True, - "message": f"Failed to install query {query_name}", - } + return extractor, index_stores - return {"result": res, "error": False} + +def make_headers(conn: TigerGraphConnection): + if conn.apiToken is None or conn.apiToken == "": + tkn = base64.b64encode(f"{conn.username}:{conn.password}".encode()).decode() + headers = {"Authorization": f"Basic {tkn}"} + else: + headers = {"Authorization": f"Bearer {conn.apiToken}"} + + return headers async def stream_doc_ids( @@ -61,7 +120,7 @@ async def stream_doc_ids( headers = make_headers(conn) try: - async with httpx.AsyncClient(timeout=None) as client: + async with httpx.AsyncClient(timeout=http_timeout) as client: res = await client.post( f"{conn.restppUrl}/query/{conn.graphname}/StreamDocIds", params={ @@ -82,71 +141,6 @@ async def stream_doc_ids( return {"error": True, "message": str(e)} -async def stream_docs( - conn: TigerGraphConnection, - docs_chan: Channel, - ttl_batches: int = 10, -): - """ - Streams the document contents into the docs_chan - """ - headers = make_headers(conn) - for i in range(ttl_batches): - doc_ids = await stream_doc_ids(conn, i, ttl_batches) - if doc_ids["error"]: - break # TODO: handle error - - print("********") - print(doc_ids) - print("********") - for d in doc_ids["ids"]: - async with httpx.AsyncClient(timeout=None) as client: - res = await client.get( - f"{conn.restppUrl}/query/{conn.graphname}/StreamDocContent/", - params={"doc": d}, - headers=headers, - ) - # TODO: check for errors - # 
this will block and wait if the channel is full - await docs_chan.put(res.json()["results"][0]["DocContent"][0]) - # break # single doc test FIXME: delete - # break # single batch test FIXME: delete - - # close the docs chan -- this function is the only sender - docs_chan.close() - - -async def chunk_doc( - conn: TigerGraphConnection, - doc: dict[str, str], - upsert_chan: Channel, - embed_chan: Channel, - extract_chan: Channel, -): - """ - Chunks a document. - Places the resulting chunks into the upsert channel (to be upserted to TG) - and the embed channel (to be embedded and written to the vector store) - """ - chunker = ecc_util.get_chunker() - chunks = chunker.chunk(doc["attributes"]["text"]) - v_id = doc["v_id"] - # TODO: n chunks at a time - for i, chunk in enumerate(chunks): - # send chunks to be upserted (func, args) - await upsert_chan.put((upsert_chunk, (conn, v_id, f"{v_id}_chunk_{i}", chunk))) - - # send chunks to be embedded - await embed_chan.put(chunk) - - # send chunks to have entities extracted - await extract_chan.put(chunk) - - # break # single chunk FIXME: delete - - return doc["v_id"] - - def map_attrs(attributes: dict): # map attrs attrs = {} @@ -171,11 +165,13 @@ async def upsert_vertex( attrs = map_attrs(attributes) data = json.dumps({"vertices": {vertex_type: {vertex_id: attrs}}}) headers = make_headers(conn) - async with httpx.AsyncClient(timeout=None) as client: + # print("upsert vertex>>>", vertex_id) + async with httpx.AsyncClient(timeout=http_timeout) as client: res = await client.post( f"{conn.restppUrl}/graph/{conn.graphname}", data=data, headers=headers ) - print("upsert vertex>>>", res.json()) + + res.raise_for_status() async def upsert_edge( @@ -207,37 +203,9 @@ async def upsert_edge( } ) headers = make_headers(conn) - async with httpx.AsyncClient(timeout=None) as client: + # print("upsert edge >>>", src_v_id, tgt_v_id) + async with httpx.AsyncClient(timeout=http_timeout) as client: res = await client.post( f"{conn.restppUrl}/graph/{conn.graphname}", data=data, headers=headers ) - print("upsert edge >>>", res.json()) - - -async def upsert_chunk(conn: TigerGraphConnection, doc_id, chunk_id, chunk): - date_added = int(time.time()) - await upsert_vertex( - conn, - "DocumentChunk", - chunk_id, - attributes={"epoch_added": date_added, "idx": int(chunk_id.split("_")[-1])}, - ) - await upsert_vertex( - conn, - "Content", - chunk_id, - attributes={"text": chunk, "epoch_added": date_added}, - ) - await upsert_edge( - conn, "DocumentChunk", chunk_id, "HAS_CONTENT", "Content", chunk_id - ) - await upsert_edge(conn, "Document", doc_id, "HAS_CHILD", "DocumentChunk", chunk_id) - if int(chunk_id.split("_")[-1]) > 0: - await upsert_edge( - conn, - "DocumentChunk", - chunk_id, - "IS_AFTER", - "DocumentChunk", - doc_id + "_chunk_" + str(int(chunk_id.split("_")[-1]) - 1), - ) + res.raise_for_status() diff --git a/eventual-consistency-service/app/graphrag/worker.py b/eventual-consistency-service/app/graphrag/worker.py deleted file mode 100644 index 40720deb..00000000 --- a/eventual-consistency-service/app/graphrag/worker.py +++ /dev/null @@ -1,35 +0,0 @@ -import asyncio - -from aiochannel import Channel - - -async def worker( - n: int, - task_queue: Channel, -): - # init worker logging/reporting (TODO) - worker_name = f"worker-{n+1}" - worker_name += " " if n + 1 < 10 else "" - - while task_queue.empty(): - print(f"{worker_name} waiting") - await asyncio.sleep(1) - - # consume task queue - print(f"{worker_name} started") - responses = [] - while not task_queue.empty(): - # 
get the next task - func, args = await task_queue.get() - - # execute the task - response = await func(*args) - - # append task results to worker results/response - responses.append(response) - - # mark task as done - task_queue.task_done() - - print(f"{worker_name} done") - return responses diff --git a/eventual-consistency-service/app/graphrag/workers.py b/eventual-consistency-service/app/graphrag/workers.py new file mode 100644 index 00000000..3eb0d0dd --- /dev/null +++ b/eventual-consistency-service/app/graphrag/workers.py @@ -0,0 +1,226 @@ +import base64 +import logging +import time +from urllib.parse import quote_plus + +import ecc_util +import httpx +from aiochannel import Channel +from graphrag import util # import upsert_edge, upsert_vertex +from langchain_community.graphs.graph_document import GraphDocument +from pyTigerGraph import TigerGraphConnection + +from common.config import milvus_config +from common.embeddings.embedding_services import EmbeddingModel +from common.embeddings.milvus_embedding_store import MilvusEmbeddingStore +from common.extractors.BaseExtractor import BaseExtractor +from common.logs.logwriter import LogWriter + +vertex_field = milvus_config.get("vertex_field", "vertex_id") + +logger = logging.getLogger(__name__) + + +async def install_query( + conn: TigerGraphConnection, query_path: str +) -> dict[str, httpx.Response | str | None]: + LogWriter.info(f"Installing query {query_path}") + with open(f"{query_path}.gsql", "r") as f: + query = f.read() + + query_name = query_path.split("/")[-1] + query = f"""\ +USE GRAPH {conn.graphname} +{query} +INSTALL QUERY {query_name}""" + tkn = base64.b64encode(f"{conn.username}:{conn.password}".encode()).decode() + headers = {"Authorization": f"Basic {tkn}"} + + async with httpx.AsyncClient(timeout=util.http_timeout) as client: + res = await client.post( + conn.gsUrl + "/gsqlserver/gsql/file", + data=quote_plus(query.encode("utf-8")), + headers=headers, + ) + + if "error" in res.text.lower(): + LogWriter.error(res.text) + return { + "result": None, + "error": True, + "message": f"Failed to install query {query_name}", + } + + return {"result": res, "error": False} + + +async def chunk_doc( + conn: TigerGraphConnection, + doc: dict[str, str], + upsert_chan: Channel, + embed_chan: Channel, + extract_chan: Channel, +): + """ + Chunks a document. 
+ Places the resulting chunks into the upsert channel (to be upserted to TG) + and the embed channel (to be embedded and written to the vector store) + """ + chunker = ecc_util.get_chunker() + chunks = chunker.chunk(doc["attributes"]["text"]) + v_id = doc["v_id"] + logger.info(f"Chunking {v_id}") + # TODO: n chunks at a time + for i, chunk in enumerate(chunks): + chunk_id = f"{v_id}_chunk_{i}" + # send chunks to be upserted (func, args) + logger.info("chunk writes to upsert") + await upsert_chan.put((upsert_chunk, (conn, v_id, chunk_id, chunk))) + + # send chunks to be embedded + logger.info("chunk writes to embed") + await embed_chan.put((v_id, chunk, "DocumentChunk")) + + # send chunks to have entities extracted + logger.info("chunk writes to extract") + await extract_chan.put((chunk, chunk_id)) + + return doc["v_id"] + + +async def upsert_chunk(conn: TigerGraphConnection, doc_id, chunk_id, chunk): + logger.info(f"Upserting chunk {chunk_id}") + logger.info(f"Upserting chunk {chunk_id}") + date_added = int(time.time()) + await util.upsert_vertex( + conn, + "DocumentChunk", + chunk_id, + attributes={"epoch_added": date_added, "idx": int(chunk_id.split("_")[-1])}, + ) + await util.upsert_vertex( + conn, + "Content", + chunk_id, + attributes={"text": chunk, "epoch_added": date_added}, + ) + await util.upsert_edge( + conn, "DocumentChunk", chunk_id, "HAS_CONTENT", "Content", chunk_id + ) + await util.upsert_edge( + conn, "Document", doc_id, "HAS_CHILD", "DocumentChunk", chunk_id + ) + if int(chunk_id.split("_")[-1]) > 0: + await util.upsert_edge( + conn, + "DocumentChunk", + chunk_id, + "IS_AFTER", + "DocumentChunk", + doc_id + "_chunk_" + str(int(chunk_id.split("_")[-1]) - 1), + ) + + +async def embed( + embed_svc: EmbeddingModel, + embed_store: MilvusEmbeddingStore, + v_id: str, + content: str, +): + """ + Args: + graphname: str + the name of the graph the documents are in + embed_svc: EmbeddingModel + The class used to vectorize text + embed_store: + The class used to store the vectore to a vector DB + v_id: str + the vertex id that will be embedded + content: str + the content of the document/chunk + index_name: str + the vertex index to write to + """ + logger.info(f"Embedding {v_id}, {content}") + + vec = await embed_svc.aembed_query(content) + await embed_store.aadd_embeddings([(content, vec)], [{vertex_field: v_id}]) + + +async def extract( + upsert_chan: Channel, + embed_chan: Channel, + extractor: BaseExtractor, + conn: TigerGraphConnection, + chunk: str, + chunk_id: str, +): + logger.info(f"Extracting chunk: {chunk_id}") + extracted: list[GraphDocument] = await extractor.aextract(chunk) + # upsert nodes and edges to the graph + for doc in extracted: + for node in doc.nodes: + logger.info("extract writes entity vert to upsert") + logger.info(f"Node: {node.id}| props: {node.properties}") + v_id = str(node.id) + desc = node.properties.get("description", "") + await upsert_chan.put( + ( + util.upsert_vertex, # func to call + # conn, v_id, chunk_id, chunk + ( + conn, + "Entity", # v_type + v_id, # v_id + { # attrs + "description": desc, + "epoch_added": int(time.time()), + }, + ), + ) + ) + + # link the entity to the chunk it came from + logger.info("extract writes contains edge to upsert") + await upsert_chan.put( + ( + util.upsert_edge, + ( + conn, + "DocumentChunk", # src_type + chunk_id, # src_id + "CONTAINS_ENTITY", # edge_type + "Entity", # tgt_type + str(node.id), # tgt_id + None, # attributes + ), + ) + ) + + # embed the entity + # (v_id, content, index_name) + await 
embed_chan.put((v_id, desc, "Entity")) + + for edge in doc.relationships: + logger.info("extract writes relates edge to upsert") + logger.info(f"{edge}") + await upsert_chan.put( + ( + util.upsert_edge, + ( + conn, + "Entity", # src_type + edge.source.id, # src_id + "RELATIONSHIP", # edgeType + "Entity", # tgt_type + edge.target.id, # tgt_id + {"relation_type": edge.type}, # attributes + ), + ) + ) + # embed "Relationship", + # (v_id, content, index_name) + + # TODO: + # embed the extracted entities diff --git a/eventual-consistency-service/app/main.py b/eventual-consistency-service/app/main.py index 0277a272..85a1f8ae 100644 --- a/eventual-consistency-service/app/main.py +++ b/eventual-consistency-service/app/main.py @@ -188,7 +188,8 @@ def consistency_status( case SupportAIMethod.GRAPHRAG: background.add_task(graphrag.run, graphname, conn) # asyncio.run(graphrag.run(graphname, conn)) - ecc_status = f"hi from graph rag ecc: {conn.graphname} ({graphname})" + import time + ecc_status = f"hi from graph rag ecc: {conn.graphname} ({graphname}) {time.ctime()}" case _: response.status_code = status.HTTP_404_NOT_FOUND return f"Method unsupported, must be {SupportAIMethod.SUPPORTAI}, {SupportAIMethod.GRAPHRAG}" From bb37198f74e1012880868d44f8d6cbfa09acbfb0 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 30 Jul 2024 12:36:22 -0400 Subject: [PATCH 07/53] docs processing done -- start community passes --- common/embeddings/embedding_services.py | 2 +- common/gsql/graphRAG/StreamDocContent.gsql | 2 + common/gsql/graphRAG/StreamDocIds.gsql | 7 +- copilot/docs/notebooks/graphrag.ipynb | 159 ++++-------------- .../app/graphrag/graph_rag.py | 69 +++----- .../app/graphrag/util.py | 63 ++++--- .../app/graphrag/workers.py | 30 ++-- 7 files changed, 118 insertions(+), 214 deletions(-) diff --git a/common/embeddings/embedding_services.py b/common/embeddings/embedding_services.py index dd506670..7ce17478 100644 --- a/common/embeddings/embedding_services.py +++ b/common/embeddings/embedding_services.py @@ -136,7 +136,7 @@ def __init__(self, config): ) from langchain.embeddings import OpenAIEmbeddings - self.embeddings = OpenAIEmbeddings() + self.embeddings = OpenAIEmbeddings().aembed_query class VertexAI_PaLM_Embedding(EmbeddingModel): diff --git a/common/gsql/graphRAG/StreamDocContent.gsql b/common/gsql/graphRAG/StreamDocContent.gsql index 87f12566..a2845148 100644 --- a/common/gsql/graphRAG/StreamDocContent.gsql +++ b/common/gsql/graphRAG/StreamDocContent.gsql @@ -1,5 +1,7 @@ CREATE DISTRIBUTED QUERY StreamDocContent(Vertex doc) { Doc = {doc}; + + // Get the document's content and mark it as processed DocContent = SELECT c FROM Doc:d -(HAS_CONTENT)-> Content:c POST-ACCUM d.epoch_processed = datetime_to_epoch(now()); PRINT DocContent; diff --git a/common/gsql/graphRAG/StreamDocIds.gsql b/common/gsql/graphRAG/StreamDocIds.gsql index d5ec982e..2fb4a9c4 100644 --- a/common/gsql/graphRAG/StreamDocIds.gsql +++ b/common/gsql/graphRAG/StreamDocIds.gsql @@ -1,13 +1,16 @@ CREATE DISTRIBUTED QUERY StreamDocIds(INT current_batch, INT ttl_batches) { + /* + * Get the IDs of documents that have not already been processed (one + * batch at a time) + */ ListAccum @@doc_ids; Docs = {Document.*}; Docs = SELECT d FROM Docs:d WHERE vertex_to_int(d) % ttl_batches == current_batch AND d.epoch_processed == 0 - AND d.epoch_processing == 0 ACCUM @@doc_ids += d.id - POST-ACCUM d.epoch_processing = datetime_to_epoch(now()); + POST-ACCUM d.epoch_processing = 
datetime_to_epoch(now()); // set the processing time PRINT @@doc_ids; } diff --git a/copilot/docs/notebooks/graphrag.ipynb b/copilot/docs/notebooks/graphrag.ipynb index 57ea4b48..38b4939b 100644 --- a/copilot/docs/notebooks/graphrag.ipynb +++ b/copilot/docs/notebooks/graphrag.ipynb @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -54,18 +54,18 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'host_name': 'https://algotesting.i.tgcloud.io',\n", - " 'schema_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: [add_supportai_schema].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_schema\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 0)\\\\nTrying to add local vertex \\'DocumentChunk\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Document\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Concept\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Entity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Relationship\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'DocumentCollection\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Content\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Community\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'ResolvedEntity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CONTENT\\' and its reverse edge \\'reverse_HAS_CONTENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_CHILD_OF\\' and its reverse edge \\'reverse_IS_CHILD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_HEAD_OF\\' and its reverse edge \\'reverse_IS_HEAD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_TAIL\\' and its reverse edge \\'reverse_HAS_TAIL\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_RELATIONSHIP\\' and its reverse edge \\'reverse_DESCRIBES_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_ENTITY\\' and its reverse edge \\'reverse_DESCRIBES_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_ENTITY\\' and its reverse edge \\'reverse_CONTAINS_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'MENTIONS_RELATIONSHIP\\' and its reverse edge \\'reverse_MENTIONS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_AFTER\\' and its reverse edge \\'reverse_IS_AFTER\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CHILD\\' and its reverse edge \\'reverse_HAS_CHILD\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_RELATIONSHIP\\' and its reverse edge \\'reverse_HAS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_DOCUMENT\\' and 
its reverse edge \\'reverse_CONTAINS_DOCUMENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'KNN\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVES_TO\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVED_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IN_COMMUNITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 1\\\\nThe job add_supportai_schema completes in 2.434 seconds!\\\\nLocal schema change succeeded.\"',\n", - " 'index_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: [add_supportai_indexes].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_indexes\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 1)\\\\nTrying to add index \\'doc_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_indexepoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 2\\\\nThe job add_supportai_indexes completes in 1.932 seconds!\\\\nLocal schema change succeeded.\"'}" + " 'schema_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: [add_supportai_schema].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. 
To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_schema\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 0)\\\\nTrying to add local vertex \\'DocumentChunk\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Document\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Concept\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Entity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Relationship\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'DocumentCollection\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Content\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Community\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'ResolvedEntity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CONTENT\\' and its reverse edge \\'reverse_HAS_CONTENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_CHILD_OF\\' and its reverse edge \\'reverse_IS_CHILD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_HEAD_OF\\' and its reverse edge \\'reverse_IS_HEAD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_TAIL\\' and its reverse edge \\'reverse_HAS_TAIL\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_RELATIONSHIP\\' and its reverse edge \\'reverse_DESCRIBES_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_ENTITY\\' and its reverse edge \\'reverse_DESCRIBES_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_ENTITY\\' and its reverse edge \\'reverse_CONTAINS_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'MENTIONS_RELATIONSHIP\\' and its reverse edge \\'reverse_MENTIONS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_AFTER\\' and its reverse edge \\'reverse_IS_AFTER\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CHILD\\' and its reverse edge \\'reverse_HAS_CHILD\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_RELATIONSHIP\\' and its reverse edge \\'reverse_HAS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_DOCUMENT\\' and its reverse edge \\'reverse_CONTAINS_DOCUMENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RELATIONSHIP\\' and its reverse edge \\'reverse_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVES_TO\\' and its reverse edge \\'reverse_RESOLVES_TO\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVED_RELATIONSHIP\\' and its reverse edge \\'reverse_RESOLVED_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IN_COMMUNITY\\' and its reverse edge \\'reverse_IN_COMMUNITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 1\\\\nThe job add_supportai_schema completes in 2.335 seconds!\\\\nLocal schema change succeeded.\"',\n", + " 'index_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: 
[add_supportai_indexes].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_indexes\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 1)\\\\nTrying to add index \\'doc_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_indexepoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 2\\\\nThe job add_supportai_indexes completes in 2.059 seconds!\\\\nLocal schema change succeeded.\"'}" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -79,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -95,18 +95,18 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'job_name': 'load_documents_content_json_203b064024e3499ea41b876cc67a85cf',\n", - " 'job_id': 'GraphRAG_pytgdocs.load_documents_content_json_203b064024e3499ea41b876cc67a85cf.stream.SupportAI_GraphRAG_pytgdocs_5b098715edbd4c878f7425918eb553c0.1721853566538',\n", - " 'log_location': '/home/tigergraph/tigergraph/log/kafkaLoader/GraphRAG_pytgdocs.load_documents_content_json_203b064024e3499ea41b876cc67a85cf.stream.SupportAI_GraphRAG_pytgdocs_5b098715edbd4c878f7425918eb553c0.1721853566538'}" + "{'job_name': 'load_documents_content_json_a245f14bb5f443acaa051125e4d9a497',\n", + " 'job_id': 'GraphRAG_pytgdocs.load_documents_content_json_a245f14bb5f443acaa051125e4d9a497.stream.SupportAI_GraphRAG_pytgdocs_025b08b3cf60477dbbcfd22b4254d268.1722356202522',\n", + " 'log_location': '/home/tigergraph/tigergraph/log/kafkaLoader/GraphRAG_pytgdocs.load_documents_content_json_a245f14bb5f443acaa051125e4d9a497.stream.SupportAI_GraphRAG_pytgdocs_025b08b3cf60477dbbcfd22b4254d268.1722356202522'}" ] }, - 
"execution_count": 15, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -121,39 +121,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'asdf' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[7], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43masdf\u001b[49m\n", + "\u001b[0;31mNameError\u001b[0m: name 'asdf' is not defined" + ] + } + ], "source": [ "asdf" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'job_name': 'load_documents_content_json_203b064024e3499ea41b876cc67a85cf',\n", - " 'job_id': 'GraphRAG_pytgdocs.load_documents_content_json_203b064024e3499ea41b876cc67a85cf.stream.SupportAI_GraphRAG_pytgdocs_5b098715edbd4c878f7425918eb553c0.1721853623658',\n", - " 'log_location': '/home/tigergraph/tigergraph/log/kafkaLoader/GraphRAG_pytgdocs.load_documents_content_json_203b064024e3499ea41b876cc67a85cf.stream.SupportAI_GraphRAG_pytgdocs_5b098715edbd4c878f7425918eb553c0.1721853623658'}" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "for v in [\"Document\", \"Content\", \"DocumentChunk\"]:\n", + "for v in [\"Document\", \"Content\", \"DocumentChunk\",\"Entity\"]:\n", " try:\n", " conn.delVertices(v)\n", " except:\n", " pass\n", "\n", "import time\n", + "\n", "time.sleep(3)\n", "conn.ai.runDocumentIngest(\n", " res[\"load_job_id\"],\n", @@ -168,102 +168,11 @@ "metadata": {}, "outputs": [], "source": [ - "import httpx\n", - "import base64\n", - "\n", - "# conn.ai.forceConsistencyUpdate()\n", - "# url = self.nlqs_host+\"/\"+self.conn.graphname+\"/supportai/forceupdate\"\n", - "# return self.conn._req(\"GET\", url, authMode=\"pwd\", resKey=None)\n", - "httpx.get(f\"http://localhost:8000/{conn.graphname}/supportai/forceupdate\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_core.pydantic_v1 import BaseModel, Field\n", - "from langchain_openai import ChatOpenAI\n", - "\n", - "\n", - "class Joke(BaseModel):\n", - " setup: str = Field(description=\"The setup of the joke\")\n", - " punchline: str = Field(description=\"The punchline to the joke\")\n", - "\n", - "\n", - "model = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)\n", - "print(model.invoke(\"hi\"))\n", - "structured_llm = model.with_structured_output(Joke)\n", - "structured_llm.invoke(\"Tell me a joke about cats\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_core.documents import Document\n", - "from langchain_experimental.graph_transformers import LLMGraphTransformer\n", - "from langchain_openai import ChatOpenAI\n", - "import os\n", - "\n", - "# from langchain_core.pydantic_v1 import BaseModel\n", - "from pydantic import BaseModel\n", - "\n", - "\n", - "class AnswerWithJustification(BaseModel):\n", - " \"\"\"An answer to the user question along with justification for the answer.\"\"\"\n", - "\n", - " answer: str\n", - " justification: str\n", - "\n", - "\n", - "os.environ[\"OPENAI_API_KEY\"] = \"\"\n", - "model_name = 
\"gpt-4o-mini\"\n", - "llm = ChatOpenAI(model=model_name, temperature=0)\n", - "# sllm = llm.with_structured_output(AnswerWithJustification)\n", - "# print(sllm.invoke(\"What weighs more a pound of bricks or a pound of feathers\"))\n", - "\n", - "\n", - "class GraphExtractor:\n", - " def __init__(self):\n", - " self.transformer = LLMGraphTransformer(\n", - " llm=llm,\n", - " node_properties=[\"description\"],\n", - " relationship_properties=[\"description\"],\n", - " )\n", - "\n", - " def extract(self, text):\n", - " doc = Document(page_content=text)\n", - " graph_docs = self.transformer.convert_to_graph_documents([doc])\n", - " return graph_docs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "text = \"\"\"\n", - "Marie Curie, born in 1867, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.\n", - "She was the first woman to win a Nobel Prize, the first person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two scientific fields.\n", - "Her husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first-ever married couple to win the Nobel Prize and launching the Curie family legacy of five Nobel Prizes.\n", - "She was, in 1906, the first woman to become a professor at the University of Paris.\n", - "\"\"\"\n", - "ge = GraphExtractor()\n", - "\n", - "docs = ge.extract(text)\n", - "for d in docs:\n", - " for n in d.nodes:\n", - " print(n)\n", - " for r in d.relationships:\n", - " print(r)\n", - "# print(f\"Nodes:{docs[0].nodes}\")\n", - "# print(f\"Relationships:{docs[0].relationships}\")\n", - "# docs" + "conn.gsql(f\"\"\"\n", + "USE GRAPH {conn.graphname}\n", + "DROP QUERY StreamDocIds\n", + "DROP QUERY StreamDocContent\n", + "\"\"\")" ] } ], diff --git a/eventual-consistency-service/app/graphrag/graph_rag.py b/eventual-consistency-service/app/graphrag/graph_rag.py index e248510c..7e67b342 100644 --- a/eventual-consistency-service/app/graphrag/graph_rag.py +++ b/eventual-consistency-service/app/graphrag/graph_rag.py @@ -1,16 +1,16 @@ import asyncio import logging import time +import traceback import httpx from aiochannel import Channel -from graphrag import workers -from graphrag.util import init, make_headers, stream_doc_ids,http_timeout -from pyTigerGraph import TigerGraphConnection - from common.config import embedding_service from common.embeddings.milvus_embedding_store import MilvusEmbeddingStore from common.extractors.BaseExtractor import BaseExtractor +from graphrag import workers +from graphrag.util import http_timeout, init, make_headers, stream_doc_ids +from pyTigerGraph import TigerGraphConnection http_logs = logging.getLogger("httpx") http_logs.setLevel(logging.WARNING) @@ -32,28 +32,32 @@ async def stream_docs( for i in range(ttl_batches): doc_ids = await stream_doc_ids(conn, i, ttl_batches) if doc_ids["error"]: - continue # TODO: handle error + # continue to the next batch. + # These docs will not be marked as processed, so the ecc will process it eventually. 
+ continue - logger.info("********doc_ids") - logger.info(doc_ids) - logger.info("********") for d in doc_ids["ids"]: async with httpx.AsyncClient(timeout=http_timeout) as client: - res = await client.get( - f"{conn.restppUrl}/query/{conn.graphname}/StreamDocContent/", - params={"doc": d}, - headers=headers, - ) - # TODO: check for errors - # this will block and wait if the channel is full - logger.info("steam_docs writes to docs") - await docs_chan.put(res.json()["results"][0]["DocContent"][0]) - # break # single doc test FIXME: delete - # break # single batch test FIXME: delete + try: + res = await client.get( + f"{conn.restppUrl}/query/{conn.graphname}/StreamDocContent/", + params={"doc": d}, + headers=headers, + ) + if res.status_code != 200: + # continue to the next doc. + # This doc will not be marked as processed, so the ecc will process it eventually. + continue + logger.info("steam_docs writes to docs") + await docs_chan.put(res.json()["results"][0]["DocContent"][0]) + except Exception as e: + exc = traceback.format_exc() + logger.error(f"Error retrieving doc: {d} --> {e}\n{exc}") + continue # try retrieving the next doc logger.info("stream_docs done") # close the docs chan -- this function is the only sender - logger.info("****** closing docs chan") + logger.info("closing docs chan") docs_chan.close() @@ -72,8 +76,6 @@ async def chunk_docs( doc_tasks = [] async with asyncio.TaskGroup() as grp: async for content in docs_chan: - logger.info("*********reading from docs chan") - # continue v_id = content["v_id"] txt = content["attributes"]["text"] # send the document to be embedded @@ -84,17 +86,12 @@ async def chunk_docs( workers.chunk_doc(conn, content, upsert_chan, embed_chan, extract_chan) ) doc_tasks.append(task) - # break # single doc FIXME: delete - logger.info("*********done reading from docs chan") logger.info("chunk_docs done") - # do something with doc_tasks? - # for t in doc_tasks: - # logger.info(t.result()) # close the extract chan -- chunk_doc is the only sender # and chunk_doc calls are kicked off from here - logger.info("********closing extract chan") + logger.info("closing extract_chan") extract_chan.close() @@ -110,13 +107,11 @@ async def upsert(upsert_chan: Channel): upsert_tasks = [] async with asyncio.TaskGroup() as grp: async for func, args in upsert_chan: - logger.info("*********reading from upsert chan") logger.info(f"{func.__name__}, {args[1]}") # continue # execute the task t = grp.create_task(func(*args)) upsert_tasks.append(t) - logger.info("*********done reading from upsert chan") logger.info(f"upsert done") # do something with doc_tasks? 
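For orientation between these hunks: graph_rag.run wires the pipeline as a fan-out of async producers and consumers joined by bounded aiochannel Channels -- stream_docs fills docs_chan, chunk_docs fans each document out to the upsert, embed, and extract channels, and every consumer drains its channel inside an asyncio.TaskGroup until the sender closes it. Below is a minimal, self-contained sketch of that channel pattern only; the produce/consume/handle names and the toy payloads are illustrative placeholders, not the real workers.

import asyncio

from aiochannel import Channel


async def produce(chan: Channel):
    # Producer: put() blocks when the channel buffer is full (back-pressure),
    # and close() lets the consumer's `async for` loop finish cleanly.
    for i in range(5):
        await chan.put(f"doc-{i}")
    chan.close()


async def handle(item: str):
    # Stand-in for the real upsert/embed/extract work on one item.
    await asyncio.sleep(0.01)
    print("processed", item)


async def consume(chan: Channel):
    # Consumer: iteration ends once the channel is closed and drained.
    async with asyncio.TaskGroup() as grp:
        async for item in chan:
            grp.create_task(handle(item))


async def main():
    chan = Channel(1)  # small buffer, like docs_chan in run()
    async with asyncio.TaskGroup() as grp:
        grp.create_task(produce(chan))
        grp.create_task(consume(chan))


if __name__ == "__main__":
    asyncio.run(main())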
@@ -136,7 +131,6 @@ async def embed( async with asyncio.TaskGroup() as grp: # consume task queue async for v_id, content, index_name in embed_chan: - logger.info("*********reading from embed chan") # continue embedding_store = index_stores[f"{graphname}_{index_name}"] logger.info(f"Embed to {graphname}_{index_name}: {v_id}") @@ -148,7 +142,6 @@ async def embed( content, ) ) - logger.info("*********done reading from embed chan") logger.info(f"embed done") @@ -169,17 +162,13 @@ async def extract( # consume task queue async with asyncio.TaskGroup() as grp: async for item in extract_chan: - logger.info("*********reading from extract chan") - logger.info("*********done reading from extract chan") grp.create_task( workers.extract(upsert_chan, embed_chan, extractor, conn, *item) ) - # append task results to worker results/response - logger.info("*********done reading from extract chan") logger.info(f"extract done") - logger.info("****closing upsert and embed chan") + logger.info("closing upsert and embed chan") upsert_chan.close() embed_chan.close() @@ -202,9 +191,8 @@ async def run(graphname: str, conn: TigerGraphConnection): # return start = time.perf_counter() - # TODO: make configurable tasks = [] - docs_chan = Channel(1) # process n chunks at a time max + docs_chan = Channel(1) embed_chan = Channel(100) upsert_chan = Channel(100) extract_chan = Channel(100) @@ -230,5 +218,4 @@ async def run(graphname: str, conn: TigerGraphConnection): tasks.append(t) end = time.perf_counter() - logger.info("DONE") - logger.info(end - start) + logger.info(f"DONE. graphrag.run elapsed: {end-start}") diff --git a/eventual-consistency-service/app/graphrag/util.py b/eventual-consistency-service/app/graphrag/util.py index 3fb8f916..8f2c2141 100644 --- a/eventual-consistency-service/app/graphrag/util.py +++ b/eventual-consistency-service/app/graphrag/util.py @@ -5,9 +5,6 @@ import traceback import httpx -from graphrag import workers -from pyTigerGraph import TigerGraphConnection - from common.config import ( doc_processing_config, embedding_service, @@ -19,6 +16,8 @@ from common.extractors import GraphExtractor, LLMEntityRelationshipExtractor from common.extractors.BaseExtractor import BaseExtractor from common.logs.logwriter import LogWriter +from graphrag import workers +from pyTigerGraph import TigerGraphConnection logger = logging.getLogger(__name__) http_timeout = httpx.Timeout(15.0) @@ -33,6 +32,7 @@ async def install_queries( tasks = [] async with asyncio.TaskGroup() as grp: for q in requried_queries: + # only install n queries at a time (n=n_workers) async with asyncio.Semaphore(n_workers): q_name = q.split("/")[-1] # if the query is not installed, install it @@ -41,8 +41,17 @@ async def install_queries( tasks.append(task) for t in tasks: - logger.info(t.result()) - # TODO: Check if anything had an error + res = t.result() + # stop system if a required query doesn't install + if res["error"]: + raise Exception(res["message"]) + + +async def init_embedding_index(s: MilvusEmbeddingStore, vertex_field: str): + content = "init" + vec = embedding_service.embed_query(content) + await s.aadd_embeddings([(content, vec)], [{vertex_field: content}]) + s.remove_embeddings(expr=f"{vertex_field} in ['{content}']") async def init( @@ -78,28 +87,28 @@ async def init( ], ) index_stores = {} - content = "init" - # TODO:do concurrently - for index_name in index_names: - name = conn.graphname + "_" + index_name - s = MilvusEmbeddingStore( - embedding_service, - host=milvus_config["host"], - port=milvus_config["port"], - 
support_ai_instance=True, - collection_name=name, - username=milvus_config.get("username", ""), - password=milvus_config.get("password", ""), - vector_field=milvus_config.get("vector_field", "document_vector"), - text_field=milvus_config.get("text_field", "document_content"), - vertex_field=vertex_field, - ) - # TODO: only do this if collection doesn't exist - vec = embedding_service.embed_query(content) - LogWriter.info(f"Initializing {name}") - s.add_embeddings([(content, vec)], [{vertex_field: content}]) - s.remove_embeddings(expr=f"{vertex_field} in ['{content}']") - index_stores[name] = s + async with asyncio.TaskGroup() as tg: + for index_name in index_names: + name = conn.graphname + "_" + index_name + s = MilvusEmbeddingStore( + embedding_service, + host=milvus_config["host"], + port=milvus_config["port"], + support_ai_instance=True, + collection_name=name, + username=milvus_config.get("username", ""), + password=milvus_config.get("password", ""), + vector_field=milvus_config.get("vector_field", "document_vector"), + text_field=milvus_config.get("text_field", "document_content"), + vertex_field=vertex_field, + ) + + LogWriter.info(f"Initializing {name}") + # init collection if it doesn't exist + if not s.check_collection_exists(): + tg.create_task(init_embedding_index(s, vertex_field)) + + index_stores[name] = s return extractor, index_stores diff --git a/eventual-consistency-service/app/graphrag/workers.py b/eventual-consistency-service/app/graphrag/workers.py index 3eb0d0dd..b7267b60 100644 --- a/eventual-consistency-service/app/graphrag/workers.py +++ b/eventual-consistency-service/app/graphrag/workers.py @@ -6,15 +6,14 @@ import ecc_util import httpx from aiochannel import Channel -from graphrag import util # import upsert_edge, upsert_vertex -from langchain_community.graphs.graph_document import GraphDocument -from pyTigerGraph import TigerGraphConnection - from common.config import milvus_config from common.embeddings.embedding_services import EmbeddingModel from common.embeddings.milvus_embedding_store import MilvusEmbeddingStore from common.extractors.BaseExtractor import BaseExtractor from common.logs.logwriter import LogWriter +from graphrag import util # import upsert_edge, upsert_vertex +from langchain_community.graphs.graph_document import GraphDocument +from pyTigerGraph import TigerGraphConnection vertex_field = milvus_config.get("vertex_field", "vertex_id") @@ -36,7 +35,7 @@ async def install_query( tkn = base64.b64encode(f"{conn.username}:{conn.password}".encode()).decode() headers = {"Authorization": f"Basic {tkn}"} - async with httpx.AsyncClient(timeout=util.http_timeout) as client: + async with httpx.AsyncClient(timeout=None) as client: res = await client.post( conn.gsUrl + "/gsqlserver/gsql/file", data=quote_plus(query.encode("utf-8")), @@ -70,26 +69,24 @@ async def chunk_doc( chunks = chunker.chunk(doc["attributes"]["text"]) v_id = doc["v_id"] logger.info(f"Chunking {v_id}") - # TODO: n chunks at a time for i, chunk in enumerate(chunks): chunk_id = f"{v_id}_chunk_{i}" # send chunks to be upserted (func, args) - logger.info("chunk writes to upsert") + logger.info("chunk writes to upsert_chan") await upsert_chan.put((upsert_chunk, (conn, v_id, chunk_id, chunk))) # send chunks to be embedded - logger.info("chunk writes to embed") + logger.info("chunk writes to embed_chan") await embed_chan.put((v_id, chunk, "DocumentChunk")) # send chunks to have entities extracted - logger.info("chunk writes to extract") + logger.info("chunk writes to extract_chan") await 
extract_chan.put((chunk, chunk_id)) return doc["v_id"] async def upsert_chunk(conn: TigerGraphConnection, doc_id, chunk_id, chunk): - logger.info(f"Upserting chunk {chunk_id}") logger.info(f"Upserting chunk {chunk_id}") date_added = int(time.time()) await util.upsert_vertex( @@ -142,7 +139,7 @@ async def embed( index_name: str the vertex index to write to """ - logger.info(f"Embedding {v_id}, {content}") + logger.info(f"Embedding {v_id}") vec = await embed_svc.aembed_query(content) await embed_store.aadd_embeddings([(content, vec)], [{vertex_field: v_id}]) @@ -161,8 +158,7 @@ async def extract( # upsert nodes and edges to the graph for doc in extracted: for node in doc.nodes: - logger.info("extract writes entity vert to upsert") - logger.info(f"Node: {node.id}| props: {node.properties}") + logger.info(f"extract writes entity vert to upsert\nNode: {node.id}") v_id = str(node.id) desc = node.properties.get("description", "") await upsert_chan.put( @@ -203,8 +199,9 @@ async def extract( await embed_chan.put((v_id, desc, "Entity")) for edge in doc.relationships: - logger.info("extract writes relates edge to upsert") - logger.info(f"{edge}") + logger.info( + f"extract writes relates edge to upsert\n{edge.source.id} -({edge.type})-> {edge.target.id}" + ) await upsert_chan.put( ( util.upsert_edge, @@ -221,6 +218,3 @@ async def extract( ) # embed "Relationship", # (v_id, content, index_name) - - # TODO: - # embed the extracted entities From e9f178e34e39404774e76dd599f3917ba5856ac6 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Thu, 1 Aug 2024 18:09:26 -0400 Subject: [PATCH 08/53] save --- common/embeddings/embedding_services.py | 2 +- common/embeddings/milvus_embedding_store.py | 73 ++++- common/gsql/graphRAG/.clang-format | 269 ++++++++++++++++++ common/gsql/graphRAG/.clangd | 2 + .../gsql/graphRAG/ResolveRelationships.gsql | 26 ++ common/gsql/graphRAG/SetEpochProcessing.gsql | 7 + common/gsql/graphRAG/StreamIds.gsql | 16 ++ common/gsql/graphRAG/leven.cpp | 59 ++++ common/gsql/graphRAG/louvain/louvain1.gsql | 17 ++ .../louvain_1_first_pass.gsql | 16 +- .../louvain_2_other_passes.gsql | 0 .../louvain_3_final_community.gsql | 0 .../louvain_4_modularity_1_for_pass.gsql | 0 .../louvain_4_modularity_2_final.gsql | 0 .../louvain_5_reset.gsql | 0 common/gsql/supportai/SupportAI_Schema.gsql | 4 +- copilot/docs/notebooks/graphrag.ipynb | 227 +++++++++++++-- .../app/graphrag/graph_rag.py | 188 ++++++++---- .../app/graphrag/util.py | 50 +++- .../app/graphrag/workers.py | 148 +++++++++- 20 files changed, 989 insertions(+), 115 deletions(-) create mode 100644 common/gsql/graphRAG/.clang-format create mode 100644 common/gsql/graphRAG/.clangd create mode 100644 common/gsql/graphRAG/ResolveRelationships.gsql create mode 100644 common/gsql/graphRAG/SetEpochProcessing.gsql create mode 100644 common/gsql/graphRAG/StreamIds.gsql create mode 100644 common/gsql/graphRAG/leven.cpp create mode 100644 common/gsql/graphRAG/louvain/louvain1.gsql rename common/gsql/graphRAG/{louvain => louvain_old}/louvain_1_first_pass.gsql (88%) rename common/gsql/graphRAG/{louvain => louvain_old}/louvain_2_other_passes.gsql (100%) rename common/gsql/graphRAG/{louvain => louvain_old}/louvain_3_final_community.gsql (100%) rename common/gsql/graphRAG/{louvain => louvain_old}/louvain_4_modularity_1_for_pass.gsql (100%) rename common/gsql/graphRAG/{louvain => louvain_old}/louvain_4_modularity_2_final.gsql (100%) rename common/gsql/graphRAG/{louvain => 
louvain_old}/louvain_5_reset.gsql (100%) diff --git a/common/embeddings/embedding_services.py b/common/embeddings/embedding_services.py index 7ce17478..dd506670 100644 --- a/common/embeddings/embedding_services.py +++ b/common/embeddings/embedding_services.py @@ -136,7 +136,7 @@ def __init__(self, config): ) from langchain.embeddings import OpenAIEmbeddings - self.embeddings = OpenAIEmbeddings().aembed_query + self.embeddings = OpenAIEmbeddings() class VertexAI_PaLM_Embedding(EmbeddingModel): diff --git a/common/embeddings/milvus_embedding_store.py b/common/embeddings/milvus_embedding_store.py index ac9c5389..fd57c783 100644 --- a/common/embeddings/milvus_embedding_store.py +++ b/common/embeddings/milvus_embedding_store.py @@ -3,15 +3,16 @@ from time import sleep, time from typing import Iterable, List, Optional, Tuple -from langchain_community.vectorstores import Milvus -from langchain_core.documents.base import Document -from pymilvus import MilvusException, connections, utility - +import Levenshtein as lev +from asyncer import asyncify from common.embeddings.base_embedding_store import EmbeddingStore from common.embeddings.embedding_services import EmbeddingModel from common.logs.log import req_id_cv from common.logs.logwriter import LogWriter from common.metrics.prometheus_metrics import metrics +from langchain_community.vectorstores import Milvus +from langchain_core.documents.base import Document +from pymilvus import MilvusException, SearchResult, connections, utility logger = logging.getLogger(__name__) @@ -32,6 +33,7 @@ def __init__( alias: str = "alias", retry_interval: int = 2, max_retry_attempts: int = 10, + drop_old=False, ): self.embedding_service = embedding_service self.vector_field = vector_field @@ -42,6 +44,7 @@ def __init__( self.milvus_alias = alias self.retry_interval = retry_interval self.max_retry_attempts = max_retry_attempts + self.drop_old = drop_old if host.startswith("http"): if host.endswith(str(port)): @@ -86,7 +89,7 @@ def connect_to_milvus(self): collection_name=self.collection_name, connection_args=self.milvus_connection, auto_id=True, - drop_old=False, + drop_old=self.drop_old, text_field=self.text_field, vector_field=self.vector_field, ) @@ -118,6 +121,9 @@ def metadata_func(record: dict, metadata: dict) -> dict: return metadata LogWriter.info("Milvus add initial load documents init()") + import os + + logger.info(f"*******{os.path.exists('tg_documents')}") loader = DirectoryLoader( "./tg_documents/", glob="*.json", @@ -584,5 +590,62 @@ def query(self, expr: str, output_fields: List[str]): return query_result + def edit_dist_check(self, a: str, b: str, edit_dist_threshold: float, p=False): + a = a.lower() + b = b.lower() + # if the words are short, they should be the same + if len(a) < 5 and len(b) < 5: + return a == b + + # edit_dist_threshold (as a percent) of word must match + threshold = int(min(len(a), len(b)) * (1 - edit_dist_threshold)) + if p: + print(a, b, threshold, lev.distance(a, b)) + return lev.distance(a, b) < threshold + + async def aget_k_closest( + self, v_id: str, k=15, threshold_similarity=0.90, edit_dist_threshold_pct=0.75 + ) -> list[Document]: + """ + asdf + """ + threshold_dist = 1 - threshold_similarity + + # asyncify necessary funcs + query = asyncify(self.milvus.col.query) + search = asyncify(self.milvus.similarity_search_with_score_by_vector) + + # Get all vectors with this ID + verts = await query( + f'{self.vertex_field} == "{v_id}"', + output_fields=[self.vertex_field, self.vector_field], + ) + result = [] + for v in verts: 
+ # get the k closest verts + sim = await search( + v["document_vector"], + k=k, + ) + # filter verts using similiarity threshold and leven_dist + similar_verts = [ + doc.metadata["vertex_id"] + for doc, dist in sim + # check semantic similarity + if dist < threshold_dist + # check name similarity (won't merge Apple and Google if they're semantically similar) + and self.edit_dist_check( + doc.metadata["vertex_id"], + v_id, + edit_dist_threshold_pct, + # v_id == "Dataframe", + ) + # don't have to merge verts with the same id (they're the same) + and doc.metadata["vertex_id"] != v_id + ] + result.extend(similar_verts) + result.append(v_id) + return set(result) + def __del__(self): metrics.milvus_active_connections.labels(self.collection_name).dec diff --git a/common/gsql/graphRAG/.clang-format b/common/gsql/graphRAG/.clang-format new file mode 100644 index 00000000..f0dcec6c --- /dev/null +++ b/common/gsql/graphRAG/.clang-format @@ -0,0 +1,269 @@ +--- +Language: Cpp +# BasedOnStyle: Google +AccessModifierOffset: -1 +AlignAfterOpenBracket: Align +AlignArrayOfStructures: None +AlignConsecutiveAssignments: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: true +AlignConsecutiveBitFields: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignConsecutiveDeclarations: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignConsecutiveMacros: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignConsecutiveShortCaseStatements: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCaseColons: false +AlignEscapedNewlines: Left +AlignOperands: Align +AlignTrailingComments: + Kind: Always + OverEmptyLines: 0 +AllowAllArgumentsOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortEnumsOnASingleLine: true +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: WithoutElse +AllowShortLambdasOnASingleLine: All +AllowShortLoopsOnASingleLine: true +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: Yes +AttributeMacros: + - __capability +BinPackArguments: true +BinPackParameters: true +BitFieldColonSpacing: Both +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterExternBlock: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakAfterAttributes: Never +BreakAfterJavaFieldAnnotations: false +BreakArrays: true +BreakBeforeBinaryOperators: None +BreakBeforeConceptDeclarations: Always +BreakBeforeBraces: Attach +BreakBeforeInlineASMColon: OnlyMultiline +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: BeforeColon +BreakInheritanceList: BeforeColon +BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: true +DisableFormat: 
false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Regroup +IncludeCategories: + - Regex: '^' + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '^<.*\.h>' + Priority: 1 + SortPriority: 0 + CaseSensitive: false + - Regex: '^<.*' + Priority: 2 + SortPriority: 0 + CaseSensitive: false + - Regex: '.*' + Priority: 3 + SortPriority: 0 + CaseSensitive: false +IncludeIsMainRegex: '([-_](test|unittest))?$' +IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseBlocks: false +IndentCaseLabels: true +IndentExternBlock: AfterExternBlock +IndentGotoLabels: true +IndentPPDirectives: None +IndentRequiresClause: true +IndentWidth: 4 +IndentWrappedFunctionNames: false +InsertBraces: false +InsertNewlineAtEOF: false +InsertTrailingCommas: None +IntegerLiteralSeparator: + Binary: 0 + BinaryMinDigits: 0 + Decimal: 0 + DecimalMinDigits: 0 + Hex: 0 + HexMinDigits: 0 +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +KeepEmptyLinesAtEOF: false +LambdaBodyIndentation: Signature +LineEnding: DeriveLF +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Never +ObjCBlockIndentWidth: 4 +ObjCBreakBeforeNestedBlockParam: true +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PackConstructorInitializers: NextLine +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakOpenParenthesis: 0 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyIndentedWhitespace: 0 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Left +PPIndentWidth: -1 +QualifierAlignment: Leave +RawStringFormats: + - Language: Cpp + Delimiters: + - cc + - CC + - cpp + - Cpp + - CPP + - 'c++' + - 'C++' + CanonicalDelimiter: '' + BasedOnStyle: google + - Language: TextProto + Delimiters: + - pb + - PB + - proto + - PROTO + EnclosingFunctions: + - EqualsProto + - EquivToProto + - PARSE_PARTIAL_TEXT_PROTO + - PARSE_TEST_PROTO + - PARSE_TEXT_PROTO + - ParseTextOrDie + - ParseTextProtoOrDie + - ParseTestProto + - ParsePartialTestProto + CanonicalDelimiter: pb + BasedOnStyle: google +ReferenceAlignment: Pointer +ReflowComments: true +RemoveBracesLLVM: false +RemoveParentheses: Leave +RemoveSemicolon: false +RequiresClausePosition: OwnLine +RequiresExpressionIndentation: OuterScope +SeparateDefinitionBlocks: Leave +ShortNamespaceLines: 1 +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: LexicographicNumeric +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceAroundPointerQualifiers: Default +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeJsonColon: false +SpaceBeforeParens: ControlStatements +SpaceBeforeParensOptions: + AfterControlStatements: true + AfterForeachMacros: true + AfterFunctionDefinitionName: false + AfterFunctionDeclarationName: false + AfterIfMacros: true + AfterOverloadedOperator: false + AfterRequiresInClause: false + AfterRequiresInExpression: false + BeforeNonEmptyParentheses: false +SpaceBeforeRangeBasedForLoopColon: true 
+SpaceBeforeSquareBrackets: false +SpaceInEmptyBlock: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: Never +SpacesInContainerLiterals: true +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParens: Never +SpacesInParensOptions: + InCStyleCasts: false + InConditionalStatements: false + InEmptyParentheses: false + Other: false +SpacesInSquareBrackets: false +Standard: Auto +StatementAttributeLikeMacros: + - Q_EMIT +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 8 +UseTab: Never +VerilogBreakBetweenInstancePorts: true +WhitespaceSensitiveMacros: + - BOOST_PP_STRINGIZE + - CF_SWIFT_NAME + - NS_SWIFT_NAME + - PP_STRINGIZE + - STRINGIZE +... diff --git a/common/gsql/graphRAG/.clangd b/common/gsql/graphRAG/.clangd new file mode 100644 index 00000000..ec3be0d8 --- /dev/null +++ b/common/gsql/graphRAG/.clangd @@ -0,0 +1,2 @@ +CompileFlags: + Add: [ -std=c++23 ] diff --git a/common/gsql/graphRAG/ResolveRelationships.gsql b/common/gsql/graphRAG/ResolveRelationships.gsql new file mode 100644 index 00000000..d3c69297 --- /dev/null +++ b/common/gsql/graphRAG/ResolveRelationships.gsql @@ -0,0 +1,26 @@ +CREATE DISTRIBUTED QUERY ResolveRelationships(BOOL printResults=FALSE) SYNTAX V2 { + /* + * RE1 <- entity -RELATES-> entity -> RE2 + * to + * RE1 -resolved-> RE + * + * Combines all of a Resolved entity's children's relationships into + * RESOLVED_RELATIONSHIP + */ + REs = {ResolvedEntity.*}; + + + REs = SELECT re1 FROM REs:re1 -(:rel)- Entity:e_tgt -(RESOLVES_TO>:r)- ResolvedEntity:re2 + // Connect the The first RE to the second RE + ACCUM + INSERT INTO RESOLVED_RELATIONSHIP(FROM,TO) VALUES(re1, re2); + + + IF printResults THEN + // show which entities didn't get resolved + Ents = {Entity.*}; + rEnts = SELECT e FROM Ents:e -(RESOLVES_TO>)- _; + ents = Ents minus rEnts; + PRINT ents; + END; +} diff --git a/common/gsql/graphRAG/SetEpochProcessing.gsql b/common/gsql/graphRAG/SetEpochProcessing.gsql new file mode 100644 index 00000000..9a92ecf9 --- /dev/null +++ b/common/gsql/graphRAG/SetEpochProcessing.gsql @@ -0,0 +1,7 @@ +CREATE DISTRIBUTED QUERY SetEpochProcessing(Vertex v_id) { + Verts = {v_id}; + + // mark the vertex as processed + Verts = SELECT v FROM Verts:v + POST-ACCUM v.epoch_processed = datetime_to_epoch(now()); +} diff --git a/common/gsql/graphRAG/StreamIds.gsql b/common/gsql/graphRAG/StreamIds.gsql new file mode 100644 index 00000000..41181007 --- /dev/null +++ b/common/gsql/graphRAG/StreamIds.gsql @@ -0,0 +1,16 @@ +CREATE DISTRIBUTED QUERY StreamIds(INT current_batch, INT ttl_batches, STRING v_type) { + /* + * Get the IDs of entities that have not already been processed + * (one batch at a time) + */ + ListAccum @@ids; + Verts = {v_type}; + + Verts = SELECT v FROM Verts:v + WHERE vertex_to_int(v) % ttl_batches == current_batch + AND v.epoch_processed == 0 + ACCUM @@ids += v.id + POST-ACCUM v.epoch_processing = datetime_to_epoch(now()); // set the processing time + + PRINT @@ids; +} diff --git a/common/gsql/graphRAG/leven.cpp b/common/gsql/graphRAG/leven.cpp new file mode 100644 index 00000000..10c45669 --- /dev/null +++ b/common/gsql/graphRAG/leven.cpp @@ -0,0 +1,59 @@ +#include +#include + +// Returns the Levenshtein distance between word1 and word2. +int levenshteinDist(std::string word1, std::string word2) { + int size1 = word1.size(); + int size2 = word2.size(); + int verif[size1 + 1][size2 + 1]; // Verification matrix i.e. 2D array + // which will store the calculated distance. 
+ + // If one of the words has zero length, the distance is equal to the size of + // the other word. + if (size1 == 0) return size2; + if (size2 == 0) return size1; + + // Sets the first row and the first column of the verification matrix with + // the numerical order from 0 to the length of each word. + for (int i = 0; i <= size1; i++) verif[i][0] = i; + for (int j = 0; j <= size2; j++) verif[0][j] = j; + + // Verification step / matrix filling. + for (int i = 1; i <= size1; i++) { + for (int j = 1; j <= size2; j++) { + // Sets the modification cost. + // 0 means no modification (i.e. equal letters) and 1 means that a + // modification is needed (i.e. unequal letters). + int cost = (word2[j - 1] == word1[i - 1]) ? 0 : 1; + + // Sets the current position of the matrix as the minimum value + // between a (deletion), b (insertion) and c (substitution). a = the + // upper adjacent value plus 1: verif[i - 1][j] + 1 b = the left + // adjacent value plus 1: verif[i][j - 1] + 1 c = the upper left + // adjacent value plus the modification cost: verif[i - 1][j - 1] + + // cost + verif[i][j] = + std::min(std::min(verif[i - 1][j] + 1, verif[i][j - 1] + 1), + verif[i - 1][j - 1] + cost); + } + } + + // The last position of the matrix will contain the Levenshtein distance. + return verif[size1][size2]; +} + +int main() { + std::string word1, word2; + + std::cout << "Please input the first word: " << std::endl; + std::cin >> word1; + std::cout << "Please input the second word: " << std::endl; + std::cin >> word2; + + // cout << "The number of modifications needed in order to make one word " + // "equal to the other is: " + std::cout << "The edit distance is: " << levenshteinDist(word1, word2) + << std::endl; + + return 0; +} diff --git a/common/gsql/graphRAG/louvain/louvain1.gsql b/common/gsql/graphRAG/louvain/louvain1.gsql new file mode 100644 index 00000000..494a3625 --- /dev/null +++ b/common/gsql/graphRAG/louvain/louvain1.gsql @@ -0,0 +1,17 @@ +CREATE DISTRIBUTED QUERY graphRAG_louvain_1() { + + Ents = {ResolvedEntity.*}; + + // Put each node into a distinct community + // Assume each Entity starts in its own community + + // For each node i + // Compute ∆Q (modularity) when putting node i into the community of some neighbor j + // move i to community that yields the largest gain in ∆Q + + Z = SELECT v FROM Ents:v -(_:e)-> ResolvedEntity:r + + + ; +} + diff --git a/common/gsql/graphRAG/louvain/louvain_1_first_pass.gsql b/common/gsql/graphRAG/louvain_old/louvain_1_first_pass.gsql similarity index 88% rename from common/gsql/graphRAG/louvain/louvain_1_first_pass.gsql rename to common/gsql/graphRAG/louvain_old/louvain_1_first_pass.gsql index 4ca06029..0251909f 100644 --- a/common/gsql/graphRAG/louvain/louvain_1_first_pass.gsql +++ b/common/gsql/graphRAG/louvain_old/louvain_1_first_pass.gsql @@ -2,20 +2,20 @@ CREATE OR REPLACE DISTRIBUTED QUERY graphRAG_louvain_1( UINT max_hop = 10, UINT batch_num = 12, UINT sample_edge_num = 100 -) FOR GRAPH {graph_name} SYNTAX v1 { +) { - TYPEDEF TUPLE community, STRING ext_vid> MyTuple; --> this should be Community, I think + TYPEDEF TUPLE community, STRING ext_vid> MyTuple; //--> this should be Community, I think SumAccum @@m; // the sum of the weights of all the links in the network - MinAccum> @{community_id_attribute_name}; // the community ID of the node + MinAccum> @{community_id_attribute_name}; // the community ID of the node MinAccum @community_vid; // the community ID of the node SumAccum @k; // the sum of the weights of the links incident to the node 
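    // For reference, these accumulators map onto the standard Louvain quantities
    // (Blondel et al. 2008); the notation below is illustrative:
    //   m         = @@m                     (total edge weight of the network)
    //   k_i       = s.@k                    (weighted degree of node i)
    //   k_i_in    = s.@k_in                 (weight from i into its community)
    //   Sigma_tot = s.@community_sum_total  (total weight incident to community C)
    // Modularity:  Q = (1/(2m)) * sum_ij [ A_ij - k_i*k_j/(2m) ] * delta(c_i, c_j)
    // Gain of moving an isolated node i into C:
    //   dQ = k_i_in/m - (Sigma_tot * k_i)/(2*m^2)
    // The queries compare candidate moves with a positively scaled version of this
    // value, so the community chosen (the argmax) is unchanged.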
SumAccum @k_in; // the sum of the weights of the links inside the previous community of the node SumAccum @k_self_loop; // the weight of the self-loop link - MapAccum, SumAccum> @community_k_in_map; // the community of the neighbors of the nodes -> the sum of the weights of the links inside the community - MapAccum, SumAccum> @@community_sum_total_map; // community ID C -> the sum of the weights of the links incident to nodes in C + MapAccum, SumAccum> @community_k_in_map; // the community of the neighbors of the nodes -> the sum of the weights of the links inside the community + MapAccum, SumAccum> @@community_sum_total_map; // community ID C -> the sum of the weights of the links incident to nodes in C SumAccum @community_sum_total; // the sum of the weights of the links incident to nodes in the community of the node - MapAccum, SumAccum> @@community_sum_in_map; // community ID -> the sum of the weights of the links inside the community - MapAccum, MapAccum, SumAccum>> @@source_target_k_in_map; // source community ID -> (target community ID -> the sum of the weights of the links from the source community to the target community) + MapAccum, SumAccum> @@community_sum_in_map; // community ID -> the sum of the weights of the links inside the community + MapAccum, MapAccum, SumAccum>> @@source_target_k_in_map; // source community ID -> (target community ID -> the sum of the weights of the links from the source community to the target community) SumAccum @delta_Q_remove; // delta Q to remove the node from the previous community MaxAccum @best_move; // best move of the node with the highest delta Q to move the isolated node into the new community MaxAccum @@min_double; // used to reset the @best_move @@ -27,7 +27,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY graphRAG_louvain_1( DOUBLE wt = 1.0; // Initialization - All_Nodes = {{{entity_vertex_name}.*}}; + All_Nodes = {{ResolvedEntity.*}}; All_Nodes = SELECT s FROM All_Nodes:s -({relation_edge_name}:e)- :t ACCUM @@m += wt / 2, s.@k += wt, diff --git a/common/gsql/graphRAG/louvain/louvain_2_other_passes.gsql b/common/gsql/graphRAG/louvain_old/louvain_2_other_passes.gsql similarity index 100% rename from common/gsql/graphRAG/louvain/louvain_2_other_passes.gsql rename to common/gsql/graphRAG/louvain_old/louvain_2_other_passes.gsql diff --git a/common/gsql/graphRAG/louvain/louvain_3_final_community.gsql b/common/gsql/graphRAG/louvain_old/louvain_3_final_community.gsql similarity index 100% rename from common/gsql/graphRAG/louvain/louvain_3_final_community.gsql rename to common/gsql/graphRAG/louvain_old/louvain_3_final_community.gsql diff --git a/common/gsql/graphRAG/louvain/louvain_4_modularity_1_for_pass.gsql b/common/gsql/graphRAG/louvain_old/louvain_4_modularity_1_for_pass.gsql similarity index 100% rename from common/gsql/graphRAG/louvain/louvain_4_modularity_1_for_pass.gsql rename to common/gsql/graphRAG/louvain_old/louvain_4_modularity_1_for_pass.gsql diff --git a/common/gsql/graphRAG/louvain/louvain_4_modularity_2_final.gsql b/common/gsql/graphRAG/louvain_old/louvain_4_modularity_2_final.gsql similarity index 100% rename from common/gsql/graphRAG/louvain/louvain_4_modularity_2_final.gsql rename to common/gsql/graphRAG/louvain_old/louvain_4_modularity_2_final.gsql diff --git a/common/gsql/graphRAG/louvain/louvain_5_reset.gsql b/common/gsql/graphRAG/louvain_old/louvain_5_reset.gsql similarity index 100% rename from common/gsql/graphRAG/louvain/louvain_5_reset.gsql rename to common/gsql/graphRAG/louvain_old/louvain_5_reset.gsql diff --git 
a/common/gsql/supportai/SupportAI_Schema.gsql b/common/gsql/supportai/SupportAI_Schema.gsql index 0e3cf6c3..1a705eaf 100644 --- a/common/gsql/supportai/SupportAI_Schema.gsql +++ b/common/gsql/supportai/SupportAI_Schema.gsql @@ -2,7 +2,7 @@ CREATE SCHEMA_CHANGE JOB add_supportai_schema { ADD VERTEX DocumentChunk(PRIMARY_ID id STRING, idx INT, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; ADD VERTEX Document(PRIMARY_ID id STRING, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; ADD VERTEX Concept(PRIMARY_ID id STRING, description STRING, concept_type STRING, human_curated BOOL, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; - ADD VERTEX Entity(PRIMARY_ID id STRING, definition STRING, description STRING, entity_type STRING, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + ADD VERTEX Entity(PRIMARY_ID id STRING, definition STRING, description SET, entity_type STRING, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; ADD VERTEX Relationship(PRIMARY_ID id STRING, definition STRING, short_name STRING, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; ADD VERTEX DocumentCollection(PRIMARY_ID id STRING, epoch_added UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; ADD VERTEX Content(PRIMARY_ID id STRING, text STRING, epoch_added UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; @@ -21,7 +21,7 @@ CREATE SCHEMA_CHANGE JOB add_supportai_schema { // GraphRAG ADD VERTEX Community(PRIMARY_ID id STRING, description INT) WITH PRIMARY_ID_AS_ATTRIBUTE="true"; - ADD VERTEX ResolvedEntity(PRIMARY_ID id STRING, description STRING, entity_type STRING) WITH PRIMARY_ID_AS_ATTRIBUTE="true"; + ADD VERTEX ResolvedEntity(PRIMARY_ID id STRING, entity_type STRING) WITH PRIMARY_ID_AS_ATTRIBUTE="true"; ADD DIRECTED EDGE RELATIONSHIP(FROM Entity, TO Entity, relation_type STRING) WITH REVERSE_EDGE="reverse_RELATIONSHIP"; // TODO: check where knn algo writes results ADD DIRECTED EDGE RESOLVES_TO(FROM Entity, TO ResolvedEntity, relation_type STRING) WITH REVERSE_EDGE="reverse_RESOLVES_TO"; // Connect ResolvedEntities with their children entities diff --git a/copilot/docs/notebooks/graphrag.ipynb b/copilot/docs/notebooks/graphrag.ipynb index 38b4939b..bde1b78f 100644 --- a/copilot/docs/notebooks/graphrag.ipynb +++ b/copilot/docs/notebooks/graphrag.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -33,7 +33,7 @@ "'The graph GraphRAG_pytgdocs is created.'" ] }, - "execution_count": 2, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -54,18 +54,18 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'host_name': 
'https://algotesting.i.tgcloud.io',\n", - " 'schema_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: [add_supportai_schema].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_schema\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 0)\\\\nTrying to add local vertex \\'DocumentChunk\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Document\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Concept\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Entity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Relationship\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'DocumentCollection\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Content\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Community\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'ResolvedEntity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CONTENT\\' and its reverse edge \\'reverse_HAS_CONTENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_CHILD_OF\\' and its reverse edge \\'reverse_IS_CHILD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_HEAD_OF\\' and its reverse edge \\'reverse_IS_HEAD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_TAIL\\' and its reverse edge \\'reverse_HAS_TAIL\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_RELATIONSHIP\\' and its reverse edge \\'reverse_DESCRIBES_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_ENTITY\\' and its reverse edge \\'reverse_DESCRIBES_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_ENTITY\\' and its reverse edge \\'reverse_CONTAINS_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'MENTIONS_RELATIONSHIP\\' and its reverse edge \\'reverse_MENTIONS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_AFTER\\' and its reverse edge \\'reverse_IS_AFTER\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CHILD\\' and its reverse edge \\'reverse_HAS_CHILD\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_RELATIONSHIP\\' and its reverse edge \\'reverse_HAS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_DOCUMENT\\' and its reverse edge \\'reverse_CONTAINS_DOCUMENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RELATIONSHIP\\' and its reverse edge \\'reverse_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVES_TO\\' and its reverse edge \\'reverse_RESOLVES_TO\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVED_RELATIONSHIP\\' and its reverse edge \\'reverse_RESOLVED_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IN_COMMUNITY\\' and its reverse edge 
\\'reverse_IN_COMMUNITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 1\\\\nThe job add_supportai_schema completes in 2.335 seconds!\\\\nLocal schema change succeeded.\"',\n", - " 'index_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: [add_supportai_indexes].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_indexes\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 1)\\\\nTrying to add index \\'doc_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_indexepoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 2\\\\nThe job add_supportai_indexes completes in 2.059 seconds!\\\\nLocal schema change succeeded.\"'}" + " 'schema_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: [add_supportai_schema].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. 
To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_schema\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 0)\\\\nTrying to add local vertex \\'DocumentChunk\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Document\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Concept\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Entity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Relationship\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'DocumentCollection\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Content\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Community\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'ResolvedEntity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CONTENT\\' and its reverse edge \\'reverse_HAS_CONTENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_CHILD_OF\\' and its reverse edge \\'reverse_IS_CHILD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_HEAD_OF\\' and its reverse edge \\'reverse_IS_HEAD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_TAIL\\' and its reverse edge \\'reverse_HAS_TAIL\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_RELATIONSHIP\\' and its reverse edge \\'reverse_DESCRIBES_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_ENTITY\\' and its reverse edge \\'reverse_DESCRIBES_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_ENTITY\\' and its reverse edge \\'reverse_CONTAINS_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'MENTIONS_RELATIONSHIP\\' and its reverse edge \\'reverse_MENTIONS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_AFTER\\' and its reverse edge \\'reverse_IS_AFTER\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CHILD\\' and its reverse edge \\'reverse_HAS_CHILD\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_RELATIONSHIP\\' and its reverse edge \\'reverse_HAS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_DOCUMENT\\' and its reverse edge \\'reverse_CONTAINS_DOCUMENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RELATIONSHIP\\' and its reverse edge \\'reverse_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVES_TO\\' and its reverse edge \\'reverse_RESOLVES_TO\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVED_RELATIONSHIP\\' and its reverse edge \\'reverse_RESOLVED_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IN_COMMUNITY\\' and its reverse edge \\'reverse_IN_COMMUNITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 1\\\\nThe job add_supportai_schema completes in 2.208 seconds!\\\\nLocal schema change succeeded.\"',\n", + " 'index_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: 
[add_supportai_indexes].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_indexes\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 1)\\\\nTrying to add index \\'doc_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_indexepoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 2\\\\nThe job add_supportai_indexes completes in 3.025 seconds!\\\\nLocal schema change succeeded.\"'}" ] }, - "execution_count": 4, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -79,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -95,18 +95,18 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'job_name': 'load_documents_content_json_a245f14bb5f443acaa051125e4d9a497',\n", - " 'job_id': 'GraphRAG_pytgdocs.load_documents_content_json_a245f14bb5f443acaa051125e4d9a497.stream.SupportAI_GraphRAG_pytgdocs_025b08b3cf60477dbbcfd22b4254d268.1722356202522',\n", - " 'log_location': '/home/tigergraph/tigergraph/log/kafkaLoader/GraphRAG_pytgdocs.load_documents_content_json_a245f14bb5f443acaa051125e4d9a497.stream.SupportAI_GraphRAG_pytgdocs_025b08b3cf60477dbbcfd22b4254d268.1722356202522'}" + "{'job_name': 'load_documents_content_json_b89acfebac9e4fb98efd20a49659808e',\n", + " 'job_id': 'GraphRAG_pytgdocs.load_documents_content_json_b89acfebac9e4fb98efd20a49659808e.stream.SupportAI_GraphRAG_pytgdocs_5698bff74d844534901cba9e1b3d55bf.1722466964295',\n", + " 'log_location': '/home/tigergraph/tigergraph/log/kafkaLoader/GraphRAG_pytgdocs.load_documents_content_json_b89acfebac9e4fb98efd20a49659808e.stream.SupportAI_GraphRAG_pytgdocs_5698bff74d844534901cba9e1b3d55bf.1722466964295'}" ] }, - 
"execution_count": 6, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -121,7 +121,41 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import httpx\n", + "import base64\n", + "\n", + "\n", + "def make_headers(conn: TigerGraphConnection):\n", + " tkn = base64.b64encode(f\"{conn.username}:{conn.password}\".encode()).decode()\n", + " headers = {\"Authorization\": f\"Basic {tkn}\"}\n", + " return headers\n", + "\n", + "\n", + "httpx.get(\n", + " \"http://localhost:8001/GraphRAG_pytgdocs/consistency_status/graphrag\",\n", + " headers=make_headers(conn),\n", + ")\n", + "# conn.ai.forceConsistencyUpdate()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -131,7 +165,7 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[7], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43masdf\u001b[49m\n", + "Cell \u001b[0;32mIn[16], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43masdf\u001b[49m\n", "\u001b[0;31mNameError\u001b[0m: name 'asdf' is not defined" ] } @@ -142,11 +176,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'job_name': 'load_documents_content_json_b89acfebac9e4fb98efd20a49659808e',\n", + " 'job_id': 'GraphRAG_pytgdocs.load_documents_content_json_b89acfebac9e4fb98efd20a49659808e.stream.SupportAI_GraphRAG_pytgdocs_5698bff74d844534901cba9e1b3d55bf.1722531204658',\n", + " 'log_location': '/home/tigergraph/tigergraph/log/kafkaLoader/GraphRAG_pytgdocs.load_documents_content_json_b89acfebac9e4fb98efd20a49659808e.stream.SupportAI_GraphRAG_pytgdocs_5698bff74d844534901cba9e1b3d55bf.1722531204658'}" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "for v in [\"Document\", \"Content\", \"DocumentChunk\",\"Entity\"]:\n", + "for v in [\"Document\", \"Content\", \"DocumentChunk\", \"Entity\",\"ResolvedEntity\"]:\n", + "# for v in [\"ResolvedEntity\"]:\n", " try:\n", " conn.delVertices(v)\n", " except:\n", @@ -170,10 +218,147 @@ "source": [ "conn.gsql(f\"\"\"\n", "USE GRAPH {conn.graphname}\n", - "DROP QUERY StreamDocIds\n", - "DROP QUERY StreamDocContent\n", + "DROP QUERY ResolveRelationships\n", "\"\"\")" ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'deleted_vertices'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[33], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m conn\u001b[38;5;241m.\u001b[39mgetToken()\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCommunity\u001b[39m\u001b[38;5;124m\"\u001b[39m]:\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# for v in [\"ResolvedEntity\"]:\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m 
\u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdelVertices\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.venv/ml/lib/python3.11/site-packages/pyTigerGraph/pyTigerGraphVertex.py:688\u001b[0m, in \u001b[0;36mpyTigerGraphVertex.delVertices\u001b[0;34m(self, vertexType, where, limit, sort, permanent, timeout)\u001b[0m\n\u001b[1;32m 685\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mand\u001b[39;00m timeout \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 686\u001b[0m url \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m?\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m isFirst \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m&\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimeout=\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mstr\u001b[39m(timeout)\n\u001b[0;32m--> 688\u001b[0m ret \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_delete\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdeleted_vertices\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[1;32m 690\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m logger\u001b[38;5;241m.\u001b[39mlevel \u001b[38;5;241m==\u001b[39m logging\u001b[38;5;241m.\u001b[39mDEBUG:\n\u001b[1;32m 691\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mreturn: \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mstr\u001b[39m(ret))\n", + "\u001b[0;31mKeyError\u001b[0m: 'deleted_vertices'" + ] + } + ], + "source": [ + "conn.graphname = \"Cora\"\n", + "conn.getToken()\n", + "for v in [\"Community\"]:\n", + " # for v in [\"ResolvedEntity\"]:\n", + " conn.delVertices(v)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import base64\n", + "import json\n", + "import httpx\n", + "import logging\n", + "\n", + "_ = logging.getLogger(__name__)\n", + "\n", + "\n", + "http_timeout = None\n", + "\n", + "\n", + "def make_headers(conn: TigerGraphConnection):\n", + " if conn.apiToken is None or conn.apiToken == \"\":\n", + " tkn = base64.b64encode(f\"{conn.username}:{conn.password}\".encode()).decode()\n", + " headers = {\"Authorization\": f\"Basic {tkn}\"}\n", + " else:\n", + " headers = {\"Authorization\": f\"Bearer {conn.apiToken}\"}\n", + "\n", + " return headers\n", + "\n", + "\n", + "def check_vertex_exists(conn, id):\n", + " headers = make_headers(conn)\n", + " with httpx.Client(timeout=http_timeout) as client:\n", + " res = client.get(\n", + " f\"{conn.restppUrl}/graph/{conn.graphname}/vertices/Entity/{id}\",\n", + " headers=headers,\n", + " )\n", + "\n", + " res.raise_for_status()\n", + " return res.json()\n", + "\n", + "\n", + "# r = check_vertex_exists(conn, \"asdfTigergraphexception\")\n", + "# print(json.dumps(r, indent=2), r[\"error\"])\n", + "r = check_vertex_exists(conn, \"Tigergraphexception\")\n", + "print(json.dumps(r, indent=2), r[\"error\"])\n", + "r[\"results\"][0][\"attributes\"][\"description\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def 
map_attrs(attributes: dict):\n", + " # map attrs\n", + " attrs = {}\n", + " for k, v in attributes.items():\n", + " if isinstance(v, tuple):\n", + " attrs[k] = {\"value\": v[0], \"op\": v[1]}\n", + " elif isinstance(v, dict):\n", + " attrs[k] = {\n", + " \"value\": {\"keylist\": list(v.keys()), \"valuelist\": list(v.values())}\n", + " }\n", + " else:\n", + " attrs[k] = {\"value\": v}\n", + " return attrs\n", + "\n", + "\n", + "def process_id(v_id: str):\n", + " return v_id.replace(\" \", \"_\").replace(\"/\", \"\")\n", + "\n", + "\n", + "def a(vertex_id=\"Post /Requesttoken\"):\n", + " vertex_id = process_id(vertex_id)\n", + " attributes = { # attrs\n", + " \"description\": [\"test\"],\n", + " \"epoch_added\": int(time.time()),\n", + " }\n", + "\n", + " vertex_id = vertex_id.replace(\" \", \"_\")\n", + " attrs = map_attrs(attributes)\n", + " data = json.dumps({\"vertices\": {\"Entity\": {vertex_id: attrs}}})\n", + " headers = make_headers(conn)\n", + " with httpx.Client(timeout=http_timeout) as client:\n", + " res = client.post(\n", + " f\"{conn.restppUrl}/graph/{conn.graphname}\", data=data, headers=headers\n", + " )\n", + "\n", + " res.raise_for_status()\n", + "\n", + " return res.json()\n", + "\n", + "\n", + "a()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from urllib import parse\n", + "\n", + "v_id = \"Post_/Requesttoken\"\n", + "v_id = process_id(v_id)\n", + "print(v_id)\n", + "\n", + "r = check_vertex_exists(conn, v_id)\n", + "print(json.dumps(r, indent=2), r[\"error\"])\n", + "r[\"results\"][0][\"attributes\"][\"description\"]" + ] } ], "metadata": { diff --git a/eventual-consistency-service/app/graphrag/graph_rag.py b/eventual-consistency-service/app/graphrag/graph_rag.py index 7e67b342..4403756d 100644 --- a/eventual-consistency-service/app/graphrag/graph_rag.py +++ b/eventual-consistency-service/app/graphrag/graph_rag.py @@ -9,7 +9,7 @@ from common.embeddings.milvus_embedding_store import MilvusEmbeddingStore from common.extractors.BaseExtractor import BaseExtractor from graphrag import workers -from graphrag.util import http_timeout, init, make_headers, stream_doc_ids +from graphrag.util import http_timeout, init, make_headers, stream_ids from pyTigerGraph import TigerGraphConnection http_logs = logging.getLogger("httpx") @@ -29,15 +29,15 @@ async def stream_docs( """ logger.info("streaming docs") headers = make_headers(conn) - for i in range(ttl_batches): - doc_ids = await stream_doc_ids(conn, i, ttl_batches) - if doc_ids["error"]: - # continue to the next batch. - # These docs will not be marked as processed, so the ecc will process it eventually. - continue + async with httpx.AsyncClient(timeout=http_timeout) as client: + for i in range(ttl_batches): + doc_ids = await stream_ids(conn, "Document", i, ttl_batches) + if doc_ids["error"]: + # continue to the next batch. + # These docs will not be marked as processed, so the ecc will process it eventually. 
+ continue - for d in doc_ids["ids"]: - async with httpx.AsyncClient(timeout=http_timeout) as client: + for d in doc_ids["ids"]: try: res = await client.get( f"{conn.restppUrl}/query/{conn.graphname}/StreamDocContent/", @@ -104,19 +104,13 @@ async def upsert(upsert_chan: Channel): logger.info("Reading from upsert channel") # consume task queue - upsert_tasks = [] async with asyncio.TaskGroup() as grp: async for func, args in upsert_chan: logger.info(f"{func.__name__}, {args[1]}") - # continue # execute the task - t = grp.create_task(func(*args)) - upsert_tasks.append(t) + grp.create_task(func(*args)) logger.info(f"upsert done") - # do something with doc_tasks? - # for t in upsert_tasks: - # logger.info(t.result()) async def embed( @@ -131,7 +125,6 @@ async def embed( async with asyncio.TaskGroup() as grp: # consume task queue async for v_id, content, index_name in embed_chan: - # continue embedding_store = index_stores[f"{graphname}_{index_name}"] logger.info(f"Embed to {graphname}_{index_name}: {v_id}") grp.create_task( @@ -173,49 +166,136 @@ async def extract( embed_chan.close() -async def run(graphname: str, conn: TigerGraphConnection): +async def stream_entities( + conn: TigerGraphConnection, + entity_chan: Channel, + ttl_batches: int = 50, +): + """ + Streams entity IDs from the grpah """ - ecc flow + logger.info("streaming entities") + for i in range(ttl_batches): + ids = await stream_ids(conn, "Entity", i, ttl_batches) + if ids["error"]: + # continue to the next batch. + # These docs will not be marked as processed, so the ecc will process it eventually. + continue - initialize_eventual_consistency_checker - instantiates ecc object - writes checker to checker dict - runs ecc_obj.initialize() + for i in ids["ids"]: + if len(i) > 0: + await entity_chan.put(i) + # break + # break # one batch + + logger.info("stream_enities done") + # close the docs chan -- this function is the only sender + logger.info("closing entities chan") + entity_chan.close() - ECC.initialize - loops and calls fetch and process +async def resolve_entities( + conn: TigerGraphConnection, + emb_store: MilvusEmbeddingStore, + entity_chan: Channel, + upsert_chan: Channel, +): """ + Merges entities into their ResolvedEntity form + Groups what should be the same entity into a resolved entity (e.g. 
V_type and VType should be merged) - extractor, index_stores = await init(conn) - # return - start = time.perf_counter() - - tasks = [] - docs_chan = Channel(1) - embed_chan = Channel(100) - upsert_chan = Channel(100) - extract_chan = Channel(100) + Copies edges between entities to their respective ResolvedEntities + """ async with asyncio.TaskGroup() as grp: - # get docs - t = grp.create_task(stream_docs(conn, docs_chan, 10)) - tasks.append(t) - # process docs - t = grp.create_task( - chunk_docs(conn, docs_chan, embed_chan, upsert_chan, extract_chan) - ) - tasks.append(t) - # upsert chunks - t = grp.create_task(upsert(upsert_chan)) - tasks.append(t) - # # embed - t = grp.create_task(embed(embed_chan, index_stores, graphname)) - tasks.append(t) - # extract entities - t = grp.create_task( - extract(extract_chan, upsert_chan, embed_chan, extractor, conn) + # for every entity + async for entity_id in entity_chan: + print(f"***Etity ID from chan {entity_id}") + grp.create_task( + workers.resolve_entity(conn, upsert_chan, emb_store, entity_id) + ) + logger.info("closing upsert_chan") + upsert_chan.close() + + # Copy RELATIONSHIP edges to RESOLVED_RELATIONSHIP + headers = make_headers(conn) + async with httpx.AsyncClient(timeout=http_timeout) as client: + res = await client.get( + f"{conn.restppUrl}/query/{conn.graphname}/ResolveRelationships/", + headers=headers, ) - tasks.append(t) - end = time.perf_counter() + res.raise_for_status() + + +async def communities(conn: TigerGraphConnection): + pass + # Setup + + +async def run(graphname: str, conn: TigerGraphConnection): + """ + Set up GraphRAG: + - Install necessary queries. + - Process the documents into: + - chunks + - embeddings + - entities/relationships (and their embeddings) + - upsert everything to the graph + """ + + extractor, index_stores = await init(conn) + init_start = time.perf_counter() + + if False: + docs_chan = Channel(1) + embed_chan = Channel(100) + upsert_chan = Channel(100) + extract_chan = Channel(100) + async with asyncio.TaskGroup() as grp: + # get docs + grp.create_task(stream_docs(conn, docs_chan, 10)) + # process docs + grp.create_task( + chunk_docs(conn, docs_chan, embed_chan, upsert_chan, extract_chan) + ) + # upsert chunks + grp.create_task(upsert(upsert_chan)) + # embed + grp.create_task(embed(embed_chan, index_stores, graphname)) + # extract entities + grp.create_task( + extract(extract_chan, upsert_chan, embed_chan, extractor, conn) + ) + init_end = time.perf_counter() + + # Entity Resolution + entity_start = time.perf_counter() + + if False: + entities_chan = Channel(100) + upsert_chan = Channel(100) + async with asyncio.TaskGroup() as grp: + grp.create_task(stream_entities(conn, entities_chan, 50)) + grp.create_task( + resolve_entities( + conn, + index_stores[f"{conn.graphname}_Entity"], + entities_chan, + upsert_chan, + ) + ) + grp.create_task(upsert(upsert_chan)) + entity_end = time.perf_counter() - logger.info(f"DONE. graphrag.run elapsed: {end-start}") + # Community Detection + community_start = time.perf_counter() + if True: + await communities(conn) + + community_end = time.perf_counter() + + # Community Summarization + end = time.perf_counter() + logger.info(f"DONE. graphrag system initializer dT: {init_end-init_start}") + logger.info(f"DONE. graphrag entity resolution dT: {entity_end-entity_start}") + logger.info(f"DONE. graphrag initializer dT: {community_end-community_start}") + logger.info(f"DONE. 
graphrag.run() total time elaplsed: {end-init_start}") diff --git a/eventual-consistency-service/app/graphrag/util.py b/eventual-consistency-service/app/graphrag/util.py index 8f2c2141..74dbc56d 100644 --- a/eventual-consistency-service/app/graphrag/util.py +++ b/eventual-consistency-service/app/graphrag/util.py @@ -3,6 +3,7 @@ import json import logging import traceback +from glob import glob import httpx from common.config import ( @@ -42,6 +43,7 @@ async def install_queries( for t in tasks: res = t.result() + print(res) # stop system if a required query doesn't install if res["error"]: raise Exception(res["message"]) @@ -63,9 +65,14 @@ async def init( # "common/gsql/supportai/Update_Vertices_Processing_Status", # "common/gsql/supportai/ECC_Status", # "common/gsql/supportai/Check_Nonexistent_Vertices", - "common/gsql/graphRAG/StreamDocIds", + "common/gsql/graphRAG/StreamIds", "common/gsql/graphRAG/StreamDocContent", + "common/gsql/graphRAG/SetEpochProcessing", + "common/gsql/graphRAG/ResolveRelationships", ] + # add louvain to queries + q = [x.split('.gsql')[0] for x in glob("common/gsql/graphRAG/louvain/*")] + requried_queries.extend(q) await install_queries(requried_queries, conn) # extractor @@ -101,13 +108,14 @@ async def init( vector_field=milvus_config.get("vector_field", "document_vector"), text_field=milvus_config.get("text_field", "document_content"), vertex_field=vertex_field, + drop_old=False, ) LogWriter.info(f"Initializing {name}") # init collection if it doesn't exist if not s.check_collection_exists(): tg.create_task(init_embedding_index(s, vertex_field)) - + index_stores[name] = s return extractor, index_stores @@ -123,29 +131,28 @@ def make_headers(conn: TigerGraphConnection): return headers -async def stream_doc_ids( - conn: TigerGraphConnection, current_batch: int, ttl_batches: int +async def stream_ids( + conn: TigerGraphConnection, v_type: str, current_batch: int, ttl_batches: int ) -> dict[str, str | list[str]]: headers = make_headers(conn) try: async with httpx.AsyncClient(timeout=http_timeout) as client: res = await client.post( - f"{conn.restppUrl}/query/{conn.graphname}/StreamDocIds", + f"{conn.restppUrl}/query/{conn.graphname}/StreamIds", params={ "current_batch": current_batch, "ttl_batches": ttl_batches, + "v_type": v_type, }, headers=headers, ) - ids = res.json()["results"][0]["@@doc_ids"] + ids = res.json()["results"][0]["@@ids"] return {"error": False, "ids": ids} except Exception as e: exc = traceback.format_exc() - LogWriter.error( - f"/{conn.graphname}/query/StreamDocIds\nException Trace:\n{exc}" - ) + LogWriter.error(f"/{conn.graphname}/query/StreamIds\nException Trace:\n{exc}") return {"error": True, "message": str(e)} @@ -165,16 +172,24 @@ def map_attrs(attributes: dict): return attrs +def process_id(v_id: str): + v_id = v_id.replace(" ", "_").replace("/", "") + if v_id == "''" or v_id == '""': + return "" + + return v_id + + async def upsert_vertex( conn: TigerGraphConnection, vertex_type: str, vertex_id: str, attributes: dict, ): + vertex_id = vertex_id.replace(" ", "_") attrs = map_attrs(attributes) data = json.dumps({"vertices": {vertex_type: {vertex_id: attrs}}}) headers = make_headers(conn) - # print("upsert vertex>>>", vertex_id) async with httpx.AsyncClient(timeout=http_timeout) as client: res = await client.post( f"{conn.restppUrl}/graph/{conn.graphname}", data=data, headers=headers @@ -183,6 +198,18 @@ async def upsert_vertex( res.raise_for_status() +async def check_vertex_exists(conn, v_id: str): + headers = make_headers(conn) + async with 
httpx.AsyncClient(timeout=http_timeout) as client: + res = await client.get( + f"{conn.restppUrl}/graph/{conn.graphname}/vertices/Entity/{v_id}", + headers=headers, + ) + + res.raise_for_status() + return res.json() + + async def upsert_edge( conn: TigerGraphConnection, src_v_type: str, @@ -196,6 +223,8 @@ async def upsert_edge( attrs = {} else: attrs = map_attrs(attributes) + src_v_id = src_v_id.replace(" ", "_") + tgt_v_id = tgt_v_id.replace(" ", "_") data = json.dumps( { "edges": { @@ -212,7 +241,6 @@ async def upsert_edge( } ) headers = make_headers(conn) - # print("upsert edge >>>", src_v_id, tgt_v_id) async with httpx.AsyncClient(timeout=http_timeout) as client: res = await client.post( f"{conn.restppUrl}/graph/{conn.graphname}", data=data, headers=headers diff --git a/eventual-consistency-service/app/graphrag/workers.py b/eventual-consistency-service/app/graphrag/workers.py index b7267b60..4c1174df 100644 --- a/eventual-consistency-service/app/graphrag/workers.py +++ b/eventual-consistency-service/app/graphrag/workers.py @@ -11,8 +11,8 @@ from common.embeddings.milvus_embedding_store import MilvusEmbeddingStore from common.extractors.BaseExtractor import BaseExtractor from common.logs.logwriter import LogWriter -from graphrag import util # import upsert_edge, upsert_vertex -from langchain_community.graphs.graph_document import GraphDocument +from graphrag import util +from langchain_community.graphs.graph_document import GraphDocument, Node from pyTigerGraph import TigerGraphConnection vertex_field = milvus_config.get("vertex_field", "vertex_id") @@ -67,7 +67,7 @@ async def chunk_doc( """ chunker = ecc_util.get_chunker() chunks = chunker.chunk(doc["attributes"]["text"]) - v_id = doc["v_id"] + v_id = util.process_id(doc["v_id"]) logger.info(f"Chunking {v_id}") for i, chunk in enumerate(chunks): chunk_id = f"{v_id}_chunk_{i}" @@ -145,6 +145,17 @@ async def embed( await embed_store.aadd_embeddings([(content, vec)], [{vertex_field: v_id}]) +async def get_vert_desc(conn, v_id, node: Node): + desc = [node.properties.get("description", "")] + exists = await util.check_vertex_exists(conn, v_id) + # if vertex exists, get description content and append this description to it + if not exists["error"]: + # dedup descriptions + desc.extend(exists["results"][0]["attributes"]["description"]) + desc = list(set(desc)) + return desc + + async def extract( upsert_chan: Channel, embed_chan: Channel, @@ -159,12 +170,22 @@ async def extract( for doc in extracted: for node in doc.nodes: logger.info(f"extract writes entity vert to upsert\nNode: {node.id}") - v_id = str(node.id) - desc = node.properties.get("description", "") + v_id = util.process_id(str(node.id)) + if len(v_id) == 0: + continue + desc = await get_vert_desc(conn, v_id, node) + + # embed the entity + # embed with the v_id if the description is blank + if len(desc[0]): + await embed_chan.put((v_id, v_id, "Entity")) + else: + # (v_id, content, index_name) + await embed_chan.put((v_id, desc[0], "Entity")) + await upsert_chan.put( ( util.upsert_vertex, # func to call - # conn, v_id, chunk_id, chunk ( conn, "Entity", # v_type @@ -188,33 +209,134 @@ async def extract( chunk_id, # src_id "CONTAINS_ENTITY", # edge_type "Entity", # tgt_type - str(node.id), # tgt_id + v_id, # tgt_id None, # attributes ), ) ) - # embed the entity - # (v_id, content, index_name) - await embed_chan.put((v_id, desc, "Entity")) - for edge in doc.relationships: logger.info( f"extract writes relates edge to upsert\n{edge.source.id} -({edge.type})-> {edge.target.id}" ) + # 
upsert verts first to make sure their ID becomes an attr + v_id = util.process_id(edge.source.id) # src_id + if len(v_id) == 0: + continue + desc = await get_vert_desc(conn, v_id, edge.source) + await upsert_chan.put( + ( + util.upsert_vertex, # func to call + ( + conn, + "Entity", # v_type + v_id, + { # attrs + "description": desc, + "epoch_added": int(time.time()), + }, + ), + ) + ) + v_id = util.process_id(edge.target.id) + if len(v_id) == 0: + continue + desc = await get_vert_desc(conn, v_id, edge.target) + await upsert_chan.put( + ( + util.upsert_vertex, # func to call + ( + conn, + "Entity", # v_type + v_id, # src_id + { # attrs + "description": desc, + "epoch_added": int(time.time()), + }, + ), + ) + ) + + # upsert the edge between the two entities await upsert_chan.put( ( util.upsert_edge, ( conn, "Entity", # src_type - edge.source.id, # src_id + util.process_id(edge.source.id), # src_id "RELATIONSHIP", # edgeType "Entity", # tgt_type - edge.target.id, # tgt_id + util.process_id(edge.target.id), # tgt_id {"relation_type": edge.type}, # attributes ), ) ) # embed "Relationship", # (v_id, content, index_name) + + +async def resolve_entity( + conn: TigerGraphConnection, + upsert_chan: Channel, + emb_store: MilvusEmbeddingStore, + entity_id: str, +): + """ + get all vectors of E (one name can have multiple discriptions) + get ents close to E + for e in ents: + if e is 95% similar to E and edit_dist(E,e) <=3: + merge + mark e as processed + + mark as processed + """ + results = await emb_store.aget_k_closest(entity_id) + if len(results) == 0: + logger.error( + f"aget_k_closest should, minimally, return the entity itself.\n{results}" + ) + raise Exception() + if entity_id == "Dataframe": + print("result:", entity_id, results) + + # merge all entities into the ResolvedEntity vertex + # use the longest v_id as the resolved entity's v_id + resolved_entity_id = "" + for v in results: + # v_id = v.metadata["vertex_id"] + if len(v) > len(resolved_entity_id): + resolved_entity_id = v + + # upsert the resolved entity + await upsert_chan.put( + ( + util.upsert_vertex, # func to call + ( + conn, + "ResolvedEntity", # v_type + resolved_entity_id, # v_id + { # attrs + "description": [] + }, + ), + ) + ) + + # create RESOLVES_TO edges from each entity to the ResolvedEntity + for v in results: + await upsert_chan.put( + ( + util.upsert_edge, + ( + conn, + "Entity", # src_type + v, # src_id + "RESOLVES_TO", # edge_type + "ResolvedEntity", # tgt_type + resolved_entity_id, # tgt_id + None, # attributes + ), + ) + ) From 8ab8774cc160445a1602c18ddf2b9e7bc1b87a35 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Fri, 9 Aug 2024 18:47:13 -0400 Subject: [PATCH 09/53] starting to upsert community summaries --- common/embeddings/embedding_services.py | 2 +- .../gsql/graphRAG/ResolveRelationships.gsql | 2 +- .../gsql/graphRAG/get_community_children.gsql | 12 ++ common/gsql/graphRAG/leven.cpp | 59 ------ .../louvain/graphrag_louvain_communities.gsql | 199 ++++++++++++++++++ .../louvain/graphrag_louvain_init.gsql | 185 ++++++++++++++++ common/gsql/graphRAG/louvain/louvain1.gsql | 17 -- common/gsql/graphRAG/louvain/modularity.gsql | 49 +++++ .../graphRAG/louvain/stream_community.gsql | 9 + common/gsql/supportai/SupportAI_Schema.gsql | 14 +- common/py_schemas/tool_io_schemas.py | 25 ++- copilot/docs/notebooks/graphrag.ipynb | 127 +++++------ eventual-consistency-service/app/ecc_util.py | 33 ++- .../app/graphrag/community_summarizer.py | 138 ++++++++++++ 
.../app/graphrag/graph_rag.py | 158 ++++++++++++-- .../app/graphrag/util.py | 63 ++++-- .../app/graphrag/workers.py | 63 +++++- eventual-consistency-service/requirements.txt | 34 +-- 18 files changed, 968 insertions(+), 221 deletions(-) create mode 100644 common/gsql/graphRAG/get_community_children.gsql delete mode 100644 common/gsql/graphRAG/leven.cpp create mode 100644 common/gsql/graphRAG/louvain/graphrag_louvain_communities.gsql create mode 100644 common/gsql/graphRAG/louvain/graphrag_louvain_init.gsql delete mode 100644 common/gsql/graphRAG/louvain/louvain1.gsql create mode 100644 common/gsql/graphRAG/louvain/modularity.gsql create mode 100644 common/gsql/graphRAG/louvain/stream_community.gsql create mode 100644 eventual-consistency-service/app/graphrag/community_summarizer.py diff --git a/common/embeddings/embedding_services.py b/common/embeddings/embedding_services.py index dd506670..13c2cfd0 100644 --- a/common/embeddings/embedding_services.py +++ b/common/embeddings/embedding_services.py @@ -134,7 +134,7 @@ def __init__(self, config): super().__init__( config, model_name=config.get("model_name", "OpenAI gpt-4-0613") ) - from langchain.embeddings import OpenAIEmbeddings + from langchain_openai import OpenAIEmbeddings self.embeddings = OpenAIEmbeddings() diff --git a/common/gsql/graphRAG/ResolveRelationships.gsql b/common/gsql/graphRAG/ResolveRelationships.gsql index d3c69297..6a0e515d 100644 --- a/common/gsql/graphRAG/ResolveRelationships.gsql +++ b/common/gsql/graphRAG/ResolveRelationships.gsql @@ -13,7 +13,7 @@ CREATE DISTRIBUTED QUERY ResolveRelationships(BOOL printResults=FALSE) SYNTAX V2 REs = SELECT re1 FROM REs:re1 -(:rel)- Entity:e_tgt -(RESOLVES_TO>:r)- ResolvedEntity:re2 // Connect the The first RE to the second RE ACCUM - INSERT INTO RESOLVED_RELATIONSHIP(FROM,TO) VALUES(re1, re2); + INSERT INTO RESOLVED_RELATIONSHIP(FROM,TO, relation_type) VALUES(re1, re2, rel.relation_type); IF printResults THEN diff --git a/common/gsql/graphRAG/get_community_children.gsql b/common/gsql/graphRAG/get_community_children.gsql new file mode 100644 index 00000000..7913e1b7 --- /dev/null +++ b/common/gsql/graphRAG/get_community_children.gsql @@ -0,0 +1,12 @@ +CREATE DISTRIBUTED QUERY get_community_children(Vertex comm, UINT iter) SYNTAX V2{ + Comms = {comm}; + + IF iter > 1 THEN + Comms = SELECT t FROM Comms:c -()- ResolvedEntity -(_>)- Entity:t; + + PRINT Ents[Ents.description as description] as children; + END; +} diff --git a/common/gsql/graphRAG/leven.cpp b/common/gsql/graphRAG/leven.cpp deleted file mode 100644 index 10c45669..00000000 --- a/common/gsql/graphRAG/leven.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include -#include - -// Returns the Levenshtein distance between word1 and word2. -int levenshteinDist(std::string word1, std::string word2) { - int size1 = word1.size(); - int size2 = word2.size(); - int verif[size1 + 1][size2 + 1]; // Verification matrix i.e. 2D array - // which will store the calculated distance. - - // If one of the words has zero length, the distance is equal to the size of - // the other word. - if (size1 == 0) return size2; - if (size2 == 0) return size1; - - // Sets the first row and the first column of the verification matrix with - // the numerical order from 0 to the length of each word. - for (int i = 0; i <= size1; i++) verif[i][0] = i; - for (int j = 0; j <= size2; j++) verif[0][j] = j; - - // Verification step / matrix filling. - for (int i = 1; i <= size1; i++) { - for (int j = 1; j <= size2; j++) { - // Sets the modification cost. 
- // 0 means no modification (i.e. equal letters) and 1 means that a - // modification is needed (i.e. unequal letters). - int cost = (word2[j - 1] == word1[i - 1]) ? 0 : 1; - - // Sets the current position of the matrix as the minimum value - // between a (deletion), b (insertion) and c (substitution). a = the - // upper adjacent value plus 1: verif[i - 1][j] + 1 b = the left - // adjacent value plus 1: verif[i][j - 1] + 1 c = the upper left - // adjacent value plus the modification cost: verif[i - 1][j - 1] + - // cost - verif[i][j] = - std::min(std::min(verif[i - 1][j] + 1, verif[i][j - 1] + 1), - verif[i - 1][j - 1] + cost); - } - } - - // The last position of the matrix will contain the Levenshtein distance. - return verif[size1][size2]; -} - -int main() { - std::string word1, word2; - - std::cout << "Please input the first word: " << std::endl; - std::cin >> word1; - std::cout << "Please input the second word: " << std::endl; - std::cin >> word2; - - // cout << "The number of modifications needed in order to make one word " - // "equal to the other is: " - std::cout << "The edit distance is: " << levenshteinDist(word1, word2) - << std::endl; - - return 0; -} diff --git a/common/gsql/graphRAG/louvain/graphrag_louvain_communities.gsql b/common/gsql/graphRAG/louvain/graphrag_louvain_communities.gsql new file mode 100644 index 00000000..366b7ea7 --- /dev/null +++ b/common/gsql/graphRAG/louvain/graphrag_louvain_communities.gsql @@ -0,0 +1,199 @@ +CREATE DISTRIBUTED QUERY graphrag_louvain_communities(UINT iteration=1, UINT max_hop = 10, UINT n_batches = 1) SYNTAX V2{ + /* + * This is the same query as tg_louvain, just that Paper-related schema + * are changed to Community-related schema + * + * For the first call to this query, iteration = 1 + */ + TYPEDEF TUPLE community, STRING ext_vid> Move; + SumAccum @@m; // the sum of the weights of all the links in the network + MinAccum> @community_id; // the community ID of the node + MinAccum @community_vid; // the community ID of the node + SumAccum @k; // the sum of the weights of the links incident to the node + SumAccum @k_in; // the sum of the weights of the links inside the previous community of the node + SumAccum @k_self_loop; // the weight of the self-loop link + MapAccum, SumAccum> @community_k_in_map; // the community of the neighbors of the nodes -> the sum of the weights of the links inside the community + MapAccum, SumAccum> @@community_sum_total_map; // community ID C -> the sum of the weights of the links incident to nodes in C + SumAccum @community_sum_total; // the sum of the weights of the links incident to nodes in the community of the node + MapAccum, SumAccum> @@community_sum_in_map; // community ID -> the sum of the weights of the links inside the community + MapAccum>> @@source_target_k_in_map; // source community ID -> (target community ID -> the sum of the weights of the links from the source community to the target community) + SumAccum @delta_Q_remove; // delta Q to remove the node from the previous community + MaxAccum @best_move; // best move of the node with the highest delta Q to move the isolated node into the new community + MaxAccum @@min_double; // used to reset the @best_move + SumAccum @@move_cnt; + OrAccum @to_change_community, @is_current_iter, @has_parent; + SumAccum @batch_id; + MinAccum @vid; + + AllNodes = {Community.*}; + + // Get communities of the current iteration + AllNodes = SELECT s FROM AllNodes:s + WHERE s.iteration == iteration + ACCUM s.@is_current_iter += TRUE; + + // init + z = SELECT s 
FROM AllNodes:s -(_>:e)- Community:t + WHERE s.@is_current_iter AND t.@is_current_iter + ACCUM s.@k += e.weight, + @@m += e.weight/2, + IF s == t THEN // self loop + s.@k_self_loop += e.weight + END + POST-ACCUM + s.@community_id = s, // assign node to its own community + s.@community_vid = to_string(s.id), // external id + s.@vid = getvid(s), // internal id (used in batching) + s.@batch_id = s.@vid % n_batches; // get batch number + + IF @@m < 0.00000000001 THEN + PRINT "Warning: the sum of the weights in the edges should be greater than zero!"; + RETURN; + END; + + // Local moving + INT hop = 0; + Candidates = AllNodes; + WHILE Candidates.size() > 0 AND hop < max_hop DO + hop += 1; + IF hop == 1 THEN // first iteration + ChangedNodes = SELECT s FROM Candidates:s -(_>:e)- Community:t + WHERE s.@community_id != t.@community_id // can't move within the same community + AND s.@is_current_iter AND t.@is_current_iter // only use Communities in the current iteration + ACCUM + DOUBLE dq = 1 - s.@k * t.@k / (2 * @@m), + s.@best_move += Move(dq, t.@community_id, t.@community_vid) // find the best move + POST-ACCUM + IF s.@best_move.delta_q > 0 THEN // if the move increases dq + s.@to_change_community += TRUE + END + HAVING s.@to_change_community == TRUE; // only select nodes that will move + ELSE // other iterations + // Calculate sum_total of links in each community + Tmp = SELECT s FROM AllNodes:s + POST-ACCUM + @@community_sum_total_map += (s.@community_id -> s.@k); + // store community's total edges in each vert (easier access) + Tmp = SELECT s FROM AllNodes:s + POST-ACCUM + s.@community_sum_total = @@community_sum_total_map.get(s.@community_id); + @@community_sum_total_map.clear(); + + // find the best move + ChangedNodes = {}; + + // process nodes in batch + FOREACH batch_id IN RANGE[0, n_batches-1] DO + Nodes = SELECT s FROM Candidates:s -(_>:e)- Community:t + WHERE s.@batch_id == batch_id + AND s.@is_current_iter AND t.@is_current_iter // only use Communities in the current iteration + ACCUM + IF s.@community_id == t.@community_id THEN + // add edge weights connected to s + s.@k_in += e.weight + ELSE + // add edge weights connecetd to t + s.@community_k_in_map += (t.@community_id -> e.weight) + END + POST-ACCUM + // ∆Q if s is moved out of its current community + s.@delta_Q_remove = 2 * s.@k_self_loop - 2 * s.@k_in + s.@k * (s.@community_sum_total - s.@k) / @@m, + s.@k_in = 0, + s.@best_move = Move(@@min_double, s, to_string(s.id)); // reset best move + + // find the best move + Nodes = SELECT s FROM Nodes:s -(_>:E)- Community:t + WHERE s.@community_id != t.@community_id + AND s.@is_current_iter AND t.@is_current_iter // only use Communities in the current iteration + ACCUM + DOUBLE dq = 2 * s.@community_k_in_map.get(t.@community_id) - s.@k * t.@community_sum_total / @@m, + s.@best_move += Move(dq, t.@community_id, t.@community_vid) // find the best move + POST-ACCUM + IF s.@delta_Q_remove + s.@best_move.delta_q > 0 THEN // if the move increases dq + s.@to_change_community = TRUE// s should move + END, + s.@community_k_in_map.clear() + HAVING s.@to_change_community == TRUE; // only select nodes that will move + + // Add nodes that will move to ChangedNodes + ChangedNodes = ChangedNodes UNION Nodes; + END; + END; + // If two nodes swap, only change the community of one of them + SwapNodes = SELECT s FROM ChangedNodes:s -(_>:e)- Community:t + WHERE s.@best_move.community == t.@community_id + AND s.@is_current_iter AND t.@is_current_iter // only use Communities in the current iteration + AND 
t.@to_change_community + AND t.@best_move.community == s.@community_id + // if delta Q are the same, only change the one with larger delta Q or the one with smaller @vid + AND ( + s.@delta_Q_remove + s.@best_move.delta_q < t.@delta_Q_remove + t.@best_move.delta_q + OR ( + abs( + (s.@delta_Q_remove + s.@best_move.delta_q) + - (t.@delta_Q_remove + t.@best_move.delta_q) + ) < 0.00000000001 + AND s.@vid > t.@vid + ) + ) + POST-ACCUM + s.@to_change_community = FALSE; + + // remove SwapNodes (don't need to be changed) + ChangedNodes = ChangedNodes MINUS SwapNodes; + + // Update node communities (based on max ∆Q) + SwapNodes = SELECT s FROM ChangedNodes:s + POST-ACCUM + s.@community_id = s.@best_move.community, // move the node + s.@community_vid = s.@best_move.ext_vid, // move the node (external v_id update) + s.@to_change_community = FALSE; + @@move_cnt += ChangedNodes.size(); + + // Get all neighbours of the changed node that do not belong to the node’s new community + Candidates = SELECT t FROM ChangedNodes:s -(_>:e)- Community:t + WHERE t.@community_id != s.@community_id + AND s.@is_current_iter AND t.@is_current_iter; // only use Communities in the current iteration + END; + + // Coarsening + @@community_sum_total_map.clear(); + Tmp = SELECT s FROM AllNodes:s -(_>:e)- Community:t + WHERE s.@is_current_iter AND t.@is_current_iter // only use Communities in the current iteration + ACCUM + IF s.@community_id == t.@community_id THEN + // keep track of how many edges are within the community + @@community_sum_in_map += (s.@community_id -> e.weight) + ELSE + // get LINKS_TO edge weights (how many edges are between communities) + // s.@community_k_in_map += (t.@community_id -> 1) + @@source_target_k_in_map += (s.@community_vid -> (t.@community_vid -> e.weight)) + END, + t.@has_parent += TRUE // Used to help find unattached partitions + POST-ACCUM + // Write the results to a new community vertex (iteration + 1) + // ID , iter, edges within the community + INSERT INTO Community VALUES (s.id+"_"+to_string(iteration+1), iteration+1, s.k_in + @@community_sum_in_map.get(s.@community_id), ""), + INSERT INTO HAS_PARENT VALUES (s, s.@community_vid+"_"+to_string(iteration+1)) // link Community's child/parent community + ; + + // Continue community hierarchy for unattached partitions + Tmp = SELECT s FROM AllNodes:s + WHERE s.@is_current_iter + AND NOT s.@has_parent + POST-ACCUM + // if s is a part of an unattached partition, add to its community hierarchy to maintain parity with rest of graph + INSERT INTO Community VALUES (s.id+"_"+to_string(iteration+1), iteration+1, s.k_in + @@community_sum_in_map.get(s.@community_id), ""), + INSERT INTO HAS_PARENT VALUES (s, s.id+"_"+to_string(iteration+1)) // link Community's child/parent community + ; + + // link communities + // "If two communities have an edge between them, their parents should also have an edge bewtween them" + Tmp = SELECT s FROM AllNodes:s -(_>:e)- Community:t + WHERE s.@community_vid != t.@community_vid + AND s.@is_current_iter AND t.@is_current_iter // only use Communities in the current iteration + ACCUM + DOUBLE w = @@source_target_k_in_map.get(s.@community_vid).get(t.@community_vid)/2, + INSERT INTO LINKS_TO VALUES (s.@community_vid+"_"+to_string(iteration+1), t.@community_vid+"_"+to_string(iteration+1), w) + ; +} diff --git a/common/gsql/graphRAG/louvain/graphrag_louvain_init.gsql b/common/gsql/graphRAG/louvain/graphrag_louvain_init.gsql new file mode 100644 index 00000000..2ccbaf2c --- /dev/null +++ 
b/common/gsql/graphRAG/louvain/graphrag_louvain_init.gsql @@ -0,0 +1,185 @@ +CREATE DISTRIBUTED QUERY graphrag_louvain_init(UINT max_hop = 10, UINT n_batches = 1) { + /* + * Initialize GraphRAG's hierarchical communities. + */ + TYPEDEF TUPLE community, STRING ext_vid> Move; + SumAccum @@m; // the sum of the weights of all the links in the network + MinAccum> @community_id; // the community ID of the node + MinAccum @community_vid; // the community ID of the node + SumAccum @k; // the sum of the weights of the links incident to the node + SumAccum @k_in; // the sum of the weights of the links inside the previous community of the node + SumAccum @k_self_loop; // the weight of the self-loop link + MapAccum, SumAccum> @community_k_in_map; // the community of the neighbors of the nodes -> the sum of the weights of the links inside the community + MapAccum, SumAccum> @@community_sum_total_map; // community ID C -> the sum of the weights of the links incident to nodes in C + SumAccum @community_sum_total; // the sum of the weights of the links incident to nodes in the community of the node + MapAccum, SumAccum> @@community_sum_in_map; // community ID -> the sum of the weights of the links inside the community + MapAccum>> @@source_target_k_in_map; // source community ID -> (target community ID -> the sum of the weights of the links from the source community to the target community) + SumAccum @delta_Q_remove; // delta Q to remove the node from the previous community + MaxAccum @best_move; // best move of the node with the highest delta Q to move the isolated node into the new community + MaxAccum @@min_double; // used to reset the @best_move + SumAccum @@move_cnt; + OrAccum @to_change_community; + SumAccum @batch_id; + MinAccum @vid; + + AllNodes = {ResolvedEntity.*}; + DOUBLE wt = 1.0; + + // prevent multiple init runs + // z = SELECT s FROM AllNodes:s -(_)-> Community:t; + // IF z.size() > 0 THEN + // EXCEPTION reinit(400001); + // RAISE reinit("ERROR: the hierarchical communities have already been initialized"); + // END; + + // init + z = SELECT s FROM AllNodes:s + ACCUM + s.@community_id = s, // assign node to its own community + s.@community_vid = s.id, // external id + s.@vid = getvid(s), // internal id (used in batching) + s.@batch_id = s.@vid % n_batches; // get batch number + z = SELECT s FROM AllNodes:s -(_)-> ResolvedEntity:t + ACCUM s.@k += wt, + @@m += 1; + // POST-ACCUM + // s.@community_id = s, // assign node to its own community + // s.@community_vid = s.id, // external id + // s.@vid = getvid(s), // internal id (used in batching) + // s.@batch_id = s.@vid % n_batches; // get batch number + + PRINT z.size(); + PRINT z; + + // Local moving + INT hop = 0; + Candidates = AllNodes; + WHILE Candidates.size() > 0 AND hop < max_hop DO + hop += 1; + IF hop == 1 THEN // first iteration + ChangedNodes = SELECT s FROM Candidates:s -(_:e)-> ResolvedEntity:t + WHERE s.@community_id != t.@community_id // can't move within the same community + ACCUM + DOUBLE dq = 1 - s.@k * t.@k / (2 * @@m), + s.@best_move += Move(dq, t.@community_id, t.@community_vid) // find the best move + POST-ACCUM + IF s.@best_move.delta_q > 0 THEN // if the move increases dq + s.@to_change_community += TRUE + END + HAVING s.@to_change_community == TRUE; // only select nodes that will move + PRINT ChangedNodes.size(); + ELSE // other iterations + // Calculate sum_total of links in each community + Tmp = SELECT s FROM AllNodes:s + POST-ACCUM + @@community_sum_total_map += (s.@community_id -> s.@k); + // store community's 
total edges in each vert (easier access) + Tmp = SELECT s FROM AllNodes:s + POST-ACCUM + s.@community_sum_total = @@community_sum_total_map.get(s.@community_id); + @@community_sum_total_map.clear(); + + // find the best move + ChangedNodes = {}; + + // process nodes in batch + FOREACH batch_id IN RANGE[0, n_batches-1] DO + Nodes = SELECT s FROM Candidates:s -(_:e)-> ResolvedEntity:t + WHERE s.@batch_id == batch_id + ACCUM + IF s.@community_id == t.@community_id THEN + // add edge weights connected to s + s.@k_in += wt + ELSE + // add edge weights connecetd to t + s.@community_k_in_map += (t.@community_id -> wt) + END + POST-ACCUM + // ∆Q if s is moved out of its current community + s.@delta_Q_remove = 2 * s.@k_self_loop - 2 * s.@k_in + s.@k * (s.@community_sum_total - s.@k) / @@m, + s.@k_in = 0, + s.@best_move = Move(@@min_double, s, to_string(s.id)); // reset best move + + // find the best move + Nodes = SELECT s FROM Nodes:s -(_:e)-> ResolvedEntity:t + WHERE s.@community_id != t.@community_id + ACCUM + DOUBLE dq = 2 * s.@community_k_in_map.get(t.@community_id) - s.@k * t.@community_sum_total / @@m, + s.@best_move += Move(dq, t.@community_id, t.@community_vid) // find the best move + POST-ACCUM + IF s.@delta_Q_remove + s.@best_move.delta_q > 0 THEN // if the move increases dq + s.@to_change_community = TRUE// s should move + END, + s.@community_k_in_map.clear() + HAVING s.@to_change_community == TRUE; // only select nodes that will move + + // Add nodes that will move to ChangedNodes + ChangedNodes = ChangedNodes UNION Nodes; + END; + END; + // If two nodes swap, only change the community of one of them + SwapNodes = SELECT s FROM ChangedNodes:s -(_:e)-> ResolvedEntity:t + WHERE s.@best_move.community == t.@community_id + AND t.@to_change_community + AND t.@best_move.community == s.@community_id + // if delta Q are the same, only change the one with larger delta Q or the one with smaller @vid + AND ( + s.@delta_Q_remove + s.@best_move.delta_q < t.@delta_Q_remove + t.@best_move.delta_q + OR ( + abs( + (s.@delta_Q_remove + s.@best_move.delta_q) + - (t.@delta_Q_remove + t.@best_move.delta_q) + ) < 0.00000000001 + AND s.@vid > t.@vid + ) + ) + POST-ACCUM + s.@to_change_community = FALSE; + + // remove SwapNodes (don't need to be changed) + ChangedNodes = ChangedNodes MINUS SwapNodes; + + // Update node communities (based on max ∆Q) + SwapNodes = SELECT s FROM ChangedNodes:s + POST-ACCUM + s.@community_id = s.@best_move.community, // move the node + s.@community_vid = s.@best_move.ext_vid, // move the node (external v_id update) + s.@to_change_community = FALSE; + @@move_cnt += ChangedNodes.size(); + + // Get all neighbours of the changed node that do not belong to the node’s new community + Candidates = SELECT t FROM ChangedNodes:s -(_:e)-> ResolvedEntity:t + WHERE t.@community_id != s.@community_id; + END; + + // Coarsening + UINT new_layer = 0; + @@community_sum_total_map.clear(); + Tmp = SELECT s FROM AllNodes:s -(_:e)-> ResolvedEntity:t + ACCUM + IF s.@community_id == t.@community_id THEN + // keep track of how many edges are within the community + @@community_sum_in_map += (s.@community_id -> wt) + ELSE + // get LINKS_TO edge weights (how many edges are between communities) + @@source_target_k_in_map += (s.@community_vid -> (t.@community_vid -> 1)) + END + POST-ACCUM + // ID , iter, edges within the community + INSERT INTO Community VALUES (s.@community_vid+"_1", 1, @@community_sum_in_map.get(s.@community_id), ""), + INSERT INTO IN_COMMUNITY VALUES (s, s.@community_vid+"_1") // link 
entity to it's first community + ; + + PRINT @@source_target_k_in_map; + + @@community_sum_total_map.clear(); + // link communities + Tmp = SELECT s FROM AllNodes:s -(_:e)-> ResolvedEntity:t + WHERE s.@community_vid != t.@community_vid + ACCUM + DOUBLE w = @@source_target_k_in_map.get(s.@community_vid).get(t.@community_vid), + INSERT INTO LINKS_TO VALUES (s.@community_vid+"_1", t.@community_vid+"_1", w); + + + PRINT @@source_target_k_in_map; +} diff --git a/common/gsql/graphRAG/louvain/louvain1.gsql b/common/gsql/graphRAG/louvain/louvain1.gsql deleted file mode 100644 index 494a3625..00000000 --- a/common/gsql/graphRAG/louvain/louvain1.gsql +++ /dev/null @@ -1,17 +0,0 @@ -CREATE DISTRIBUTED QUERY graphRAG_louvain_1() { - - Ents = {ResolvedEntity.*}; - - // Put each node into a distinct community - // Assume each Entity starts in its own community - - // For each node i - // Compute ∆Q (modularity) when putting node i into the community of some neighbor j - // move i to community that yields the largest gain in ∆Q - - Z = SELECT v FROM Ents:v -(_:e)-> ResolvedEntity:r - - - ; -} - diff --git a/common/gsql/graphRAG/louvain/modularity.gsql b/common/gsql/graphRAG/louvain/modularity.gsql new file mode 100644 index 00000000..3aaad826 --- /dev/null +++ b/common/gsql/graphRAG/louvain/modularity.gsql @@ -0,0 +1,49 @@ +CREATE DISTRIBUTED QUERY modularity(UINT iteration=1) SYNTAX V2 { + SumAccum @@sum_weight; // the sum of the weights of all the links in the network + MinAccum @community_id; // the community ID of the node + MapAccum> @@community_total_weight_map; // community ID C -> the sum of the weights of the links incident to nodes in C + MapAccum> @@community_in_weight_map; // community ID -> the sum of the weights of the links inside the community + SumAccum @@modularity; + MinAccum @parent; + DOUBLE wt = 1.0; + Comms = {Community.*}; + + // Assign Entities to their correct community (given the specified iteration level) + IF iteration > 1 THEN + Comms = SELECT t FROM Comms:c -()- ResolvedEntity:t + ACCUM t.@community_id = c.@parent; + + ELSE + Entities = SELECT t FROM Comms:c -(_>)- ResolvedEntity:t + WHERE c.iteration == iteration + ACCUM t.@community_id = c.id; + END; + + Nodes = SELECT s FROM Entities:s -(_>:e)- ResolvedEntity:t + ACCUM + IF s.@community_id == t.@community_id THEN + @@community_in_weight_map += (s.@community_id -> wt) + END, + @@community_total_weight_map += (s.@community_id -> wt), + @@sum_weight += wt; + + @@modularity = 0; + FOREACH (community, total_weight) IN @@community_total_weight_map DO + DOUBLE in_weight = 0; + IF @@community_in_weight_map.containsKey(community) THEN + in_weight = @@community_in_weight_map.get(community); + END; + @@modularity += in_weight / @@sum_weight - pow(total_weight / @@sum_weight, 2); + END; + + PRINT @@modularity as mod; +} diff --git a/common/gsql/graphRAG/louvain/stream_community.gsql b/common/gsql/graphRAG/louvain/stream_community.gsql new file mode 100644 index 00000000..d01959d2 --- /dev/null +++ b/common/gsql/graphRAG/louvain/stream_community.gsql @@ -0,0 +1,9 @@ +CREATE DISTRIBUTED QUERY stream_community(UINT iter) { + Comms = {Community.*}; + + // Get communities of the current iteration + Comms = SELECT s FROM Comms:s + WHERE s.iteration == iter; + + PRINT Comms; +} diff --git a/common/gsql/supportai/SupportAI_Schema.gsql b/common/gsql/supportai/SupportAI_Schema.gsql index 1a705eaf..3e127d82 100644 --- a/common/gsql/supportai/SupportAI_Schema.gsql +++ b/common/gsql/supportai/SupportAI_Schema.gsql @@ -20,16 +20,14 @@ CREATE 
SCHEMA_CHANGE JOB add_supportai_schema { ADD DIRECTED EDGE CONTAINS_DOCUMENT(FROM DocumentCollection, TO Document) WITH REVERSE_EDGE="reverse_CONTAINS_DOCUMENT"; // GraphRAG - ADD VERTEX Community(PRIMARY_ID id STRING, description INT) WITH PRIMARY_ID_AS_ATTRIBUTE="true"; + ADD VERTEX Community (PRIMARY_ID id STRING, iteration UINT, k_in UINT, description STRING) WITH PRIMARY_ID_AS_ATTRIBUTE="true"; ADD VERTEX ResolvedEntity(PRIMARY_ID id STRING, entity_type STRING) WITH PRIMARY_ID_AS_ATTRIBUTE="true"; - ADD DIRECTED EDGE RELATIONSHIP(FROM Entity, TO Entity, relation_type STRING) WITH REVERSE_EDGE="reverse_RELATIONSHIP"; // TODO: check where knn algo writes results + ADD DIRECTED EDGE RELATIONSHIP(FROM Entity, TO Entity, relation_type STRING) WITH REVERSE_EDGE="reverse_RELATIONSHIP"; ADD DIRECTED EDGE RESOLVES_TO(FROM Entity, TO ResolvedEntity, relation_type STRING) WITH REVERSE_EDGE="reverse_RESOLVES_TO"; // Connect ResolvedEntities with their children entities - ADD DIRECTED EDGE RESOLVED_RELATIONSHIP(FROM ResolvedEntity, TO ResolvedEntity) WITH REVERSE_EDGE="reverse_RESOLVED_RELATIONSHIP"; // store edges between entities after they're resolved - ADD DIRECTED EDGE IN_COMMUNITY(FROM ResolvedEntity, TO Community) WITH REVERSE_EDGE="reverse_IN_COMMUNITY"; + ADD DIRECTED EDGE RESOLVED_RELATIONSHIP(FROM ResolvedEntity, TO ResolvedEntity, relation_type STRING) WITH REVERSE_EDGE="reverse_RESOLVED_RELATIONSHIP"; // store edges between entities after they're resolved - // TODO: louvain will be run on resolved entities, but stored in community then on communities until louvain runs out - // Hierarchical communities (Louvain/Leiden) - // ADD UNDIRECTED EDGE LINKS_TO(FROM Community, TO Community); - // ADD DIRECTED EDGE BELONGS_TO(FROM Community, TO Community); + ADD DIRECTED EDGE IN_COMMUNITY(FROM ResolvedEntity, TO Community) WITH REVERSE_EDGE="reverse_IN_COMMUNITY"; + ADD DIRECTED EDGE LINKS_TO (from Community, to Community, weight DOUBLE) WITH REVERSE_EDGE="reverse_LINKS_TO"; + ADD DIRECTED EDGE HAS_PARENT (from Community, to Community) WITH REVERSE_EDGE="reverse_HAS_PARENT"; } diff --git a/common/py_schemas/tool_io_schemas.py b/common/py_schemas/tool_io_schemas.py index 1ea6ed3e..4ca91b3d 100644 --- a/common/py_schemas/tool_io_schemas.py +++ b/common/py_schemas/tool_io_schemas.py @@ -1,10 +1,8 @@ +from typing import Dict, List, Optional + from langchain.pydantic_v1 import BaseModel, Field -from typing import Optional -from langchain_community.graphs.graph_document import ( - Node as BaseNode, - Relationship as BaseRelationship, -) -from typing import List, Dict, Type +from langchain_community.graphs.graph_document import Node as BaseNode +from langchain_community.graphs.graph_document import Relationship as BaseRelationship class MapQuestionToSchemaResponse(BaseModel): @@ -81,14 +79,27 @@ class KnowledgeGraph(BaseModel): ..., description="List of relationships in the knowledge graph" ) + class ReportQuestion(BaseModel): question: str = Field("The question to be asked") reasoning: str = Field("The reasoning behind the question") + class ReportSection(BaseModel): section: str = Field("Name of the section") description: str = Field("Description of the section") - questions: List[ReportQuestion] = Field("List of questions and reasoning for the section") + questions: List[ReportQuestion] = Field( + "List of questions and reasoning for the section" + ) + class ReportSections(BaseModel): sections: List[ReportSection] = Field("List of sections for the report") + + +class CommunitySummary(BaseModel): + 
"""Generate a summary of the documents that are within this community.""" + + summary: str = Field( + ..., description="The community summary derived from the input documents" + ) diff --git a/copilot/docs/notebooks/graphrag.ipynb b/copilot/docs/notebooks/graphrag.ipynb index bde1b78f..e915f392 100644 --- a/copilot/docs/notebooks/graphrag.ipynb +++ b/copilot/docs/notebooks/graphrag.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 9, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -33,7 +33,7 @@ "'The graph GraphRAG_pytgdocs is created.'" ] }, - "execution_count": 10, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -54,32 +54,32 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'host_name': 'https://algotesting.i.tgcloud.io',\n", - " 'schema_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: [add_supportai_schema].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_schema\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 0)\\\\nTrying to add local vertex \\'DocumentChunk\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Document\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Concept\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Entity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Relationship\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'DocumentCollection\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Content\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Community\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'ResolvedEntity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CONTENT\\' and its reverse edge \\'reverse_HAS_CONTENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_CHILD_OF\\' and its reverse edge \\'reverse_IS_CHILD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_HEAD_OF\\' and its reverse edge \\'reverse_IS_HEAD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_TAIL\\' and its reverse edge \\'reverse_HAS_TAIL\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_RELATIONSHIP\\' and its reverse edge \\'reverse_DESCRIBES_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_ENTITY\\' and its reverse edge \\'reverse_DESCRIBES_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_ENTITY\\' and its reverse edge \\'reverse_CONTAINS_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'MENTIONS_RELATIONSHIP\\' and 
its reverse edge \\'reverse_MENTIONS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_AFTER\\' and its reverse edge \\'reverse_IS_AFTER\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CHILD\\' and its reverse edge \\'reverse_HAS_CHILD\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_RELATIONSHIP\\' and its reverse edge \\'reverse_HAS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_DOCUMENT\\' and its reverse edge \\'reverse_CONTAINS_DOCUMENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RELATIONSHIP\\' and its reverse edge \\'reverse_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVES_TO\\' and its reverse edge \\'reverse_RESOLVES_TO\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVED_RELATIONSHIP\\' and its reverse edge \\'reverse_RESOLVED_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IN_COMMUNITY\\' and its reverse edge \\'reverse_IN_COMMUNITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 1\\\\nThe job add_supportai_schema completes in 2.208 seconds!\\\\nLocal schema change succeeded.\"',\n", - " 'index_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: [add_supportai_indexes].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_indexes\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 1)\\\\nTrying to add index \\'doc_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_indexepoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 2\\\\nThe job add_supportai_indexes 
completes in 3.025 seconds!\\\\nLocal schema change succeeded.\"'}" + " 'schema_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: [add_supportai_schema].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_schema\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 0)\\\\nTrying to add local vertex \\'DocumentChunk\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Document\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Concept\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Entity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Relationship\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'DocumentCollection\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Content\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Community\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'ResolvedEntity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CONTENT\\' and its reverse edge \\'reverse_HAS_CONTENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_CHILD_OF\\' and its reverse edge \\'reverse_IS_CHILD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_HEAD_OF\\' and its reverse edge \\'reverse_IS_HEAD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_TAIL\\' and its reverse edge \\'reverse_HAS_TAIL\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_RELATIONSHIP\\' and its reverse edge \\'reverse_DESCRIBES_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_ENTITY\\' and its reverse edge \\'reverse_DESCRIBES_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_ENTITY\\' and its reverse edge \\'reverse_CONTAINS_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'MENTIONS_RELATIONSHIP\\' and its reverse edge \\'reverse_MENTIONS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_AFTER\\' and its reverse edge \\'reverse_IS_AFTER\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CHILD\\' and its reverse edge \\'reverse_HAS_CHILD\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_RELATIONSHIP\\' and its reverse edge \\'reverse_HAS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_DOCUMENT\\' and its reverse edge \\'reverse_CONTAINS_DOCUMENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RELATIONSHIP\\' and its reverse edge \\'reverse_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVES_TO\\' and its reverse edge \\'reverse_RESOLVES_TO\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVED_RELATIONSHIP\\' and its reverse edge \\'reverse_RESOLVED_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IN_COMMUNITY\\' and 
its reverse edge \\'reverse_IN_COMMUNITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'LINKS_TO\\' and its reverse edge \\'reverse_LINKS_TO\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_PARENT\\' and its reverse edge \\'reverse_HAS_PARENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 1\\\\nThe job add_supportai_schema completes in 1.043 seconds!\\\\nLocal schema change succeeded.\"',\n", + " 'index_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: [add_supportai_indexes].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_indexes\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 1)\\\\nTrying to add index \\'doc_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_indexepoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 2\\\\nThe job add_supportai_indexes completes in 1.066 seconds!\\\\nLocal schema change succeeded.\"'}" ] }, - "execution_count": 12, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# And then add CoPilot's address to the connection. This address\n", - "# is the host's address where the CoPilot container is running.\n", + "# # And then add CoPilot's address to the connection. 
This address\n", + "# # is the host's address where the CoPilot container is running.\n", "conn.ai.configureCoPilotHost(\"http://localhost:8000\")\n", "conn.ai.initializeSupportAI()" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -95,18 +95,18 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'job_name': 'load_documents_content_json_b89acfebac9e4fb98efd20a49659808e',\n", - " 'job_id': 'GraphRAG_pytgdocs.load_documents_content_json_b89acfebac9e4fb98efd20a49659808e.stream.SupportAI_GraphRAG_pytgdocs_5698bff74d844534901cba9e1b3d55bf.1722466964295',\n", - " 'log_location': '/home/tigergraph/tigergraph/log/kafkaLoader/GraphRAG_pytgdocs.load_documents_content_json_b89acfebac9e4fb98efd20a49659808e.stream.SupportAI_GraphRAG_pytgdocs_5698bff74d844534901cba9e1b3d55bf.1722466964295'}" + "{'job_name': 'load_documents_content_json_852f54bfd00a475fa4efc3ba9319f0ac',\n", + " 'job_id': 'GraphRAG_pytgdocs.load_documents_content_json_852f54bfd00a475fa4efc3ba9319f0ac.stream.SupportAI_GraphRAG_pytgdocs_6a6331e3e5e248eaae389788c9bab325.1723217024268',\n", + " 'log_location': '/home/tigergraph/tigergraph/log/kafkaLoader/GraphRAG_pytgdocs.load_documents_content_json_852f54bfd00a475fa4efc3ba9319f0ac.stream.SupportAI_GraphRAG_pytgdocs_6a6331e3e5e248eaae389788c9bab325.1723217024268'}" ] }, - "execution_count": 14, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -121,41 +121,31 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 22, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "import httpx\n", - "import base64\n", + "# import httpx\n", + "# import base64\n", "\n", "\n", - "def make_headers(conn: TigerGraphConnection):\n", - " tkn = base64.b64encode(f\"{conn.username}:{conn.password}\".encode()).decode()\n", - " headers = {\"Authorization\": f\"Basic {tkn}\"}\n", - " return headers\n", + "# def make_headers(conn: TigerGraphConnection):\n", + "# tkn = base64.b64encode(f\"{conn.username}:{conn.password}\".encode()).decode()\n", + "# headers = {\"Authorization\": f\"Basic {tkn}\"}\n", + "# return headers\n", "\n", "\n", - "httpx.get(\n", - " \"http://localhost:8001/GraphRAG_pytgdocs/consistency_status/graphrag\",\n", - " headers=make_headers(conn),\n", - ")\n", - "# conn.ai.forceConsistencyUpdate()" + "# httpx.get(\n", + "# \"http://localhost:8001/GraphRAG_pytgdocs/consistency_status/graphrag\",\n", + "# headers=make_headers(conn),\n", + "# timeout=None,\n", + "# )\n", + "# # conn.ai.forceConsistencyUpdate()" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -165,7 +155,7 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[16], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43masdf\u001b[49m\n", + "Cell \u001b[0;32mIn[23], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43masdf\u001b[49m\n", "\u001b[0;31mNameError\u001b[0m: name 'asdf' is not defined" ] } @@ -176,24 +166,39 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "for v in 
[\"Community\"]:\n", + " try:\n", + " conn.delVertices(v)\n", + " except:\n", + " pass\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'job_name': 'load_documents_content_json_b89acfebac9e4fb98efd20a49659808e',\n", - " 'job_id': 'GraphRAG_pytgdocs.load_documents_content_json_b89acfebac9e4fb98efd20a49659808e.stream.SupportAI_GraphRAG_pytgdocs_5698bff74d844534901cba9e1b3d55bf.1722531204658',\n", - " 'log_location': '/home/tigergraph/tigergraph/log/kafkaLoader/GraphRAG_pytgdocs.load_documents_content_json_b89acfebac9e4fb98efd20a49659808e.stream.SupportAI_GraphRAG_pytgdocs_5698bff74d844534901cba9e1b3d55bf.1722531204658'}" + "{'job_name': 'load_documents_content_json_3e62fb87723945ea9a0380956694b7ec',\n", + " 'job_id': 'GraphRAG_pytgdocs.load_documents_content_json_3e62fb87723945ea9a0380956694b7ec.stream.SupportAI_GraphRAG_pytgdocs_cc751adab29643b28af1b7bf13b6515b.1723213722186',\n", + " 'log_location': '/home/tigergraph/tigergraph/log/kafkaLoader/GraphRAG_pytgdocs.load_documents_content_json_3e62fb87723945ea9a0380956694b7ec.stream.SupportAI_GraphRAG_pytgdocs_cc751adab29643b28af1b7bf13b6515b.1723213722186'}" ] }, - "execution_count": 30, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "for v in [\"Document\", \"Content\", \"DocumentChunk\", \"Entity\",\"ResolvedEntity\"]:\n", + "# for v in [\"Document\", \"Content\", \"DocumentChunk\", \"Entity\",\"ResolvedEntity\"]:\n", + "# for v in [\"ResolvedEntity\"]:\n", "# for v in [\"ResolvedEntity\"]:\n", " try:\n", " conn.delVertices(v)\n", @@ -222,32 +227,6 @@ "\"\"\")" ] }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "ename": "KeyError", - "evalue": "'deleted_vertices'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[33], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m conn\u001b[38;5;241m.\u001b[39mgetToken()\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCommunity\u001b[39m\u001b[38;5;124m\"\u001b[39m]:\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# for v in [\"ResolvedEntity\"]:\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdelVertices\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.venv/ml/lib/python3.11/site-packages/pyTigerGraph/pyTigerGraphVertex.py:688\u001b[0m, in \u001b[0;36mpyTigerGraphVertex.delVertices\u001b[0;34m(self, vertexType, where, limit, sort, permanent, timeout)\u001b[0m\n\u001b[1;32m 685\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mand\u001b[39;00m timeout \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 686\u001b[0m url \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m?\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m isFirst \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m&\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimeout=\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mstr\u001b[39m(timeout)\n\u001b[0;32m--> 
688\u001b[0m ret \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_delete\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdeleted_vertices\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[1;32m 690\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m logger\u001b[38;5;241m.\u001b[39mlevel \u001b[38;5;241m==\u001b[39m logging\u001b[38;5;241m.\u001b[39mDEBUG:\n\u001b[1;32m 691\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mreturn: \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mstr\u001b[39m(ret))\n", - "\u001b[0;31mKeyError\u001b[0m: 'deleted_vertices'" - ] - } - ], - "source": [ - "conn.graphname = \"Cora\"\n", - "conn.getToken()\n", - "for v in [\"Community\"]:\n", - " # for v in [\"ResolvedEntity\"]:\n", - " conn.delVertices(v)" - ] - }, { "cell_type": "code", "execution_count": null, diff --git a/eventual-consistency-service/app/ecc_util.py b/eventual-consistency-service/app/ecc_util.py index 5656e219..bccadd77 100644 --- a/eventual-consistency-service/app/ecc_util.py +++ b/eventual-consistency-service/app/ecc_util.py @@ -1,5 +1,15 @@ from common.chunkers import character_chunker, regex_chunker, semantic_chunker -from common.config import doc_processing_config, embedding_service +from common.config import doc_processing_config, embedding_service, llm_config +from common.llm_services import ( + AWS_SageMaker_Endpoint, + AWSBedrock, + AzureOpenAI, + GoogleVertexAI, + Groq, + HuggingFaceEndpoint, + Ollama, + OpenAI, +) def get_chunker(): @@ -22,3 +32,24 @@ def get_chunker(): raise ValueError("Invalid chunker type") return chunker + + +def get_llm_service(): + if llm_config["completion_service"]["llm_service"].lower() == "openai": + llm_provider = OpenAI(llm_config["completion_service"]) + elif llm_config["completion_service"]["llm_service"].lower() == "azure": + llm_provider = AzureOpenAI(llm_config["completion_service"]) + elif llm_config["completion_service"]["llm_service"].lower() == "sagemaker": + llm_provider = AWS_SageMaker_Endpoint(llm_config["completion_service"]) + elif llm_config["completion_service"]["llm_service"].lower() == "vertexai": + llm_provider = GoogleVertexAI(llm_config["completion_service"]) + elif llm_config["completion_service"]["llm_service"].lower() == "bedrock": + llm_provider = AWSBedrock(llm_config["completion_service"]) + elif llm_config["completion_service"]["llm_service"].lower() == "groq": + llm_provider = Groq(llm_config["completion_service"]) + elif llm_config["completion_service"]["llm_service"].lower() == "ollama": + llm_provider = Ollama(llm_config["completion_service"]) + elif llm_config["completion_service"]["llm_service"].lower() == "huggingface": + llm_provider = HuggingFaceEndpoint(llm_config["completion_service"]) + + return llm_provider diff --git a/eventual-consistency-service/app/graphrag/community_summarizer.py b/eventual-consistency-service/app/graphrag/community_summarizer.py new file mode 100644 index 00000000..d250b1f3 --- /dev/null +++ b/eventual-consistency-service/app/graphrag/community_summarizer.py @@ -0,0 +1,138 @@ +import json + +from langchain.output_parsers import PydanticOutputParser +from langchain.prompts import ChatPromptTemplate +from langchain_core.prompts import PromptTemplate + +from common.llm_services import LLM_Model +from common.py_schemas import CommunitySummary + +# 
src: https://github.com/microsoft/graphrag/blob/main/graphrag/index/graph/extractors/summarize/prompts.py +SUMMARIZE_PROMPT = PromptTemplate.from_template(""" +You are a helpful assistant responsible for generating a comprehensive summary of the data provided below. +Given one or two entities, and a list of descriptions, all related to the same entity or group of entities. +Please concatenate all of these into a single, comprehensive description. Make sure to include information collected from all the descriptions. +If the provided descriptions are contradictory, please resolve the contradictions and provide a single, coherent summary. +Make sure it is written in third person, and include the entity names so we the have full context. + +####### +-Data- +Entities: {entity_name} +Description List: {description_list} +####### +Output: +""") + + +class CommunitySummarizer: + def __init__( + self, + llm_service: LLM_Model, + ): + self.llm_service = llm_service + + def _extract_kg_from_doc(self, doc, chain, parser): + try: + out = chain.invoke( + {"input": doc, "format_instructions": parser.get_format_instructions()} + ) + except Exception as e: + print("Error: ", e) + return {"nodes": [], "rels": []} + try: + if "```json" not in out.content: + json_out = json.loads(out.content.strip("content=")) + else: + json_out = json.loads( + out.content.split("```")[1].strip("```").strip("json").strip() + ) + + formatted_rels = [] + for rels in json_out["rels"]: + if isinstance(rels["source"], str) and isinstance(rels["target"], str): + formatted_rels.append( + { + "source": rels["source"], + "target": rels["target"], + "type": rels["relation_type"].replace(" ", "_").upper(), + "definition": rels["definition"], + } + ) + elif isinstance(rels["source"], dict) and isinstance( + rels["target"], str + ): + formatted_rels.append( + { + "source": rels["source"]["id"], + "target": rels["target"], + "type": rels["relation_type"].replace(" ", "_").upper(), + "definition": rels["definition"], + } + ) + elif isinstance(rels["source"], str) and isinstance( + rels["target"], dict + ): + formatted_rels.append( + { + "source": rels["source"], + "target": rels["target"]["id"], + "type": rels["relation_type"].replace(" ", "_").upper(), + "definition": rels["definition"], + } + ) + elif isinstance(rels["source"], dict) and isinstance( + rels["target"], dict + ): + formatted_rels.append( + { + "source": rels["source"]["id"], + "target": rels["target"]["id"], + "type": rels["relation_type"].replace(" ", "_").upper(), + "definition": rels["definition"], + } + ) + else: + raise Exception("Relationship parsing error") + formatted_nodes = [] + for node in json_out["nodes"]: + formatted_nodes.append( + { + "id": node["id"], + "type": node["node_type"].replace(" ", "_").capitalize(), + "definition": node["definition"], + } + ) + + # filter relationships and nodes based on allowed types + if self.strict_mode: + if self.allowed_vertex_types: + formatted_nodes = [ + node + for node in formatted_nodes + if node["type"] in self.allowed_vertex_types + ] + if self.allowed_edge_types: + formatted_rels = [ + rel + for rel in formatted_rels + if rel["type"] in self.allowed_edge_types + ] + return {"nodes": formatted_nodes, "rels": formatted_rels} + except: + print("Error Processing: ", out) + return {"nodes": [], "rels": []} + + async def summarize(self, name: str, text: list[str]) -> CommunitySummary: + # parser = PydanticOutputParser(pydantic_object=CommunitySummary) + structured_llm = 
self.llm_service.model.with_structured_output(CommunitySummary) + chain = SUMMARIZE_PROMPT | structured_llm + summary = await chain.ainvoke( + { + "entity_name": name, + "description_list": text, + # "format_instructions": parser.get_format_instructions(), + } + ) + # summary = self._extract_kg_from_doc(text, chain, parser) + # summary = None + return summary.summary diff --git a/eventual-consistency-service/app/graphrag/graph_rag.py b/eventual-consistency-service/app/graphrag/graph_rag.py index 4403756d..d4e3a7d6 100644 --- a/eventual-consistency-service/app/graphrag/graph_rag.py +++ b/eventual-consistency-service/app/graphrag/graph_rag.py @@ -5,15 +5,16 @@ import httpx from aiochannel import Channel -from common.config import embedding_service -from common.embeddings.milvus_embedding_store import MilvusEmbeddingStore -from common.extractors.BaseExtractor import BaseExtractor from graphrag import workers from graphrag.util import http_timeout, init, make_headers, stream_ids from pyTigerGraph import TigerGraphConnection -http_logs = logging.getLogger("httpx") -http_logs.setLevel(logging.WARNING) +from common.config import embedding_service +from common.embeddings.milvus_embedding_store import MilvusEmbeddingStore +from common.extractors.BaseExtractor import BaseExtractor + +# http_logs = logging.getLogger("httpx") +# http_logs.setLevel(logging.WARNING) logger = logging.getLogger(__name__) consistency_checkers = {} @@ -209,7 +210,7 @@ async def resolve_entities( async with asyncio.TaskGroup() as grp: # for every entity async for entity_id in entity_chan: - print(f"***Etity ID from chan {entity_id}") + print(f"***Entity ID from chan {entity_id}", flush=True) grp.create_task( workers.resolve_entity(conn, upsert_chan, emb_store, entity_id) ) @@ -226,9 +227,115 @@ async def resolve_entities( res.raise_for_status() -async def communities(conn: TigerGraphConnection): - pass - # Setup +async def communities(conn: TigerGraphConnection, community_chan: Channel): + """ + Run louvain + """ + # first pass: Group ResolvedEntities into Communities + logger.info("Initializing Communities (first louvain pass)") + headers = make_headers(conn) + async with httpx.AsyncClient(timeout=None) as client: + res = await client.get( + f"{conn.restppUrl}/query/{conn.graphname}/graphrag_louvain_init", + params={"n_batches": 1}, + headers=headers, + ) + res.raise_for_status() + # get the modularity + async with httpx.AsyncClient(timeout=None) as client: + res = await client.get( + f"{conn.restppUrl}/query/{conn.graphname}/modularity", + params={"iteration": 1, "batch_num": 1}, + headers=headers, + ) + res.raise_for_status() + mod = res.json()["results"][0]["mod"] + print(f"****mod 1: {mod}", flush=True) + await community_chan.put(1) + + # nth pass: Iterate on Resolved Entities until modularity stops increasing + prev_mod = -10 + i = 0 + # for _ in range(1, 5): + prev_mod = 0 + while abs(prev_mod - mod) > 0.0000001 and prev_mod != 0: + prev_mod = mod + logger.info(f"Running louvain on Communities (iteration: {i})") + i += 1 + # louvain pass + async with httpx.AsyncClient(timeout=None) as client: + res = await client.get( + f"{conn.restppUrl}/query/{conn.graphname}/graphrag_louvain_communities", + params={"n_batches": 1}, + headers=headers, + ) + + res.raise_for_status() + + # get the modularity + async with httpx.AsyncClient(timeout=None) as client: + res = await client.get( + f"{conn.restppUrl}/query/{conn.graphname}/modularity", + params={"iteration": i + 1, "batch_num": 1}, + headers=headers, + ) + 
res.raise_for_status() + mod = res.json()["results"][0]["mod"] + print(f"*** mod {i+1}: {mod}", flush=True) + print(f"****** mod diff: {abs(prev_mod - mod)}", flush=True) + + # write iter to chan for layer to be processed + await community_chan.put(i + 1) + + # TODO: erase last run since it's ∆q to the run before it will be small + logger.info("closing communities chan") + community_chan.close() + + +async def stream_communities( + conn: TigerGraphConnection, + community_chan: Channel, + comm_process_chan: Channel, +): + """ + Streams Community IDs from the grpah for a given iteration (from the channel) + """ + logger.info("streaming communities") + + headers = make_headers(conn) + # TODO: + # can only do one layer at a time to ensure that every child community has their descriptions + async for i in community_chan: + # get the community from that layer + async with httpx.AsyncClient(timeout=None) as client: + resp = await client.get( + f"{conn.restppUrl}/query/{conn.graphname}/stream_community", + params={"iter": i}, + headers=headers, + ) + resp.raise_for_status() + comms = resp.json()["results"][0]["Comms"] + + for c in comms: + await comm_process_chan.put((i, c["v_id"])) + + logger.info("stream_communities done") + logger.info("closing comm_process_chan") + comm_process_chan.close() + + +async def summarize_communities( + conn: TigerGraphConnection, + comm_process_chan: Channel, + upsert_chan: Channel, +): + async with asyncio.TaskGroup() as tg: + async for c in comm_process_chan: + tg.create_task(workers.process_community(conn, upsert_chan, *c)) + break + + logger.info("closing upsert_chan") + upsert_chan.close() async def run(graphname: str, conn: TigerGraphConnection): @@ -245,7 +352,10 @@ async def run(graphname: str, conn: TigerGraphConnection): extractor, index_stores = await init(conn) init_start = time.perf_counter() - if False: + abc = True + abc = False + if abc: + logger.info("Doc Processing Start") docs_chan = Channel(1) embed_chan = Channel(100) upsert_chan = Channel(100) @@ -266,11 +376,13 @@ async def run(graphname: str, conn: TigerGraphConnection): extract(extract_chan, upsert_chan, embed_chan, extractor, conn) ) init_end = time.perf_counter() + logger.info("Doc Processing End") # Entity Resolution entity_start = time.perf_counter() - if False: + if abc: + logger.info("Entity Processing Start") entities_chan = Channel(100) upsert_chan = Channel(100) async with asyncio.TaskGroup() as grp: @@ -285,13 +397,35 @@ async def run(graphname: str, conn: TigerGraphConnection): ) grp.create_task(upsert(upsert_chan)) entity_end = time.perf_counter() + logger.info("Entity Processing End") # Community Detection community_start = time.perf_counter() if True: - await communities(conn) + # FIXME: delete community delete + for v in ["Community"]: + try: + conn.delVertices(v) + except: + pass + logger.info("Community Processing Start") + communities_chan = Channel(1) + upsert_chan = Channel(10) + comm_process_chan = Channel(100) + upsert_chan = Channel(100) + async with asyncio.TaskGroup() as grp: + # run louvain + grp.create_task(communities(conn, communities_chan)) + # get the communities + grp.create_task( + stream_communities(conn, communities_chan, comm_process_chan) + ) + # summarize each community + grp.create_task(summarize_communities(conn, comm_process_chan, upsert_chan)) + grp.create_task(upsert(upsert_chan)) community_end = time.perf_counter() + logger.info("Community Processing End") # Community Summarization end = time.perf_counter() diff --git 
a/eventual-consistency-service/app/graphrag/util.py b/eventual-consistency-service/app/graphrag/util.py index 74dbc56d..6876b5de 100644 --- a/eventual-consistency-service/app/graphrag/util.py +++ b/eventual-consistency-service/app/graphrag/util.py @@ -6,6 +6,9 @@ from glob import glob import httpx +from graphrag import workers +from pyTigerGraph import TigerGraphConnection + from common.config import ( doc_processing_config, embedding_service, @@ -17,36 +20,28 @@ from common.extractors import GraphExtractor, LLMEntityRelationshipExtractor from common.extractors.BaseExtractor import BaseExtractor from common.logs.logwriter import LogWriter -from graphrag import workers -from pyTigerGraph import TigerGraphConnection logger = logging.getLogger(__name__) http_timeout = httpx.Timeout(15.0) async def install_queries( - requried_queries: list[str], conn: TigerGraphConnection, n_workers=8 + requried_queries: list[str], + conn: TigerGraphConnection, ): # queries that are currently installed installed_queries = [q.split("/")[-1] for q in conn.getEndpoints(dynamic=True)] - tasks = [] - async with asyncio.TaskGroup() as grp: - for q in requried_queries: - # only install n queries at a time (n=n_workers) - async with asyncio.Semaphore(n_workers): - q_name = q.split("/")[-1] - # if the query is not installed, install it - if q_name not in installed_queries: - task = grp.create_task(workers.install_query(conn, q)) - tasks.append(task) - - for t in tasks: - res = t.result() - print(res) - # stop system if a required query doesn't install - if res["error"]: - raise Exception(res["message"]) + # doesn't need to be parallel since tg only does it one at a time + for q in requried_queries: + # only install n queries at a time (n=n_workers) + q_name = q.split("/")[-1] + # if the query is not installed, install it + if q_name not in installed_queries: + res = await workers.install_query(conn, q) + # stop system if a required query doesn't install + if res["error"]: + raise Exception(res["message"]) async def init_embedding_index(s: MilvusEmbeddingStore, vertex_field: str): @@ -69,9 +64,14 @@ async def init( "common/gsql/graphRAG/StreamDocContent", "common/gsql/graphRAG/SetEpochProcessing", "common/gsql/graphRAG/ResolveRelationships", + "common/gsql/graphRAG/get_community_children", + "common/gsql/graphRAG/louvain/graphrag_louvain_init", + "common/gsql/graphRAG/louvain/graphrag_louvain_communities", + "common/gsql/graphRAG/louvain/modularity", + "common/gsql/graphRAG/louvain/stream_community", ] # add louvain to queries - q = [x.split('.gsql')[0] for x in glob("common/gsql/graphRAG/louvain/*")] + q = [x.split(".gsql")[0] for x in glob("common/gsql/graphRAG/louvain/*")] requried_queries.extend(q) await install_queries(requried_queries, conn) @@ -246,3 +246,24 @@ async def upsert_edge( f"{conn.restppUrl}/graph/{conn.graphname}", data=data, headers=headers ) res.raise_for_status() + + +async def get_commuinty_children(conn, i: int, c: str): + headers = make_headers(conn) + async with httpx.AsyncClient(timeout=None) as client: + resp = await client.get( + f"{conn.restppUrl}/query/{conn.graphname}/get_community_children", + params={"comm": c, "iter": i}, + headers=headers, + ) + resp.raise_for_status() + descrs = [] + for d in resp.json()["results"][0]["children"]: + desc = d["attributes"]["description"] + if len(desc) == 0: + desc = d["v_id"] + + descrs.append(desc) + + print(">>>", descrs, flush=True) + return descrs diff --git a/eventual-consistency-service/app/graphrag/workers.py 
b/eventual-consistency-service/app/graphrag/workers.py index 4c1174df..22980d96 100644 --- a/eventual-consistency-service/app/graphrag/workers.py +++ b/eventual-consistency-service/app/graphrag/workers.py @@ -6,14 +6,15 @@ import ecc_util import httpx from aiochannel import Channel +from graphrag import community_summarizer, util +from langchain_community.graphs.graph_document import GraphDocument, Node +from pyTigerGraph import TigerGraphConnection + from common.config import milvus_config from common.embeddings.embedding_services import EmbeddingModel from common.embeddings.milvus_embedding_store import MilvusEmbeddingStore from common.extractors.BaseExtractor import BaseExtractor from common.logs.logwriter import LogWriter -from graphrag import util -from langchain_community.graphs.graph_document import GraphDocument, Node -from pyTigerGraph import TigerGraphConnection vertex_field = milvus_config.get("vertex_field", "vertex_id") @@ -298,14 +299,14 @@ async def resolve_entity( f"aget_k_closest should, minimally, return the entity itself.\n{results}" ) raise Exception() - if entity_id == "Dataframe": - print("result:", entity_id, results) + # FIXME: deleteme + # if entity_id == "Dataframe": + # print("result:", entity_id, results) # merge all entities into the ResolvedEntity vertex # use the longest v_id as the resolved entity's v_id - resolved_entity_id = "" + resolved_entity_id = entity_id for v in results: - # v_id = v.metadata["vertex_id"] if len(v) > len(resolved_entity_id): resolved_entity_id = v @@ -318,7 +319,7 @@ async def resolve_entity( "ResolvedEntity", # v_type resolved_entity_id, # v_id { # attrs - "description": [] + # "id": resolved_entity_id, }, ), ) @@ -340,3 +341,49 @@ async def resolve_entity( ), ) ) + + +async def process_community( + conn: TigerGraphConnection, + upsert_chan: Channel, + i: int, + c: str, +): + """ + https://github.com/microsoft/graphrag/blob/main/graphrag/prompt_tune/template/community_report_summarization.py + + Get children verts (Entity for layer-1 Communities, Community otherwise) + if the commuinty only has one child, use its description -- no need to summarize + + embed summaries + """ + print(i, c, flush=True) + + # get the children of the community + children = await util.get_commuinty_children(conn, i, c) + if i == 1: + tmp = [] + for c in children: + tmp.extend(c) + children = list(filter(lambda x: len(x) > 0, tmp)) + print(">>>", children, flush=True) + llm = ecc_util.get_llm_service() + summarizer = community_summarizer.CommunitySummarizer(llm) + summary = await summarizer.summarize(c, children) + await upsert_chan.put((upsert_summary, (conn,summary))) + + +async def upsert_summary(conn: TigerGraphConnection, summary: str): + print(f"SUMMARY:> {summary}", flush=True) + + # vertex_id = vertex_id.replace(" ", "_") + # attrs = map_attrs(attributes) + # data = json.dumps({"vertices": {vertex_type: {vertex_id: attrs}}}) + # headers = make_headers(conn) + # async with httpx.AsyncClient(timeout=http_timeout) as client: + # res = await client.post( + # f"{conn.restppUrl}/graph/{conn.graphname}", data=data, headers=headers + # ) + # + # res.raise_for_status() + # diff --git a/eventual-consistency-service/requirements.txt b/eventual-consistency-service/requirements.txt index 3bc0dae0..5d566dd1 100644 --- a/eventual-consistency-service/requirements.txt +++ b/eventual-consistency-service/requirements.txt @@ -7,6 +7,7 @@ appdirs==1.4.4 argon2-cffi==23.1.0 argon2-cffi-bindings==21.2.0 async-timeout==4.0.3 +asyncer==0.0.7 attrs==23.1.0 
azure-core==1.30.1 azure-storage-blob==12.19.1 @@ -24,12 +25,15 @@ cryptography==42.0.5 dataclasses-json==0.5.14 distro==1.8.0 docker-pycreds==0.4.0 +docstring_parser==0.16 emoji==2.8.0 environs==9.5.0 exceptiongroup==1.1.3 fastapi==0.103.1 +filelock==3.15.4 filetype==1.2.0 frozenlist==1.4.0 +fsspec==2024.6.1 gitdb==4.0.11 GitPython==3.1.40 google-api-core==2.14.0 @@ -51,24 +55,28 @@ h11==0.14.0 httpcore==0.18.0 httptools==0.6.0 httpx==0.25.0 -huggingface_hub==0.23.0 +huggingface-hub==0.23.0 idna==3.4 +iniconfig==2.0.0 isodate==0.6.1 +jiter==0.5.0 jmespath==1.0.1 joblib==1.3.2 jq==1.6.0 jsonpatch==1.33 jsonpointer==2.4 -langchain==0.1.12 -langchain-community==0.0.28 -langchain-core==0.1.49 -langchain-experimental==0.0.54 +langchain==0.2.12 +langchain-community==0.2.11 +langchain-core==0.2.29 +langchain-experimental==0.0.64 langchain-groq==0.1.3 -langchain-text-splitters==0.0.1 +langchain-openai==0.1.20 +langchain-text-splitters==0.2.2 langchainhub==0.1.14 langdetect==1.0.9 langgraph==0.0.40 -langsmith==0.1.24 +langsmith==0.1.98 +Levenshtein==0.25.1 lxml==4.9.3 marshmallow==3.20.1 minio==7.2.5 @@ -76,11 +84,12 @@ multidict==6.0.4 mypy-extensions==1.0.0 nltk==3.8.1 numpy==1.26.4 -openai==1.3.7 +openai==1.40.2 orjson==3.9.15 packaging==23.2 pandas==2.1.1 pathtools==0.1.2 +pluggy==1.5.0 prometheus_client==0.20.0 proto-plus==1.22.3 protobuf==4.24.4 @@ -94,15 +103,16 @@ pydantic==2.3.0 pydantic_core==2.6.3 pygit2==1.13.2 pymilvus==2.3.6 +pytest==8.2.0 python-dateutil==2.8.2 python-dotenv==1.0.0 python-iso639==2023.6.15 python-magic==0.4.27 pyTigerDriver==1.0.15 -pyTigerGraph==1.6.1 +pyTigerGraph==1.6.5 pytz==2023.3.post1 PyYAML==6.0.1 -rapidfuzz==3.4.0 +rapidfuzz==3.9.6 regex==2023.10.3 requests==2.31.0 rsa==4.9 @@ -118,12 +128,12 @@ SQLAlchemy==2.0.20 starlette==0.27.0 tabulate==0.9.0 tenacity==8.2.3 -tiktoken==0.5.1 +tiktoken==0.7.0 tqdm==4.66.1 types-requests==2.31.0.6 types-urllib3==1.26.25.14 typing-inspect==0.9.0 -typing_extensions==4.7.1 +typing_extensions==4.12.2 tzdata==2023.3 ujson==5.9.0 unstructured==0.10.23 From ef842ba278fd8cadd9b5be54dd6800040386cb8b Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Mon, 12 Aug 2024 16:50:33 -0400 Subject: [PATCH 10/53] graphrag pipeline done --- common/embeddings/milvus_embedding_store.py | 3 - .../gsql/graphRAG/communities_have_desc.gsql | 14 ++ .../louvain_old/louvain_1_first_pass.gsql | 176 -------------- .../louvain_old/louvain_2_other_passes.gsql | 217 ------------------ .../louvain_3_final_community.gsql | 44 ---- .../louvain_4_modularity_1_for_pass.gsql | 39 ---- .../louvain_4_modularity_2_final.gsql | 52 ----- .../graphRAG/louvain_old/louvain_5_reset.gsql | 13 -- copilot/docs/notebooks/graphrag.ipynb | 82 +++++-- .../app/graphrag/community_summarizer.py | 110 +-------- .../app/graphrag/graph_rag.py | 97 ++++---- .../app/graphrag/util.py | 30 ++- .../app/graphrag/workers.py | 58 ++--- 13 files changed, 196 insertions(+), 739 deletions(-) create mode 100644 common/gsql/graphRAG/communities_have_desc.gsql delete mode 100644 common/gsql/graphRAG/louvain_old/louvain_1_first_pass.gsql delete mode 100644 common/gsql/graphRAG/louvain_old/louvain_2_other_passes.gsql delete mode 100644 common/gsql/graphRAG/louvain_old/louvain_3_final_community.gsql delete mode 100644 common/gsql/graphRAG/louvain_old/louvain_4_modularity_1_for_pass.gsql delete mode 100644 common/gsql/graphRAG/louvain_old/louvain_4_modularity_2_final.gsql delete mode 100644 common/gsql/graphRAG/louvain_old/louvain_5_reset.gsql 
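Note on the pipeline wired up in this patch: every stage (extraction, entity resolution, community detection, community summarization) is connected the same way. Bounded aiochannel Channels link the stages, each stage runs as a task inside an asyncio.TaskGroup (Python 3.11+), and a producer closes its output channel when it is done so the consumer's async-for loop terminates. The snippet below is a minimal, self-contained sketch of that producer/consumer pattern; the function names and numbers are illustrative only and are not code from this patch.

import asyncio

from aiochannel import Channel


async def produce(out_chan: Channel):
    # producer: put work items on the channel, then close it so consumers stop
    for i in range(5):
        await out_chan.put(i)
    out_chan.close()


async def consume(in_chan: Channel, results: list[int]):
    # consumer: async-for drains the channel until it is closed and empty
    async for item in in_chan:
        results.append(item * 2)


async def main():
    chan = Channel(2)  # a bounded channel provides backpressure between stages
    results: list[int] = []
    async with asyncio.TaskGroup() as grp:
        grp.create_task(produce(chan))
        grp.create_task(consume(chan, results))
    print(results)  # [0, 2, 4, 6, 8]


if __name__ == "__main__":
    asyncio.run(main())

The same shape appears below in graph_rag.py, where communities feed stream_communities, which feeds summarize_communities, which feeds the upsert and embed workers.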
diff --git a/common/embeddings/milvus_embedding_store.py b/common/embeddings/milvus_embedding_store.py index fd57c783..7384e76f 100644 --- a/common/embeddings/milvus_embedding_store.py +++ b/common/embeddings/milvus_embedding_store.py @@ -606,9 +606,6 @@ def edit_dist_check(self, a: str, b: str, edit_dist_threshold: float, p=False): async def aget_k_closest( self, v_id: str, k=15, threshold_similarity=0.90, edit_dist_threshold_pct=0.75 ) -> list[Document]: - """ - asdf - """ threshold_dist = 1 - threshold_similarity # asyncify necessary funcs diff --git a/common/gsql/graphRAG/communities_have_desc.gsql b/common/gsql/graphRAG/communities_have_desc.gsql new file mode 100644 index 00000000..f5cda70e --- /dev/null +++ b/common/gsql/graphRAG/communities_have_desc.gsql @@ -0,0 +1,14 @@ +CREATE DISTRIBUTED QUERY communities_have_desc(UINT iter) SYNTAX V2{ + SumAccum @@descrs; + Comms = {Community.*}; + Comms = SELECT c FROM Comms:c + WHERE c.iteration == iter + ACCUM + IF length(c.description) > 0 THEN + @@descrs += 1 + END; + + + PRINT (@@descrs == Comms.size()) as all_have_desc; + PRINT @@descrs, Comms.size(); +} diff --git a/common/gsql/graphRAG/louvain_old/louvain_1_first_pass.gsql b/common/gsql/graphRAG/louvain_old/louvain_1_first_pass.gsql deleted file mode 100644 index 0251909f..00000000 --- a/common/gsql/graphRAG/louvain_old/louvain_1_first_pass.gsql +++ /dev/null @@ -1,176 +0,0 @@ -CREATE OR REPLACE DISTRIBUTED QUERY graphRAG_louvain_1( - UINT max_hop = 10, - UINT batch_num = 12, - UINT sample_edge_num = 100 -) { - - TYPEDEF TUPLE community, STRING ext_vid> MyTuple; //--> this should be Community, I think - SumAccum @@m; // the sum of the weights of all the links in the network - MinAccum> @{community_id_attribute_name}; // the community ID of the node - MinAccum @community_vid; // the community ID of the node - SumAccum @k; // the sum of the weights of the links incident to the node - SumAccum @k_in; // the sum of the weights of the links inside the previous community of the node - SumAccum @k_self_loop; // the weight of the self-loop link - MapAccum, SumAccum> @community_k_in_map; // the community of the neighbors of the nodes -> the sum of the weights of the links inside the community - MapAccum, SumAccum> @@community_sum_total_map; // community ID C -> the sum of the weights of the links incident to nodes in C - SumAccum @community_sum_total; // the sum of the weights of the links incident to nodes in the community of the node - MapAccum, SumAccum> @@community_sum_in_map; // community ID -> the sum of the weights of the links inside the community - MapAccum, MapAccum, SumAccum>> @@source_target_k_in_map; // source community ID -> (target community ID -> the sum of the weights of the links from the source community to the target community) - SumAccum @delta_Q_remove; // delta Q to remove the node from the previous community - MaxAccum @best_move; // best move of the node with the highest delta Q to move the isolated node into the new community - MaxAccum @@min_double; // used to reset the @best_move - SumAccum @@move_cnt; - OrAccum @to_change_community; - SumAccum @batch_id; - SumAccum @vid; - - DOUBLE wt = 1.0; - - // Initialization - All_Nodes = {{ResolvedEntity.*}}; - All_Nodes = SELECT s FROM All_Nodes:s -({relation_edge_name}:e)- :t - ACCUM @@m += wt / 2, - s.@k += wt, - IF s == t THEN // self-loop link - js.@k_self_loop += wt - END - POST-ACCUM - s.@{community_id_attribute_name} = s, - s.@community_vid = to_string(s.id), - s.@vid = getvid(s), - s.@batch_id = s.@vid % batch_num; 
- - IF @@m < 0.00000000001 THEN - PRINT "Warning: the sum of the weights in the edges should be greater than zero!"; - RETURN; - END; - - // Local moving - INT hop = 0; - Candidates = All_Nodes; - WHILE Candidates.size() > 0 AND hop < max_hop DO - hop = hop + 1; - LOG(TRUE, hop); - IF hop == 1 THEN // first iteration - ChangedNodes = SELECT s FROM Candidates:s -({relation_edge_name}:e)- :t - WHERE s.@{community_id_attribute_name} != t.@{community_id_attribute_name} - ACCUM s.@best_move += MyTuple(1 - s.@k * t.@k / (2 * @@m), t.@{community_id_attribute_name}, t.@community_vid) - POST-ACCUM - IF s.@best_move.delta_Q_add > 0 THEN // the gain (delta Q) is positive - s.@to_change_community = TRUE - END - HAVING s.@to_change_community == TRUE; - - ELSE // remaining iterations - // Calculate sum_total - Tmp = SELECT s FROM All_Nodes:s - POST-ACCUM - @@community_sum_total_map += (s.@{community_id_attribute_name} -> s.@k); - Tmp = SELECT s FROM All_Nodes:s - POST-ACCUM - s.@community_sum_total = @@community_sum_total_map.get(s.@{community_id_attribute_name}); - - @@community_sum_total_map.clear(); - // Find the best move - ChangedNodes = {{}}; - FOREACH batch_id IN RANGE[0, batch_num-1] DO - LOG(TRUE, batch_id); - // Calculate the delta Q to remove the node from the previous community - Nodes = SELECT s FROM Candidates:s -({relation_edge_name}:e)- :t - WHERE s.@batch_id == batch_id - ACCUM - IF s.@{community_id_attribute_name} == t.@{community_id_attribute_name} THEN - s.@k_in += wt - ELSE - s.@community_k_in_map += (t.@{community_id_attribute_name} -> wt) - END - POST-ACCUM - s.@delta_Q_remove = 2 * s.@k_self_loop - 2 * s.@k_in + s.@k * (s.@community_sum_total - s.@k) / @@m, - s.@k_in = 0, - s.@best_move = MyTuple(@@min_double, s, to_string(s.id)) // reset the delta_Q_add - ; - - // Find the best move - Nodes = SELECT s FROM Nodes:s -({relation_edge_name}:e)- :t - //SAMPLE sample_edge_num EDGE WHEN s.outdegree("{relation_edge_name}") > sample_edge_num - WHERE s.@{community_id_attribute_name} != t.@{community_id_attribute_name} - ACCUM DOUBLE delta_Q_add = 2 * s.@community_k_in_map.get(t.@{community_id_attribute_name}) - s.@k * t.@community_sum_total / @@m, - s.@best_move += MyTuple(delta_Q_add, t.@{community_id_attribute_name}, t.@community_vid) - POST-ACCUM - IF s.@delta_Q_remove + s.@best_move.delta_Q_add > 0 THEN // the gain (delta Q) is positive - s.@to_change_community = TRUE - END, - s.@community_k_in_map.clear() - HAVING s.@to_change_community == TRUE; - - ChangedNodes = ChangedNodes UNION Nodes; - END; - END; - // If two nodes swap, only change the community of one of them - SwapNodes = SELECT s FROM ChangedNodes:s -({relation_edge_name}:e)- :t - WHERE s.@best_move.community == t.@{community_id_attribute_name} - AND t.@to_change_community == TRUE - AND t.@best_move.community == s.@{community_id_attribute_name} - // only change the one with larger delta Q or the one with smaller @vid if delta Q are the same - AND ( - s.@delta_Q_remove + s.@best_move.delta_Q_add < t.@delta_Q_remove + t.@best_move.delta_Q_add - OR ( - abs((s.@delta_Q_remove + s.@best_move.delta_Q_add) - (t.@delta_Q_remove + t.@best_move.delta_Q_add)) < 0.00000000001 - AND s.@vid > t.@vid - ) - ) - POST-ACCUM - s.@to_change_community = FALSE; - - ChangedNodes = ChangedNodes MINUS SwapNodes; - - // Place each node of ChangedNodes in the community in which the gain is maximum - ChangedNodes = SELECT s FROM ChangedNodes:s - POST-ACCUM - s.@{community_id_attribute_name} = s.@best_move.community, - s.@community_vid = 
s.@best_move.ext_vid, - s.@to_change_community = FALSE; - - @@move_cnt += ChangedNodes.size(); - - // Get all neighbours of the changed node that do not belong to the node’s new community - Candidates = SELECT t FROM ChangedNodes:s -({relation_edge_name}:e)- :t - WHERE t.@{community_id_attribute_name} != s.@{community_id_attribute_name}; - END; - - PRINT @@move_cnt AS Delta; - - // Coarsening - UINT new_layer = 0; - @@community_sum_total_map.clear(); - Tmp = - SELECT s FROM All_Nodes:s -({relation_edge_name}:e)- :t - ACCUM - IF s.@{community_id_attribute_name} == t.@{community_id_attribute_name} THEN - @@community_sum_in_map += (s.@{community_id_attribute_name} -> wt) - END - POST-ACCUM - //f_belongs_to.println(s.id, s.@{community_id_attribute_name}, new_layer), - INSERT INTO {belongs_to_edge_name} VALUES (s, str_to_int(s.@community_vid), new_layer), - IF @@community_sum_in_map.containsKey(s) THEN - //f_links_to.println(s.id, s.id, @@community_sum_in_map.get(s), new_layer) - INSERT INTO {links_to_edge_name} VALUES (s,s, (new_layer -> @@community_sum_in_map.get(s))) - END; - - @@community_sum_in_map.clear(); - - Tmp = SELECT s FROM All_Nodes:s -({relation_edge_name}:e)- :t - ACCUM - IF s.@{community_id_attribute_name} != t.@{community_id_attribute_name} THEN - @@source_target_k_in_map += (s.@{community_id_attribute_name} -> (t.@{community_id_attribute_name} -> wt)) - END - POST-ACCUM - IF @@source_target_k_in_map.containsKey(s) THEN - FOREACH (target_community, k_in) IN @@source_target_k_in_map.get(s) DO - //f_links_to.println(s.id, target_community, k_in, new_layer) - INSERT INTO {links_to_edge_name} VALUES (s,target_community, (new_layer -> k_in)) - END - END; - - @@source_target_k_in_map.clear(); -} diff --git a/common/gsql/graphRAG/louvain_old/louvain_2_other_passes.gsql b/common/gsql/graphRAG/louvain_old/louvain_2_other_passes.gsql deleted file mode 100644 index 231631d6..00000000 --- a/common/gsql/graphRAG/louvain_old/louvain_2_other_passes.gsql +++ /dev/null @@ -1,217 +0,0 @@ -USE GRAPH {graph_name} -DROP QUERY {query_name} -CREATE OR REPLACE DISTRIBUTED QUERY graphRAG_louvain_2( - UINT layer = 0, - UINT max_hop = 10, - UINT batch_num = 1 -) FOR GRAPH {graph_name} SYNTAX v1 {{ - TYPEDEF TUPLE community, STRING ext_vid> MyTuple; - SumAccum @@m; // the sum of the weights of all the links in the network - MinAccum> @{community_id_attribute_name}; // the community ID of the node - MinAccum @community_vid; // the community ID of the node - SumAccum @k; // the sum of the weights of the links incident to the node - SumAccum @k_in; // the sum of the weights of the links inside the previous community of the node - SumAccum @k_self_loop; // the weight of the self-loop link - MapAccum, SumAccum> @community_k_in_map; // the community of the neighbors of the nodes -> the sum of the weights of the links inside the community - MapAccum, SumAccum> @@community_sum_total_map; // community ID C -> the sum of the weights of the links incident to nodes in C - SumAccum @community_sum_total; // the sum of the weights of the links incident to nodes in the community of the node - MapAccum, SumAccum> @@community_sum_in_map; // community ID -> the sum of the weights of the links inside the community - MapAccum, MapAccum, SumAccum>> @@source_target_k_in_map; // source community ID -> (target community ID -> the sum of the weights of the links from the source community to the target community) - SumAccum @delta_Q_remove; // delta Q to remove the node from the previous community - MaxAccum @best_move; // best 
move of the node with the highest delta Q to move the isolated node into the new community - MaxAccum @@min_double; // used to reset the @best_move - SumAccum @@move_cnt; - OrAccum @to_change_community; - SumAccum @batch_id; - SumAccum @vid; - SumAccum @@links_to_check; - - // Initialization - LOG(TRUE, "Query started!"); - All_Nodes = {{{entity_vertex_name}.*}}; - _tmp = - SELECT s - FROM All_Nodes:s -({links_to_edge_name}:e)- :t - ACCUM - @@links_to_check += 1; - - All_Nodes = - SELECT s - FROM All_Nodes:s -({links_to_edge_name}:e)- :t - WHERE e.layer_weight_map.containsKey(layer) - ACCUM DOUBLE weight = e.layer_weight_map.get(layer), - @@m += weight / 2, - s.@k += weight, - IF s == t THEN // self-loop link - s.@k_self_loop += weight - END - POST-ACCUM - s.@{community_id_attribute_name} = s, - s.@community_vid = to_string(s.id), - s.@vid = getvid(s), - s.@batch_id = s.@vid % batch_num - ; - LOG(TRUE, All_Nodes.size()); - IF @@m < 0.00000000001 THEN - PRINT "Warning: the sum of the weights in the edges should be greater than zero!"; - RETURN; - END; - - // Local moving - INT hop = 0; - Candidates = All_Nodes; - WHILE Candidates.size() > 0 AND hop < max_hop DO - hop = hop + 1; - LOG(TRUE, hop); - IF hop == 1 THEN // first iteration - ChangedNodes = - SELECT s - FROM Candidates:s -({links_to_edge_name}:e)- :t - WHERE e.layer_weight_map.containsKey(layer) - AND s.@{community_id_attribute_name} != t.@{community_id_attribute_name} - ACCUM s.@best_move += MyTuple(1 - s.@k * t.@k / (2 * @@m), t.@{community_id_attribute_name}, t.@community_vid) - POST-ACCUM - IF s.@best_move.delta_Q_add > 0 THEN // the gain (delta Q) is positive - s.@to_change_community = TRUE - END - HAVING s.@to_change_community == TRUE - ; - ELSE // remaining iterations - // Calculate sum_total - Tmp = - SELECT s - FROM All_Nodes:s - POST-ACCUM - @@community_sum_total_map += (s.@{community_id_attribute_name} -> s.@k) - ; - Tmp = - SELECT s - FROM All_Nodes:s - POST-ACCUM - s.@community_sum_total = @@community_sum_total_map.get(s.@{community_id_attribute_name}) - ; - LOG(TRUE, @@community_sum_total_map.size()); - @@community_sum_total_map.clear(); - // Find the best move - ChangedNodes = {{}}; - FOREACH batch_id IN RANGE[0, batch_num-1] DO - LOG(TRUE, batch_id); - // Calculate the delta Q to remove the node from the previous community - Nodes = - SELECT s - FROM Candidates:s -({links_to_edge_name}:e)- :t - WHERE e.layer_weight_map.containsKey(layer) - AND s.@batch_id == batch_id - ACCUM DOUBLE weight = e.layer_weight_map.get(layer), - IF s.@{community_id_attribute_name} == t.@{community_id_attribute_name} THEN - s.@k_in += weight - ELSE - s.@community_k_in_map += (t.@{community_id_attribute_name} -> weight) - END - POST-ACCUM - s.@delta_Q_remove = 2 * s.@k_self_loop - 2 * s.@k_in + s.@k * (s.@community_sum_total - s.@k) / @@m, - s.@k_in = 0, - s.@best_move = MyTuple(@@min_double, s, to_string(s.id)) // reset the delta_Q_add - ; - // Find the best move - Nodes = - SELECT s - FROM Nodes:s -({links_to_edge_name}:e)- :t - WHERE e.layer_weight_map.containsKey(layer) - AND s.@{community_id_attribute_name} != t.@{community_id_attribute_name} - ACCUM DOUBLE delta_Q_add = 2 * s.@community_k_in_map.get(t.@{community_id_attribute_name}) - s.@k * t.@community_sum_total / @@m, - s.@best_move += MyTuple(delta_Q_add, t.@{community_id_attribute_name}, t.@community_vid) - POST-ACCUM - IF s.@delta_Q_remove + s.@best_move.delta_Q_add > 0 THEN // the gain (delta Q) is positive - s.@to_change_community = TRUE - END, - s.@community_k_in_map.clear() 
- HAVING s.@to_change_community == TRUE - ; - ChangedNodes = ChangedNodes UNION Nodes; - END; - END; - // If two nodes swap, only change the community of one of them - SwapNodes = - SELECT s - FROM ChangedNodes:s -({links_to_edge_name}:e)- :t - WHERE e.layer_weight_map.containsKey(layer) - AND s.@best_move.community == t.@{community_id_attribute_name} - AND t.@to_change_community == TRUE - AND t.@best_move.community == s.@{community_id_attribute_name} - // only change the one with larger delta Q or the one with smaller @vid if delta Q are the same - AND (s.@delta_Q_remove + s.@best_move.delta_Q_add < t.@delta_Q_remove + t.@best_move.delta_Q_add - OR (abs((s.@delta_Q_remove + s.@best_move.delta_Q_add) - (t.@delta_Q_remove + t.@best_move.delta_Q_add)) < 0.00000000001 - AND s.@vid > t.@vid)) - POST-ACCUM - s.@to_change_community = FALSE - ; - LOG(TRUE, SwapNodes.size()); - ChangedNodes = ChangedNodes MINUS SwapNodes; - LOG(TRUE, ChangedNodes.size()); - // Place each node of ChangedNodes in the community in which the gain is maximum - ChangedNodes = - SELECT s - FROM ChangedNodes:s - POST-ACCUM - s.@{community_id_attribute_name} = s.@best_move.community, - s.@community_vid = s.@best_move.ext_vid, - s.@to_change_community = FALSE - ; - - @@move_cnt += ChangedNodes.size(); - // Get all neighbours of the changed node that do not belong to the node’s new community - Candidates = - SELECT t - FROM ChangedNodes:s -({links_to_edge_name}:e)- :t - WHERE e.layer_weight_map.containsKey(layer) - AND t.@{community_id_attribute_name} != s.@{community_id_attribute_name} - ; - LOG(TRUE, Candidates.size()); - END; - - PRINT @@move_cnt AS Delta; - - // Coarsening - LOG(TRUE, "Coarsening"); - UINT new_layer = layer + 1; - @@community_sum_total_map.clear(); - Tmp = - SELECT s - FROM All_Nodes:s -({links_to_edge_name}:e)- :t - WHERE e.layer_weight_map.containsKey(layer) - ACCUM IF s.@{community_id_attribute_name} == t.@{community_id_attribute_name} THEN - DOUBLE weight = e.layer_weight_map.get(layer), - @@community_sum_in_map += (s.@{community_id_attribute_name} -> weight) - END - POST-ACCUM - //f_belongs_to.println(s.id, s.@{community_id_attribute_name}, new_layer), - INSERT INTO {belongs_to_edge_name} VALUES (s, str_to_int(s.@community_vid), new_layer), - IF @@community_sum_in_map.containsKey(s) THEN - //f_links_to.println(s.id, s.id, @@community_sum_in_map.get(s), new_layer) - INSERT INTO {links_to_edge_name} VALUES (s,s, (new_layer -> @@community_sum_in_map.get(s))) - END - ; - LOG(TRUE, @@community_sum_in_map.size()); - @@community_sum_in_map.clear(); - Tmp = - SELECT s - FROM All_Nodes:s -({links_to_edge_name}:e)- :t - WHERE e.layer_weight_map.containsKey(layer) - ACCUM DOUBLE weight = e.layer_weight_map.get(layer), - IF s.@{community_id_attribute_name} != t.@{community_id_attribute_name} THEN - @@source_target_k_in_map += (s.@{community_id_attribute_name} -> (t.@{community_id_attribute_name} -> weight)) - END - POST-ACCUM - IF @@source_target_k_in_map.containsKey(s) THEN - FOREACH (target_community, k_in) IN @@source_target_k_in_map.get(s) DO - //f_links_to.println(s.uniq_id, target_community, k_in, new_layer) - INSERT INTO {links_to_edge_name} VALUES (s,target_community, (new_layer -> k_in)) - END - END - ; - LOG(TRUE, @@source_target_k_in_map.size()); - @@source_target_k_in_map.clear(); - PRINT @@links_to_check; - LOG(TRUE, "Query finished!"); -}} diff --git a/common/gsql/graphRAG/louvain_old/louvain_3_final_community.gsql b/common/gsql/graphRAG/louvain_old/louvain_3_final_community.gsql deleted file mode 
100644 index 75cbad7e..00000000 --- a/common/gsql/graphRAG/louvain_old/louvain_3_final_community.gsql +++ /dev/null @@ -1,44 +0,0 @@ -USE GRAPH {graph_name} -DROP QUERY {query_name} -CREATE OR REPLACE DISTRIBUTED QUERY graphRAG_louvain_3( - UINT top_layer = 2 -) FOR GRAPH {graph_name} SYNTAX v1 {{ - MinAccum @{community_id_attribute_name}; // the community ID of the node - INT layer = top_layer; - - // Initialization - LOG(TRUE, "Query started!"); - All_Nodes = {{{entity_vertex_name}.*}}; - - // Top layer - Nodes = - SELECT t - FROM All_Nodes:s -(reverse_{belongs_to_edge_name}:e)- :t - WHERE layer IN e.layer_set - ACCUM t.@{community_id_attribute_name} = to_string(s.id) - ; - LOG(TRUE, layer, Nodes.size()); - - // Other layers - WHILE Nodes.size() > 0 AND layer > 0 DO - layer = layer - 1; - Nodes = - SELECT t - FROM Nodes:s -(reverse_{belongs_to_edge_name}:e)- :t - WHERE layer IN e.layer_set - ACCUM t.@{community_id_attribute_name} = s.@{community_id_attribute_name} - ; - LOG(TRUE, layer, Nodes.size()); - END; - - // Write to the file - Nodes = - SELECT s - FROM Nodes:s - POST-ACCUM - //f.println(s.uniq_id, s.@{community_id_attribute_name}) - s.{community_id_attribute_name} = s.@{community_id_attribute_name} - - ; - LOG(TRUE, "Query finished!"); -}} diff --git a/common/gsql/graphRAG/louvain_old/louvain_4_modularity_1_for_pass.gsql b/common/gsql/graphRAG/louvain_old/louvain_4_modularity_1_for_pass.gsql deleted file mode 100644 index 0058d0ee..00000000 --- a/common/gsql/graphRAG/louvain_old/louvain_4_modularity_1_for_pass.gsql +++ /dev/null @@ -1,39 +0,0 @@ -USE GRAPH {graph_name} -DROP QUERY {query_name} -CREATE OR REPLACE DISTRIBUTED QUERY graphRAG_louvain_4a( - UINT layer=0 -) FOR GRAPH {graph_name} SYNTAX v1 {{ - SumAccum @@sum_weight; // the sum of the weights of all the links in the network - MapAccum, SumAccum> @@community_total_weight_map; // community ID C -> the sum of the weights of the links incident to nodes in C - MapAccum, SumAccum> @@community_in_weight_map; // community ID -> the sum of the weights of the links inside the community - SumAccum @@modularity; - - All_Nodes = {{{entity_vertex_name}.*}}; - All_Nodes = - SELECT s - FROM All_Nodes:s -({links_to_edge_name}:e)- :t - WHERE e.layer_weight_map.containsKey(layer) - ACCUM DOUBLE weight = e.layer_weight_map.get(layer), - IF s == t THEN - @@community_in_weight_map += (s -> weight) - END, - @@community_total_weight_map += (s -> weight), - @@sum_weight += weight - ; - LOG(TRUE, All_Nodes.size()); - @@modularity = 0; - FOREACH (community, total_weight) IN @@community_total_weight_map DO - DOUBLE in_weight = 0; - IF @@community_in_weight_map.containsKey(community) THEN - in_weight = @@community_in_weight_map.get(community); - END; - @@modularity += in_weight / @@sum_weight - pow(total_weight / @@sum_weight, 2); - END; - // PRINT @@modularity, @@community_in_weight_map, @@community_total_weight_map, @@sum_weight; - PRINT layer; - PRINT @@modularity AS modularity; - PRINT @@community_total_weight_map.size() AS community_number; - PRINT All_Nodes.size(); - @@community_in_weight_map.clear(); - @@community_total_weight_map.clear(); -}} diff --git a/common/gsql/graphRAG/louvain_old/louvain_4_modularity_2_final.gsql b/common/gsql/graphRAG/louvain_old/louvain_4_modularity_2_final.gsql deleted file mode 100644 index 31ba4d0b..00000000 --- a/common/gsql/graphRAG/louvain_old/louvain_4_modularity_2_final.gsql +++ /dev/null @@ -1,52 +0,0 @@ -USE GRAPH {graph_name} -DROP QUERY {query_name} -CREATE OR REPLACE DISTRIBUTED QUERY 
graphRAG_louvain_4b( -) FOR GRAPH {graph_name} SYNTAX v1 {{ - SumAccum @@sum_weight; // the sum of the weights of all the links in the network - MapAccum> @@community_total_weight_map; // community ID C -> the sum of the weights of the links incident to nodes in C - MapAccum> @@community_in_weight_map; // community ID -> the sum of the weights of the links inside the community - SumAccum @@modularity; - MapAccum> @@Community_sizes; - MapAccum> @@count_of_sizes; - AvgAccum @@avg_community_size; - - DOUBLE wt = 1.0; - All_Nodes = {{{entity_vertex_name}.*}}; - Nodes = - SELECT s - FROM All_Nodes:s -({relation_edge_name}:e)- :t - ACCUM IF s.{community_id_attribute_name} == t.{community_id_attribute_name} THEN - @@community_in_weight_map += (s.{community_id_attribute_name} -> wt) - END, - @@community_total_weight_map += (s.{community_id_attribute_name} -> wt), - @@sum_weight += wt - ; - @@modularity = 0; - FOREACH (community, total_weight) IN @@community_total_weight_map DO - DOUBLE in_weight = 0; - IF @@community_in_weight_map.containsKey(community) THEN - in_weight = @@community_in_weight_map.get(community); - END; - @@modularity += in_weight / @@sum_weight - pow(total_weight / @@sum_weight, 2); - END; - - _tmp = - SELECT s - FROM All_Nodes:s - POST-ACCUM - @@Community_sizes += (s.{community_id_attribute_name} -> 1); - - FOREACH (comm, cnt) IN @@Community_sizes DO - @@count_of_sizes += (cnt -> 1); - @@avg_community_size += cnt; - END; - - // PRINT @@modularity, @@community_in_weight_map, @@community_total_weight_map, @@sum_weight; - PRINT @@modularity AS modularity; - PRINT @@community_total_weight_map.size() AS community_number; - PRINT @@count_of_sizes AS num_communities_by_size; - PRINT @@avg_community_size AS avg_community_size; - - @@community_in_weight_map.clear(); - @@community_total_weight_map.clear(); -}} diff --git a/common/gsql/graphRAG/louvain_old/louvain_5_reset.gsql b/common/gsql/graphRAG/louvain_old/louvain_5_reset.gsql deleted file mode 100644 index 7590935a..00000000 --- a/common/gsql/graphRAG/louvain_old/louvain_5_reset.gsql +++ /dev/null @@ -1,13 +0,0 @@ -USE GRAPH {graph_name} -DROP QUERY {query_name} -CREATE OR REPLACE DISTRIBUTED QUERY graphRAG_louvain_5_reset( -) FOR GRAPH {graph_name} SYNTAX v1 {{ - - // Initialization - Nodes = {{{entity_vertex_name}.*}}; - - // Top layer - DELETE e - FROM Nodes:s -(({belongs_to_edge_name}|{links_to_edge_name}):e)- :t - ; -}} diff --git a/copilot/docs/notebooks/graphrag.ipynb b/copilot/docs/notebooks/graphrag.ipynb index e915f392..411f5d62 100644 --- a/copilot/docs/notebooks/graphrag.ipynb +++ b/copilot/docs/notebooks/graphrag.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 16, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -33,7 +33,7 @@ "'The graph GraphRAG_pytgdocs is created.'" ] }, - "execution_count": 17, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -54,18 +54,18 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'host_name': 'https://algotesting.i.tgcloud.io',\n", - " 'schema_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: 
[add_supportai_schema].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_schema\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 0)\\\\nTrying to add local vertex \\'DocumentChunk\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Document\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Concept\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Entity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Relationship\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'DocumentCollection\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Content\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Community\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'ResolvedEntity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CONTENT\\' and its reverse edge \\'reverse_HAS_CONTENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_CHILD_OF\\' and its reverse edge \\'reverse_IS_CHILD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_HEAD_OF\\' and its reverse edge \\'reverse_IS_HEAD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_TAIL\\' and its reverse edge \\'reverse_HAS_TAIL\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_RELATIONSHIP\\' and its reverse edge \\'reverse_DESCRIBES_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_ENTITY\\' and its reverse edge \\'reverse_DESCRIBES_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_ENTITY\\' and its reverse edge \\'reverse_CONTAINS_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'MENTIONS_RELATIONSHIP\\' and its reverse edge \\'reverse_MENTIONS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_AFTER\\' and its reverse edge \\'reverse_IS_AFTER\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CHILD\\' and its reverse edge \\'reverse_HAS_CHILD\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_RELATIONSHIP\\' and its reverse edge \\'reverse_HAS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_DOCUMENT\\' and its reverse edge \\'reverse_CONTAINS_DOCUMENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RELATIONSHIP\\' and its reverse edge \\'reverse_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVES_TO\\' and its reverse edge \\'reverse_RESOLVES_TO\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVED_RELATIONSHIP\\' and its reverse edge \\'reverse_RESOLVED_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IN_COMMUNITY\\' and its reverse edge \\'reverse_IN_COMMUNITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'LINKS_TO\\' and its reverse edge \\'reverse_LINKS_TO\\' to the graph 
\\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_PARENT\\' and its reverse edge \\'reverse_HAS_PARENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 1\\\\nThe job add_supportai_schema completes in 1.043 seconds!\\\\nLocal schema change succeeded.\"',\n", - " 'index_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: [add_supportai_indexes].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_indexes\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 1)\\\\nTrying to add index \\'doc_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_indexepoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 2\\\\nThe job add_supportai_indexes completes in 1.066 seconds!\\\\nLocal schema change succeeded.\"'}" + " 'schema_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: [add_supportai_schema].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. 
To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_schema\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 0)\\\\nTrying to add local vertex \\'DocumentChunk\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Document\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Concept\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Entity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Relationship\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'DocumentCollection\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Content\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Community\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'ResolvedEntity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CONTENT\\' and its reverse edge \\'reverse_HAS_CONTENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_CHILD_OF\\' and its reverse edge \\'reverse_IS_CHILD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_HEAD_OF\\' and its reverse edge \\'reverse_IS_HEAD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_TAIL\\' and its reverse edge \\'reverse_HAS_TAIL\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_RELATIONSHIP\\' and its reverse edge \\'reverse_DESCRIBES_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_ENTITY\\' and its reverse edge \\'reverse_DESCRIBES_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_ENTITY\\' and its reverse edge \\'reverse_CONTAINS_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'MENTIONS_RELATIONSHIP\\' and its reverse edge \\'reverse_MENTIONS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_AFTER\\' and its reverse edge \\'reverse_IS_AFTER\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CHILD\\' and its reverse edge \\'reverse_HAS_CHILD\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_RELATIONSHIP\\' and its reverse edge \\'reverse_HAS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_DOCUMENT\\' and its reverse edge \\'reverse_CONTAINS_DOCUMENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RELATIONSHIP\\' and its reverse edge \\'reverse_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVES_TO\\' and its reverse edge \\'reverse_RESOLVES_TO\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVED_RELATIONSHIP\\' and its reverse edge \\'reverse_RESOLVED_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IN_COMMUNITY\\' and its reverse edge \\'reverse_IN_COMMUNITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'LINKS_TO\\' and its reverse edge \\'reverse_LINKS_TO\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_PARENT\\' and its reverse edge \\'reverse_HAS_PARENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 
1\\\\nThe job add_supportai_schema completes in 1.845 seconds!\\\\nLocal schema change succeeded.\"',\n", + " 'index_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: [add_supportai_indexes].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_indexes\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 1)\\\\nTrying to add index \\'doc_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_indexepoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 2\\\\nThe job add_supportai_indexes completes in 1.085 seconds!\\\\nLocal schema change succeeded.\"'}" ] }, - "execution_count": 19, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -79,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -95,18 +95,18 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'job_name': 'load_documents_content_json_852f54bfd00a475fa4efc3ba9319f0ac',\n", - " 'job_id': 'GraphRAG_pytgdocs.load_documents_content_json_852f54bfd00a475fa4efc3ba9319f0ac.stream.SupportAI_GraphRAG_pytgdocs_6a6331e3e5e248eaae389788c9bab325.1723217024268',\n", - " 'log_location': '/home/tigergraph/tigergraph/log/kafkaLoader/GraphRAG_pytgdocs.load_documents_content_json_852f54bfd00a475fa4efc3ba9319f0ac.stream.SupportAI_GraphRAG_pytgdocs_6a6331e3e5e248eaae389788c9bab325.1723217024268'}" + "{'job_name': 'load_documents_content_json_cde7e4db979b4ba8a0b6ec5eb927f875',\n", + " 'job_id': 'GraphRAG_pytgdocs.load_documents_content_json_cde7e4db979b4ba8a0b6ec5eb927f875.stream.SupportAI_GraphRAG_pytgdocs_48ee36da7b7644e4995722a6e057d446.1723494758507',\n", + " 'log_location': 
'/home/tigergraph/tigergraph/log/kafkaLoader/GraphRAG_pytgdocs.load_documents_content_json_cde7e4db979b4ba8a0b6ec5eb927f875.stream.SupportAI_GraphRAG_pytgdocs_48ee36da7b7644e4995722a6e057d446.1723494758507'}" ] }, - "execution_count": 21, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -121,7 +121,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -145,7 +145,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -155,7 +155,7 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[23], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43masdf\u001b[49m\n", + "Cell \u001b[0;32mIn[32], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43masdf\u001b[49m\n", "\u001b[0;31mNameError\u001b[0m: name 'asdf' is not defined" ] } @@ -166,7 +166,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -183,22 +183,28 @@ "execution_count": null, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sleep\n" + ] + }, { "data": { "text/plain": [ - "{'job_name': 'load_documents_content_json_3e62fb87723945ea9a0380956694b7ec',\n", - " 'job_id': 'GraphRAG_pytgdocs.load_documents_content_json_3e62fb87723945ea9a0380956694b7ec.stream.SupportAI_GraphRAG_pytgdocs_cc751adab29643b28af1b7bf13b6515b.1723213722186',\n", - " 'log_location': '/home/tigergraph/tigergraph/log/kafkaLoader/GraphRAG_pytgdocs.load_documents_content_json_3e62fb87723945ea9a0380956694b7ec.stream.SupportAI_GraphRAG_pytgdocs_cc751adab29643b28af1b7bf13b6515b.1723213722186'}" + "{'job_name': 'load_documents_content_json_8a4ea730f21c43abbb58d818b9dd4d5a',\n", + " 'job_id': 'GraphRAG_pytgdocs.load_documents_content_json_8a4ea730f21c43abbb58d818b9dd4d5a.stream.SupportAI_GraphRAG_pytgdocs_7aed8a01c9c1432b8026ea6c708bf08b.1723490129603',\n", + " 'log_location': '/home/tigergraph/tigergraph/log/kafkaLoader/GraphRAG_pytgdocs.load_documents_content_json_8a4ea730f21c43abbb58d818b9dd4d5a.stream.SupportAI_GraphRAG_pytgdocs_7aed8a01c9c1432b8026ea6c708bf08b.1723490129603'}" ] }, - "execution_count": 11, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# for v in [\"Document\", \"Content\", \"DocumentChunk\", \"Entity\",\"ResolvedEntity\"]:\n", - "# for v in [\"ResolvedEntity\"]:\n", + "for v in [\"Document\", \"Content\", \"DocumentChunk\", \"Entity\",\"ResolvedEntity\",\"Community\"]:\n", "# for v in [\"ResolvedEntity\"]:\n", " try:\n", " conn.delVertices(v)\n", @@ -207,6 +213,7 @@ "\n", "import time\n", "\n", + "print('sleep')\n", "time.sleep(3)\n", "conn.ai.runDocumentIngest(\n", " res[\"load_job_id\"],\n", @@ -273,6 +280,33 @@ "r[\"results\"][0][\"attributes\"][\"description\"]" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "def check_vertex_has_desc(conn, comm: str):\n", + " headers = make_headers(conn)\n", + " with httpx.Client(timeout=None) as client:\n", + " resp = client.get(\n", + " f\"{conn.restppUrl}/graph/{conn.graphname}/vertices/Community/{comm}\",\n", + " headers=headers,\n", + " )\n", + " resp.raise_for_status()\n", + "\n", + " print(json.dumps(resp.json(),indent=2))\n", + " desc = 
resp.json()[\"results\"][0][\"attributes\"][\"description\"]\n", + " print(f\">>>*****{comm}:{desc}********\", flush=True)\n", + "\n", + " return len(desc) > 0\n", + "check_vertex_has_desc(conn,'Value_Property_1_2')\n", + "conn.upsertVertex(\"Community\",\"Rmse_1_2\",{\n", + " \"description\":\"asdf\"\n", + "})" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/eventual-consistency-service/app/graphrag/community_summarizer.py b/eventual-consistency-service/app/graphrag/community_summarizer.py index d250b1f3..2bef4095 100644 --- a/eventual-consistency-service/app/graphrag/community_summarizer.py +++ b/eventual-consistency-service/app/graphrag/community_summarizer.py @@ -1,7 +1,5 @@ -import json +import re -from langchain.output_parsers import PydanticOutputParser -from langchain.prompts import ChatPromptTemplate from langchain_core.prompts import PromptTemplate from common.llm_services import LLM_Model @@ -12,17 +10,17 @@ You are a helpful assistant responsible for generating a comprehensive summary of the data provided below. Given one or two entities, and a list of descriptions, all related to the same entity or group of entities. Please concatenate all of these into a single, comprehensive description. Make sure to include information collected from all the descriptions. -If the provided descriptions are contradictory, please resolve the contradictions and provide a single, coherent summary. +If the provided descriptions are contradictory, please resolve the contradictions and provide a single, coherent summary, but do not add any information that is not in the description. Make sure it is written in third person, and include the entity names so we the have full context. ####### -Data- -Entities: {entity_name} +Commuinty Title: {entity_name} Description List: {description_list} -####### -Output: """) +id_pat = re.compile(r"[_\d]*") + class CommunitySummarizer: def __init__( @@ -31,108 +29,16 @@ def __init__( ): self.llm_service = llm_service - def _extract_kg_from_doc(self, doc, chain, parser): - try: - out = chain.invoke( - {"input": doc, "format_instructions": parser.get_format_instructions()} - ) - except Exception as e: - print("Error: ", e) - return {"nodes": [], "rels": []} - try: - if "```json" not in out.content: - json_out = json.loads(out.content.strip("content=")) - else: - json_out = json.loads( - out.content.split("```")[1].strip("```").strip("json").strip() - ) - - formatted_rels = [] - for rels in json_out["rels"]: - if isinstance(rels["source"], str) and isinstance(rels["target"], str): - formatted_rels.append( - { - "source": rels["source"], - "target": rels["target"], - "type": rels["relation_type"].replace(" ", "_").upper(), - "definition": rels["definition"], - } - ) - elif isinstance(rels["source"], dict) and isinstance( - rels["target"], str - ): - formatted_rels.append( - { - "source": rels["source"]["id"], - "target": rels["target"], - "type": rels["relation_type"].replace(" ", "_").upper(), - "definition": rels["definition"], - } - ) - elif isinstance(rels["source"], str) and isinstance( - rels["target"], dict - ): - formatted_rels.append( - { - "source": rels["source"], - "target": rels["target"]["id"], - "type": rels["relation_type"].replace(" ", "_").upper(), - "definition": rels["definition"], - } - ) - elif isinstance(rels["source"], dict) and isinstance( - rels["target"], dict - ): - formatted_rels.append( - { - "source": rels["source"]["id"], - "target": rels["target"]["id"], - "type": rels["relation_type"].replace(" ", "_").upper(), - 
"definition": rels["definition"], - } - ) - else: - raise Exception("Relationship parsing error") - formatted_nodes = [] - for node in json_out["nodes"]: - formatted_nodes.append( - { - "id": node["id"], - "type": node["node_type"].replace(" ", "_").capitalize(), - "definition": node["definition"], - } - ) - - # filter relationships and nodes based on allowed types - if self.strict_mode: - if self.allowed_vertex_types: - formatted_nodes = [ - node - for node in formatted_nodes - if node["type"] in self.allowed_vertex_types - ] - if self.allowed_edge_types: - formatted_rels = [ - rel - for rel in formatted_rels - if rel["type"] in self.allowed_edge_types - ] - return {"nodes": formatted_nodes, "rels": formatted_rels} - except: - print("Error Processing: ", out) - return {"nodes": [], "rels": []} - async def summarize(self, name: str, text: list[str]) -> CommunitySummary: - # parser = PydanticOutputParser(pydantic_object=CommunitySummary) structured_llm = self.llm_service.model.with_structured_output(CommunitySummary) chain = SUMMARIZE_PROMPT | structured_llm + + # remove iteration tags from name + name = id_pat.sub("", name) summary = await chain.ainvoke( { "entity_name": name, "description_list": text, - # "format_instructions": parser.get_format_instructions(), } ) - # summary = self._extract_kg_from_doc(text, chain, parser) - # summary = None return summary.summary diff --git a/eventual-consistency-service/app/graphrag/graph_rag.py b/eventual-consistency-service/app/graphrag/graph_rag.py index d4e3a7d6..d1e7fdc0 100644 --- a/eventual-consistency-service/app/graphrag/graph_rag.py +++ b/eventual-consistency-service/app/graphrag/graph_rag.py @@ -6,7 +6,13 @@ import httpx from aiochannel import Channel from graphrag import workers -from graphrag.util import http_timeout, init, make_headers, stream_ids +from graphrag.util import ( + check_vertex_has_desc, + http_timeout, + init, + make_headers, + stream_ids, +) from pyTigerGraph import TigerGraphConnection from common.config import embedding_service @@ -210,7 +216,6 @@ async def resolve_entities( async with asyncio.TaskGroup() as grp: # for every entity async for entity_id in entity_chan: - print(f"***Entity ID from chan {entity_id}", flush=True) grp.create_task( workers.resolve_entity(conn, upsert_chan, emb_store, entity_id) ) @@ -227,7 +232,7 @@ async def resolve_entities( res.raise_for_status() -async def communities(conn: TigerGraphConnection, community_chan: Channel): +async def communities(conn: TigerGraphConnection, comm_process_chan: Channel): """ Run louvain """ @@ -250,23 +255,21 @@ async def communities(conn: TigerGraphConnection, community_chan: Channel): ) res.raise_for_status() mod = res.json()["results"][0]["mod"] - print(f"****mod 1: {mod}", flush=True) - await community_chan.put(1) + logger.info(f"****mod pass 1: {mod}") + await stream_communities(conn, 1, comm_process_chan) # nth pass: Iterate on Resolved Entities until modularity stops increasing prev_mod = -10 i = 0 - # for _ in range(1, 5): - prev_mod = 0 while abs(prev_mod - mod) > 0.0000001 and prev_mod != 0: prev_mod = mod - logger.info(f"Running louvain on Communities (iteration: {i})") i += 1 + logger.info(f"Running louvain on Communities (iteration: {i})") # louvain pass async with httpx.AsyncClient(timeout=None) as client: res = await client.get( f"{conn.restppUrl}/query/{conn.graphname}/graphrag_louvain_communities", - params={"n_batches": 1}, + params={"n_batches": 1, "iteration": i}, headers=headers, ) @@ -281,20 +284,20 @@ async def communities(conn: 
TigerGraphConnection, community_chan: Channel): ) res.raise_for_status() mod = res.json()["results"][0]["mod"] - print(f"*** mod {i+1}: {mod}", flush=True) - print(f"****** mod diff: {abs(prev_mod - mod)}", flush=True) + logger.info(f"*** mod pass {i+1}: {mod} (diff= {abs(prev_mod - mod)})") # write iter to chan for layer to be processed - await community_chan.put(i + 1) + await stream_communities(conn, i + 1, comm_process_chan) # TODO: erase last run since it's ∆q to the run before it will be small logger.info("closing communities chan") - community_chan.close() + comm_process_chan.close() async def stream_communities( conn: TigerGraphConnection, - community_chan: Channel, + # community_chan: Channel, + i: int, comm_process_chan: Channel, ): """ @@ -305,37 +308,48 @@ async def stream_communities( headers = make_headers(conn) # TODO: # can only do one layer at a time to ensure that every child community has their descriptions - async for i in community_chan: - # get the community from that layer - async with httpx.AsyncClient(timeout=None) as client: - resp = await client.get( - f"{conn.restppUrl}/query/{conn.graphname}/stream_community", - params={"iter": i}, - headers=headers, - ) - resp.raise_for_status() - comms = resp.json()["results"][0]["Comms"] - for c in comms: - await comm_process_chan.put((i, c["v_id"])) + # async for i in community_chan: + # get the community from that layer + async with httpx.AsyncClient(timeout=None) as client: + resp = await client.get( + f"{conn.restppUrl}/query/{conn.graphname}/stream_community", + params={"iter": i}, + headers=headers, + ) + resp.raise_for_status() + comms = resp.json()["results"][0]["Comms"] + + for c in comms: + await comm_process_chan.put((i, c["v_id"])) + + # Wait for all communities for layer i to be processed before doing next layer + # all community descriptions must be populated before the next layer can be processed + if len(comms) > 0: + while not await check_vertex_has_desc(conn, i): + logger.info(f"Waiting for layer{i} to finish processing") + await asyncio.sleep(5) + await asyncio.sleep(3) logger.info("stream_communities done") logger.info("closing comm_process_chan") - comm_process_chan.close() + # comm_process_chan.close() async def summarize_communities( conn: TigerGraphConnection, comm_process_chan: Channel, upsert_chan: Channel, + embed_chan: Channel, ): async with asyncio.TaskGroup() as tg: async for c in comm_process_chan: - tg.create_task(workers.process_community(conn, upsert_chan, *c)) - break + tg.create_task(workers.process_community(conn, upsert_chan, embed_chan, *c)) + # break logger.info("closing upsert_chan") upsert_chan.close() + embed_chan.close() async def run(graphname: str, conn: TigerGraphConnection): @@ -347,14 +361,17 @@ async def run(graphname: str, conn: TigerGraphConnection): - embeddings - entities/relationships (and their embeddings) - upsert everything to the graph + - Resolve Entities + Ex: "Vincent van Gogh" and "van Gogh" should be resolved to "Vincent van Gogh" """ extractor, index_stores = await init(conn) init_start = time.perf_counter() - abc = True - abc = False - if abc: + doc_process_switch = True + entity_resolution_switch = True + community_detection_switch = True + if doc_process_switch: logger.info("Doc Processing Start") docs_chan = Channel(1) embed_chan = Channel(100) @@ -381,7 +398,7 @@ async def run(graphname: str, conn: TigerGraphConnection): # Entity Resolution entity_start = time.perf_counter() - if abc: + if entity_resolution_switch: logger.info("Entity Processing Start") 
entities_chan = Channel(100) upsert_chan = Channel(100) @@ -401,7 +418,7 @@ async def run(graphname: str, conn: TigerGraphConnection): # Community Detection community_start = time.perf_counter() - if True: + if community_detection_switch: # FIXME: delete community delete for v in ["Community"]: try: @@ -409,20 +426,22 @@ async def run(graphname: str, conn: TigerGraphConnection): except: pass logger.info("Community Processing Start") - communities_chan = Channel(1) upsert_chan = Channel(10) comm_process_chan = Channel(100) upsert_chan = Channel(100) + embed_chan = Channel(100) async with asyncio.TaskGroup() as grp: # run louvain - grp.create_task(communities(conn, communities_chan)) + # grp.create_task(communities(conn, communities_chan)) + grp.create_task(communities(conn, comm_process_chan)) # get the communities + # grp.create_task( stream_communities(conn, communities_chan, comm_process_chan)) + # summarize each community grp.create_task( - stream_communities(conn, communities_chan, comm_process_chan) + summarize_communities(conn, comm_process_chan, upsert_chan, embed_chan) ) - # summarize each community - grp.create_task(summarize_communities(conn, comm_process_chan, upsert_chan)) grp.create_task(upsert(upsert_chan)) + grp.create_task(embed(embed_chan, index_stores, graphname)) community_end = time.perf_counter() logger.info("Community Processing End") diff --git a/eventual-consistency-service/app/graphrag/util.py b/eventual-consistency-service/app/graphrag/util.py index 6876b5de..bcf1befe 100644 --- a/eventual-consistency-service/app/graphrag/util.py +++ b/eventual-consistency-service/app/graphrag/util.py @@ -2,6 +2,7 @@ import base64 import json import logging +import re import traceback from glob import glob @@ -65,6 +66,7 @@ async def init( "common/gsql/graphRAG/SetEpochProcessing", "common/gsql/graphRAG/ResolveRelationships", "common/gsql/graphRAG/get_community_children", + "common/gsql/graphRAG/communities_have_desc", "common/gsql/graphRAG/louvain/graphrag_louvain_init", "common/gsql/graphRAG/louvain/graphrag_louvain_communities", "common/gsql/graphRAG/louvain/modularity", @@ -91,6 +93,7 @@ async def init( "Entity", "Relationship", # "Concept", + "Community", ], ) index_stores = {} @@ -108,7 +111,7 @@ async def init( vector_field=milvus_config.get("vector_field", "document_vector"), text_field=milvus_config.get("text_field", "document_content"), vertex_field=vertex_field, - drop_old=False, + drop_old=True, ) LogWriter.info(f"Initializing {name}") @@ -174,6 +177,10 @@ def map_attrs(attributes: dict): def process_id(v_id: str): v_id = v_id.replace(" ", "_").replace("/", "") + + has_func = re.compile(r"(.*)\(").findall(v_id) + if len(has_func) > 0: + v_id = has_func[0] if v_id == "''" or v_id == '""': return "" @@ -186,6 +193,7 @@ async def upsert_vertex( vertex_id: str, attributes: dict, ): + logger.info(f"Upsert vertex: {vertex_type} {vertex_id}") vertex_id = vertex_id.replace(" ", "_") attrs = map_attrs(attributes) data = json.dumps({"vertices": {vertex_type: {vertex_id: attrs}}}) @@ -260,10 +268,26 @@ async def get_commuinty_children(conn, i: int, c: str): descrs = [] for d in resp.json()["results"][0]["children"]: desc = d["attributes"]["description"] - if len(desc) == 0: + if i == 1 and all(len(x) == 0 for x in desc): + desc = [d["v_id"]] + elif len(desc) == 0: desc = d["v_id"] descrs.append(desc) - print(">>>", descrs, flush=True) return descrs + + +async def check_vertex_has_desc(conn, i: int): + headers = make_headers(conn) + async with httpx.AsyncClient(timeout=None) as 
client: + resp = await client.get( + f"{conn.restppUrl}/query/{conn.graphname}/communities_have_desc", + params={"iter": i}, + headers=headers, + ) + resp.raise_for_status() + + res = resp.json()["results"][0]["all_have_desc"] + + return res diff --git a/eventual-consistency-service/app/graphrag/workers.py b/eventual-consistency-service/app/graphrag/workers.py index 22980d96..77f3d6d8 100644 --- a/eventual-consistency-service/app/graphrag/workers.py +++ b/eventual-consistency-service/app/graphrag/workers.py @@ -299,9 +299,6 @@ async def resolve_entity( f"aget_k_closest should, minimally, return the entity itself.\n{results}" ) raise Exception() - # FIXME: deleteme - # if entity_id == "Dataframe": - # print("result:", entity_id, results) # merge all entities into the ResolvedEntity vertex # use the longest v_id as the resolved entity's v_id @@ -346,8 +343,9 @@ async def resolve_entity( async def process_community( conn: TigerGraphConnection, upsert_chan: Channel, + embed_chan: Channel, i: int, - c: str, + comm_id: str, ): """ https://github.com/microsoft/graphrag/blob/main/graphrag/prompt_tune/template/community_report_summarization.py @@ -357,33 +355,39 @@ async def process_community( embed summaries """ - print(i, c, flush=True) + logger.info(f"Processing Community: {comm_id}") # get the children of the community - children = await util.get_commuinty_children(conn, i, c) + children = await util.get_commuinty_children(conn, i, comm_id) if i == 1: tmp = [] for c in children: tmp.extend(c) children = list(filter(lambda x: len(x) > 0, tmp)) - print(">>>", children, flush=True) - llm = ecc_util.get_llm_service() - summarizer = community_summarizer.CommunitySummarizer(llm) - summary = await summarizer.summarize(c, children) - await upsert_chan.put((upsert_summary, (conn,summary))) - - -async def upsert_summary(conn: TigerGraphConnection, summary: str): - print(f"SUMMARY:> {summary}", flush=True) - - # vertex_id = vertex_id.replace(" ", "_") - # attrs = map_attrs(attributes) - # data = json.dumps({"vertices": {vertex_type: {vertex_id: attrs}}}) - # headers = make_headers(conn) - # async with httpx.AsyncClient(timeout=http_timeout) as client: - # res = await client.post( - # f"{conn.restppUrl}/graph/{conn.graphname}", data=data, headers=headers - # ) - # - # res.raise_for_status() - # + comm_id = util.process_id(comm_id) + + # if the community only has one child, use its description + if len(children) == 1: + summary = children[0] + else: + llm = ecc_util.get_llm_service() + summarizer = community_summarizer.CommunitySummarizer(llm) + summary = await summarizer.summarize(comm_id, children) + + await upsert_chan.put( + ( + util.upsert_vertex, # func to call + ( + conn, + "Community", # v_type + comm_id, # v_id + { # attrs + "description": summary, + "iteration": i, + }, + ), + ) + ) + + # (v_id, content, index_name) + await embed_chan.put((comm_id, summary, "Community")) From 08aca044b071352020cb3bfec3e743e8e178aaa0 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Mon, 12 Aug 2024 16:54:16 -0400 Subject: [PATCH 11/53] cleanup --- .../app/graphrag/graph_rag.py | 15 +-------------- .../app/graphrag/workers.py | 3 +-- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/eventual-consistency-service/app/graphrag/graph_rag.py b/eventual-consistency-service/app/graphrag/graph_rag.py index d1e7fdc0..86f172b8 100644 --- a/eventual-consistency-service/app/graphrag/graph_rag.py +++ b/eventual-consistency-service/app/graphrag/graph_rag.py 
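# A doctest-style restatement of the process_id() helper added to graphrag/util.py above,
# so the effect of the new r"(.*)\(" pattern is easier to see. The expected results in the
# trailing comments are inferred from the replace() calls and the regex, not taken from
# test output; treat this as an illustrative sketch rather than the canonical helper.
import re


def process_id(v_id: str) -> str:
    v_id = v_id.replace(" ", "_").replace("/", "")
    # if the id looks like a call signature, keep only the part before the "("
    has_func = re.compile(r"(.*)\(").findall(v_id)
    if len(has_func) > 0:
        v_id = has_func[0]
    if v_id == "''" or v_id == '""':
        return ""
    return v_id


# process_id("Post /Requesttoken")  -> "Post_Requesttoken"
# process_id("getToken(secret)")    -> "getToken"
# process_id("''")                  -> ""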
@@ -19,8 +19,6 @@ from common.embeddings.milvus_embedding_store import MilvusEmbeddingStore from common.extractors.BaseExtractor import BaseExtractor -# http_logs = logging.getLogger("httpx") -# http_logs.setLevel(logging.WARNING) logger = logging.getLogger(__name__) consistency_checkers = {} @@ -192,8 +190,6 @@ async def stream_entities( for i in ids["ids"]: if len(i) > 0: await entity_chan.put(i) - # break - # break # one batch logger.info("stream_enities done") # close the docs chan -- this function is the only sender @@ -296,7 +292,6 @@ async def communities(conn: TigerGraphConnection, comm_process_chan: Channel): async def stream_communities( conn: TigerGraphConnection, - # community_chan: Channel, i: int, comm_process_chan: Channel, ): @@ -333,7 +328,6 @@ async def stream_communities( logger.info("stream_communities done") logger.info("closing comm_process_chan") - # comm_process_chan.close() async def summarize_communities( @@ -345,7 +339,6 @@ async def summarize_communities( async with asyncio.TaskGroup() as tg: async for c in comm_process_chan: tg.create_task(workers.process_community(conn, upsert_chan, embed_chan, *c)) - # break logger.info("closing upsert_chan") upsert_chan.close() @@ -369,7 +362,7 @@ async def run(graphname: str, conn: TigerGraphConnection): init_start = time.perf_counter() doc_process_switch = True - entity_resolution_switch = True + entity_resolution_switch = True community_detection_switch = True if doc_process_switch: logger.info("Doc Processing Start") @@ -419,12 +412,6 @@ async def run(graphname: str, conn: TigerGraphConnection): # Community Detection community_start = time.perf_counter() if community_detection_switch: - # FIXME: delete community delete - for v in ["Community"]: - try: - conn.delVertices(v) - except: - pass logger.info("Community Processing Start") upsert_chan = Channel(10) comm_process_chan = Channel(100) diff --git a/eventual-consistency-service/app/graphrag/workers.py b/eventual-consistency-service/app/graphrag/workers.py index 77f3d6d8..755b1085 100644 --- a/eventual-consistency-service/app/graphrag/workers.py +++ b/eventual-consistency-service/app/graphrag/workers.py @@ -151,7 +151,7 @@ async def get_vert_desc(conn, v_id, node: Node): exists = await util.check_vertex_exists(conn, v_id) # if vertex exists, get description content and append this description to it if not exists["error"]: - # dedup descriptions + # deduplicate descriptions desc.extend(exists["results"][0]["attributes"]["description"]) desc = list(set(desc)) return desc @@ -316,7 +316,6 @@ async def resolve_entity( "ResolvedEntity", # v_type resolved_entity_id, # v_id { # attrs - # "id": resolved_entity_id, }, ), ) From f2828406ee10fd71b486a32fb0e704d6db895cb3 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Mon, 12 Aug 2024 17:01:26 -0400 Subject: [PATCH 12/53] fmt after merge conflicts --- common/embeddings/milvus_embedding_store.py | 8 +++----- eventual-consistency-service/app/main.py | 7 +++++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/common/embeddings/milvus_embedding_store.py b/common/embeddings/milvus_embedding_store.py index ae352c9e..9302f6f8 100644 --- a/common/embeddings/milvus_embedding_store.py +++ b/common/embeddings/milvus_embedding_store.py @@ -5,9 +5,10 @@ import Levenshtein as lev from asyncer import asyncify -from langchain_milvus.vectorstores import Milvus +from langchain_community.vectorstores import Milvus from langchain_core.documents.base import Document -from 
pymilvus import connections, utility +from langchain_milvus.vectorstores import Milvus +from pymilvus import MilvusException, connections, utility from pymilvus.exceptions import MilvusException from common.embeddings.base_embedding_store import EmbeddingStore @@ -15,9 +16,6 @@ from common.logs.log import req_id_cv from common.logs.logwriter import LogWriter from common.metrics.prometheus_metrics import metrics -from langchain_community.vectorstores import Milvus -from langchain_core.documents.base import Document -from pymilvus import MilvusException, connections, utility logger = logging.getLogger(__name__) diff --git a/eventual-consistency-service/app/main.py b/eventual-consistency-service/app/main.py index ce7a2e04..701e363e 100644 --- a/eventual-consistency-service/app/main.py +++ b/eventual-consistency-service/app/main.py @@ -96,7 +96,7 @@ def initialize_eventual_consistency_checker( vector_field=milvus_config.get("vector_field", "document_vector"), text_field=milvus_config.get("text_field", "document_content"), vertex_field=vertex_field, - alias=milvus_config.get("alias", "default") + alias=milvus_config.get("alias", "default"), ) chunker = ecc_util.get_chunker() @@ -190,7 +190,10 @@ def consistency_status( background.add_task(graphrag.run, graphname, conn) # asyncio.run(graphrag.run(graphname, conn)) import time - ecc_status = f"hi from graph rag ecc: {conn.graphname} ({graphname}) {time.ctime()}" + + ecc_status = ( + f"hi from graph rag ecc: {conn.graphname} ({graphname}) {time.ctime()}" + ) case _: response.status_code = status.HTTP_404_NOT_FOUND return f"Method unsupported, must be {SupportAIMethod.SUPPORTAI}, {SupportAIMethod.GRAPHRAG}" From 50a4fd516cd4195a2693f4693dbbc545e5524326 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Mon, 12 Aug 2024 20:27:30 -0400 Subject: [PATCH 13/53] rm clang dotfiles --- common/gsql/graphRAG/.clang-format | 269 ----------------------------- common/gsql/graphRAG/.clangd | 2 - 2 files changed, 271 deletions(-) delete mode 100644 common/gsql/graphRAG/.clang-format delete mode 100644 common/gsql/graphRAG/.clangd diff --git a/common/gsql/graphRAG/.clang-format b/common/gsql/graphRAG/.clang-format deleted file mode 100644 index f0dcec6c..00000000 --- a/common/gsql/graphRAG/.clang-format +++ /dev/null @@ -1,269 +0,0 @@ ---- -Language: Cpp -# BasedOnStyle: Google -AccessModifierOffset: -1 -AlignAfterOpenBracket: Align -AlignArrayOfStructures: None -AlignConsecutiveAssignments: - Enabled: false - AcrossEmptyLines: false - AcrossComments: false - AlignCompound: false - PadOperators: true -AlignConsecutiveBitFields: - Enabled: false - AcrossEmptyLines: false - AcrossComments: false - AlignCompound: false - PadOperators: false -AlignConsecutiveDeclarations: - Enabled: false - AcrossEmptyLines: false - AcrossComments: false - AlignCompound: false - PadOperators: false -AlignConsecutiveMacros: - Enabled: false - AcrossEmptyLines: false - AcrossComments: false - AlignCompound: false - PadOperators: false -AlignConsecutiveShortCaseStatements: - Enabled: false - AcrossEmptyLines: false - AcrossComments: false - AlignCaseColons: false -AlignEscapedNewlines: Left -AlignOperands: Align -AlignTrailingComments: - Kind: Always - OverEmptyLines: 0 -AllowAllArgumentsOnNextLine: true -AllowAllParametersOfDeclarationOnNextLine: true -AllowShortBlocksOnASingleLine: Never -AllowShortCaseLabelsOnASingleLine: false -AllowShortEnumsOnASingleLine: true -AllowShortFunctionsOnASingleLine: All 
-AllowShortIfStatementsOnASingleLine: WithoutElse -AllowShortLambdasOnASingleLine: All -AllowShortLoopsOnASingleLine: true -AlwaysBreakAfterDefinitionReturnType: None -AlwaysBreakAfterReturnType: None -AlwaysBreakBeforeMultilineStrings: true -AlwaysBreakTemplateDeclarations: Yes -AttributeMacros: - - __capability -BinPackArguments: true -BinPackParameters: true -BitFieldColonSpacing: Both -BraceWrapping: - AfterCaseLabel: false - AfterClass: false - AfterControlStatement: Never - AfterEnum: false - AfterExternBlock: false - AfterFunction: false - AfterNamespace: false - AfterObjCDeclaration: false - AfterStruct: false - AfterUnion: false - BeforeCatch: false - BeforeElse: false - BeforeLambdaBody: false - BeforeWhile: false - IndentBraces: false - SplitEmptyFunction: true - SplitEmptyRecord: true - SplitEmptyNamespace: true -BreakAfterAttributes: Never -BreakAfterJavaFieldAnnotations: false -BreakArrays: true -BreakBeforeBinaryOperators: None -BreakBeforeConceptDeclarations: Always -BreakBeforeBraces: Attach -BreakBeforeInlineASMColon: OnlyMultiline -BreakBeforeTernaryOperators: true -BreakConstructorInitializers: BeforeColon -BreakInheritanceList: BeforeColon -BreakStringLiterals: true -ColumnLimit: 80 -CommentPragmas: '^ IWYU pragma:' -CompactNamespaces: false -ConstructorInitializerIndentWidth: 4 -ContinuationIndentWidth: 4 -Cpp11BracedListStyle: true -DerivePointerAlignment: true -DisableFormat: false -EmptyLineAfterAccessModifier: Never -EmptyLineBeforeAccessModifier: LogicalBlock -ExperimentalAutoDetectBinPacking: false -FixNamespaceComments: true -ForEachMacros: - - foreach - - Q_FOREACH - - BOOST_FOREACH -IfMacros: - - KJ_IF_MAYBE -IncludeBlocks: Regroup -IncludeCategories: - - Regex: '^' - Priority: 2 - SortPriority: 0 - CaseSensitive: false - - Regex: '^<.*\.h>' - Priority: 1 - SortPriority: 0 - CaseSensitive: false - - Regex: '^<.*' - Priority: 2 - SortPriority: 0 - CaseSensitive: false - - Regex: '.*' - Priority: 3 - SortPriority: 0 - CaseSensitive: false -IncludeIsMainRegex: '([-_](test|unittest))?$' -IncludeIsMainSourceRegex: '' -IndentAccessModifiers: false -IndentCaseBlocks: false -IndentCaseLabels: true -IndentExternBlock: AfterExternBlock -IndentGotoLabels: true -IndentPPDirectives: None -IndentRequiresClause: true -IndentWidth: 4 -IndentWrappedFunctionNames: false -InsertBraces: false -InsertNewlineAtEOF: false -InsertTrailingCommas: None -IntegerLiteralSeparator: - Binary: 0 - BinaryMinDigits: 0 - Decimal: 0 - DecimalMinDigits: 0 - Hex: 0 - HexMinDigits: 0 -JavaScriptQuotes: Leave -JavaScriptWrapImports: true -KeepEmptyLinesAtTheStartOfBlocks: false -KeepEmptyLinesAtEOF: false -LambdaBodyIndentation: Signature -LineEnding: DeriveLF -MacroBlockBegin: '' -MacroBlockEnd: '' -MaxEmptyLinesToKeep: 1 -NamespaceIndentation: None -ObjCBinPackProtocolList: Never -ObjCBlockIndentWidth: 4 -ObjCBreakBeforeNestedBlockParam: true -ObjCSpaceAfterProperty: false -ObjCSpaceBeforeProtocolList: true -PackConstructorInitializers: NextLine -PenaltyBreakAssignment: 2 -PenaltyBreakBeforeFirstCallParameter: 1 -PenaltyBreakComment: 300 -PenaltyBreakFirstLessLess: 120 -PenaltyBreakOpenParenthesis: 0 -PenaltyBreakString: 1000 -PenaltyBreakTemplateDeclaration: 10 -PenaltyExcessCharacter: 1000000 -PenaltyIndentedWhitespace: 0 -PenaltyReturnTypeOnItsOwnLine: 200 -PointerAlignment: Left -PPIndentWidth: -1 -QualifierAlignment: Leave -RawStringFormats: - - Language: Cpp - Delimiters: - - cc - - CC - - cpp - - Cpp - - CPP - - 'c++' - - 'C++' - CanonicalDelimiter: '' - BasedOnStyle: google - - Language: 
TextProto - Delimiters: - - pb - - PB - - proto - - PROTO - EnclosingFunctions: - - EqualsProto - - EquivToProto - - PARSE_PARTIAL_TEXT_PROTO - - PARSE_TEST_PROTO - - PARSE_TEXT_PROTO - - ParseTextOrDie - - ParseTextProtoOrDie - - ParseTestProto - - ParsePartialTestProto - CanonicalDelimiter: pb - BasedOnStyle: google -ReferenceAlignment: Pointer -ReflowComments: true -RemoveBracesLLVM: false -RemoveParentheses: Leave -RemoveSemicolon: false -RequiresClausePosition: OwnLine -RequiresExpressionIndentation: OuterScope -SeparateDefinitionBlocks: Leave -ShortNamespaceLines: 1 -SortIncludes: CaseSensitive -SortJavaStaticImport: Before -SortUsingDeclarations: LexicographicNumeric -SpaceAfterCStyleCast: false -SpaceAfterLogicalNot: false -SpaceAfterTemplateKeyword: true -SpaceAroundPointerQualifiers: Default -SpaceBeforeAssignmentOperators: true -SpaceBeforeCaseColon: false -SpaceBeforeCpp11BracedList: false -SpaceBeforeCtorInitializerColon: true -SpaceBeforeInheritanceColon: true -SpaceBeforeJsonColon: false -SpaceBeforeParens: ControlStatements -SpaceBeforeParensOptions: - AfterControlStatements: true - AfterForeachMacros: true - AfterFunctionDefinitionName: false - AfterFunctionDeclarationName: false - AfterIfMacros: true - AfterOverloadedOperator: false - AfterRequiresInClause: false - AfterRequiresInExpression: false - BeforeNonEmptyParentheses: false -SpaceBeforeRangeBasedForLoopColon: true -SpaceBeforeSquareBrackets: false -SpaceInEmptyBlock: false -SpacesBeforeTrailingComments: 1 -SpacesInAngles: Never -SpacesInContainerLiterals: true -SpacesInLineCommentPrefix: - Minimum: 1 - Maximum: -1 -SpacesInParens: Never -SpacesInParensOptions: - InCStyleCasts: false - InConditionalStatements: false - InEmptyParentheses: false - Other: false -SpacesInSquareBrackets: false -Standard: Auto -StatementAttributeLikeMacros: - - Q_EMIT -StatementMacros: - - Q_UNUSED - - QT_REQUIRE_VERSION -TabWidth: 8 -UseTab: Never -VerilogBreakBetweenInstancePorts: true -WhitespaceSensitiveMacros: - - BOOST_PP_STRINGIZE - - CF_SWIFT_NAME - - NS_SWIFT_NAME - - PP_STRINGIZE - - STRINGIZE -... 
diff --git a/common/gsql/graphRAG/.clangd b/common/gsql/graphRAG/.clangd deleted file mode 100644 index ec3be0d8..00000000 --- a/common/gsql/graphRAG/.clangd +++ /dev/null @@ -1,2 +0,0 @@ -CompileFlags: - Add: [ -std=c++23 ] From f007c8aac453d7e00009d10d69fb0b49ed174acf Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 10:12:05 -0400 Subject: [PATCH 14/53] final cleanup --- common/embeddings/milvus_embedding_store.py | 5 +- common/extractors/GraphExtractor.py | 3 +- .../louvain/graphrag_louvain_communities.gsql | 5 +- .../louvain/graphrag_louvain_init.gsql | 17 +- common/gsql/supportai/Scan_For_Updates.gsql | 8 +- common/gsql/supportai/SupportAI_Schema.gsql | 2 +- common/logs/logwriter.py | 2 +- common/py_schemas/schemas.py | 4 - copilot/docs/notebooks/graphrag.ipynb | 398 ------------------ eventual-consistency-service/app/main.py | 3 +- 10 files changed, 17 insertions(+), 430 deletions(-) delete mode 100644 copilot/docs/notebooks/graphrag.ipynb diff --git a/common/embeddings/milvus_embedding_store.py b/common/embeddings/milvus_embedding_store.py index 9302f6f8..7169379e 100644 --- a/common/embeddings/milvus_embedding_store.py +++ b/common/embeddings/milvus_embedding_store.py @@ -596,7 +596,7 @@ def query(self, expr: str, output_fields: List[str]): return query_result - def edit_dist_check(self, a: str, b: str, edit_dist_threshold: float, p=False): + def edit_dist_check(self, a: str, b: str, edit_dist_threshold: float): a = a.lower() b = b.lower() # if the words are short, they should be the same @@ -605,8 +605,6 @@ def edit_dist_check(self, a: str, b: str, edit_dist_threshold: float, p=False): # edit_dist_threshold (as a percent) of word must match threshold = int(min(len(a), len(b)) * (1 - edit_dist_threshold)) - if p: - print(a, b, threshold, lev.distance(a, b)) return lev.distance(a, b) < threshold async def aget_k_closest( @@ -641,7 +639,6 @@ async def aget_k_closest( doc.metadata["vertex_id"], v_id, edit_dist_threshold_pct, - # v_id == "Dataframe", ) # don't have to merge verts with the same id (they're the same) and doc.metadata["vertex_id"] != v_id diff --git a/common/extractors/GraphExtractor.py b/common/extractors/GraphExtractor.py index 282729a4..2a7ba505 100644 --- a/common/extractors/GraphExtractor.py +++ b/common/extractors/GraphExtractor.py @@ -40,8 +40,7 @@ def extract(self, text) -> list[GraphDocument]: """ doc = Document(page_content=text) graph_docs = self.transformer.convert_to_graph_documents([doc]) - translated_docs = self.translate(graph_docs) - return translated_docs + return graph_docs async def aextract(self, text:str) -> list[GraphDocument]: """ diff --git a/common/gsql/graphRAG/louvain/graphrag_louvain_communities.gsql b/common/gsql/graphRAG/louvain/graphrag_louvain_communities.gsql index 366b7ea7..4137ca68 100644 --- a/common/gsql/graphRAG/louvain/graphrag_louvain_communities.gsql +++ b/common/gsql/graphRAG/louvain/graphrag_louvain_communities.gsql @@ -166,14 +166,13 @@ CREATE DISTRIBUTED QUERY graphrag_louvain_communities(UINT iteration=1, UINT max @@community_sum_in_map += (s.@community_id -> e.weight) ELSE // get LINKS_TO edge weights (how many edges are between communities) - // s.@community_k_in_map += (t.@community_id -> 1) @@source_target_k_in_map += (s.@community_vid -> (t.@community_vid -> e.weight)) END, t.@has_parent += TRUE // Used to help find unattached partitions POST-ACCUM // Write the results to a new community vertex (iteration + 1) // ID , iter, edges within the 
community - INSERT INTO Community VALUES (s.id+"_"+to_string(iteration+1), iteration+1, s.k_in + @@community_sum_in_map.get(s.@community_id), ""), + INSERT INTO Community VALUES (s.id+"_"+to_string(iteration+1), iteration+1), ""), INSERT INTO HAS_PARENT VALUES (s, s.@community_vid+"_"+to_string(iteration+1)) // link Community's child/parent community ; @@ -183,7 +182,7 @@ CREATE DISTRIBUTED QUERY graphrag_louvain_communities(UINT iteration=1, UINT max AND NOT s.@has_parent POST-ACCUM // if s is a part of an unattached partition, add to its community hierarchy to maintain parity with rest of graph - INSERT INTO Community VALUES (s.id+"_"+to_string(iteration+1), iteration+1, s.k_in + @@community_sum_in_map.get(s.@community_id), ""), + INSERT INTO Community VALUES (s.id+"_"+to_string(iteration+1), iteration+1, ""), INSERT INTO HAS_PARENT VALUES (s, s.id+"_"+to_string(iteration+1)) // link Community's child/parent community ; diff --git a/common/gsql/graphRAG/louvain/graphrag_louvain_init.gsql b/common/gsql/graphRAG/louvain/graphrag_louvain_init.gsql index 2ccbaf2c..42e9108d 100644 --- a/common/gsql/graphRAG/louvain/graphrag_louvain_init.gsql +++ b/common/gsql/graphRAG/louvain/graphrag_louvain_init.gsql @@ -26,11 +26,11 @@ CREATE DISTRIBUTED QUERY graphrag_louvain_init(UINT max_hop = 10, UINT n_batches DOUBLE wt = 1.0; // prevent multiple init runs - // z = SELECT s FROM AllNodes:s -(_)-> Community:t; - // IF z.size() > 0 THEN - // EXCEPTION reinit(400001); - // RAISE reinit("ERROR: the hierarchical communities have already been initialized"); - // END; + z = SELECT s FROM AllNodes:s -(_)-> Community:t; + IF z.size() > 0 THEN + EXCEPTION reinit(400001); + RAISE reinit("ERROR: the hierarchical communities have already been initialized"); + END; // init z = SELECT s FROM AllNodes:s @@ -42,11 +42,6 @@ CREATE DISTRIBUTED QUERY graphrag_louvain_init(UINT max_hop = 10, UINT n_batches z = SELECT s FROM AllNodes:s -(_)-> ResolvedEntity:t ACCUM s.@k += wt, @@m += 1; - // POST-ACCUM - // s.@community_id = s, // assign node to its own community - // s.@community_vid = s.id, // external id - // s.@vid = getvid(s), // internal id (used in batching) - // s.@batch_id = s.@vid % n_batches; // get batch number PRINT z.size(); PRINT z; @@ -166,7 +161,7 @@ CREATE DISTRIBUTED QUERY graphrag_louvain_init(UINT max_hop = 10, UINT n_batches END POST-ACCUM // ID , iter, edges within the community - INSERT INTO Community VALUES (s.@community_vid+"_1", 1, @@community_sum_in_map.get(s.@community_id), ""), + INSERT INTO Community VALUES (s.@community_vid+"_1", 1, ""), INSERT INTO IN_COMMUNITY VALUES (s, s.@community_vid+"_1") // link entity to it's first community ; diff --git a/common/gsql/supportai/Scan_For_Updates.gsql b/common/gsql/supportai/Scan_For_Updates.gsql index 7d9d1b83..ba5444bd 100644 --- a/common/gsql/supportai/Scan_For_Updates.gsql +++ b/common/gsql/supportai/Scan_For_Updates.gsql @@ -24,10 +24,10 @@ CREATE DISTRIBUTED QUERY Scan_For_Updates(STRING v_type = "Document", res = SELECT s FROM start:s -(HAS_CONTENT)-> Content:c ACCUM @@v_and_text += (s.id -> c.text) POST-ACCUM s.epoch_processing = datetime_to_epoch(now()); - // ELSE IF v_type == "Concept" THEN - // res = SELECT s FROM start:s - // POST-ACCUM @@v_and_text += (s.id -> s.description), - // s.epoch_processing = datetime_to_epoch(now()); + ELSE IF v_type == "Concept" THEN + res = SELECT s FROM start:s + POST-ACCUM @@v_and_text += (s.id -> s.description), + s.epoch_processing = datetime_to_epoch(now()); ELSE IF v_type == "Entity" THEN res = SELECT s 
FROM start:s POST-ACCUM @@v_and_text += (s.id -> s.definition), diff --git a/common/gsql/supportai/SupportAI_Schema.gsql b/common/gsql/supportai/SupportAI_Schema.gsql index 3e127d82..718ab1a7 100644 --- a/common/gsql/supportai/SupportAI_Schema.gsql +++ b/common/gsql/supportai/SupportAI_Schema.gsql @@ -20,7 +20,7 @@ CREATE SCHEMA_CHANGE JOB add_supportai_schema { ADD DIRECTED EDGE CONTAINS_DOCUMENT(FROM DocumentCollection, TO Document) WITH REVERSE_EDGE="reverse_CONTAINS_DOCUMENT"; // GraphRAG - ADD VERTEX Community (PRIMARY_ID id STRING, iteration UINT, k_in UINT, description STRING) WITH PRIMARY_ID_AS_ATTRIBUTE="true"; + ADD VERTEX Community (PRIMARY_ID id STRING, iteration UINT, description STRING) WITH PRIMARY_ID_AS_ATTRIBUTE="true"; ADD VERTEX ResolvedEntity(PRIMARY_ID id STRING, entity_type STRING) WITH PRIMARY_ID_AS_ATTRIBUTE="true"; ADD DIRECTED EDGE RELATIONSHIP(FROM Entity, TO Entity, relation_type STRING) WITH REVERSE_EDGE="reverse_RELATIONSHIP"; diff --git a/common/logs/logwriter.py b/common/logs/logwriter.py index f75be00c..ff13feed 100644 --- a/common/logs/logwriter.py +++ b/common/logs/logwriter.py @@ -142,7 +142,7 @@ def log(level, message, mask_pii=True, **kwargs): LogWriter.general_logger.info(message) @staticmethod - def info(message, mask_pii=False, **kwargs): + def info(message, mask_pii=True, **kwargs): LogWriter.log("info", message, mask_pii, **kwargs) @staticmethod diff --git a/common/py_schemas/schemas.py b/common/py_schemas/schemas.py index 07a2113f..a58d4660 100644 --- a/common/py_schemas/schemas.py +++ b/common/py_schemas/schemas.py @@ -20,10 +20,6 @@ class SupportAIMethod(enum.StrEnum): GRAPHRAG = enum.auto() -class EccConfig(BaseModel): - method: SupportAIMethod = SupportAIMethod.SUPPORTAI - - class GSQLQueryInfo(BaseModel): function_header: str description: str diff --git a/copilot/docs/notebooks/graphrag.ipynb b/copilot/docs/notebooks/graphrag.ipynb deleted file mode 100644 index 411f5d62..00000000 --- a/copilot/docs/notebooks/graphrag.ipynb +++ /dev/null @@ -1,398 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from pyTigerGraph import TigerGraphConnection\n", - "from dotenv import load_dotenv\n", - "\n", - "load_dotenv()\n", - "# We first create a connection to the database\n", - "host = os.environ[\"HOST\"]\n", - "username = os.getenv(\"USERNAME\", \"tigergraph\")\n", - "password = os.getenv(\"PASS\", \"tigergraph\")\n", - "conn = TigerGraphConnection(\n", - " host=host,\n", - " username=username,\n", - " password=password,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'The graph GraphRAG_pytgdocs is created.'" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "conn.graphname = \"GraphRAG_pytgdocs\"\n", - "conn.gsql(\"\"\"CREATE GRAPH GraphRAG_pytgdocs()\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "_ = conn.getToken()" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'host_name': 'https://algotesting.i.tgcloud.io',\n", - " 'schema_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: [add_supportai_schema].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and 
the duration of this process may vary based on the number and complexity of the queries. To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_schema\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 0)\\\\nTrying to add local vertex \\'DocumentChunk\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Document\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Concept\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Entity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Relationship\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'DocumentCollection\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Content\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'Community\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local vertex \\'ResolvedEntity\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CONTENT\\' and its reverse edge \\'reverse_HAS_CONTENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_CHILD_OF\\' and its reverse edge \\'reverse_IS_CHILD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_HEAD_OF\\' and its reverse edge \\'reverse_IS_HEAD_OF\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_TAIL\\' and its reverse edge \\'reverse_HAS_TAIL\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_RELATIONSHIP\\' and its reverse edge \\'reverse_DESCRIBES_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'DESCRIBES_ENTITY\\' and its reverse edge \\'reverse_DESCRIBES_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_ENTITY\\' and its reverse edge \\'reverse_CONTAINS_ENTITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'MENTIONS_RELATIONSHIP\\' and its reverse edge \\'reverse_MENTIONS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IS_AFTER\\' and its reverse edge \\'reverse_IS_AFTER\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_CHILD\\' and its reverse edge \\'reverse_HAS_CHILD\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_RELATIONSHIP\\' and its reverse edge \\'reverse_HAS_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'CONTAINS_DOCUMENT\\' and its reverse edge \\'reverse_CONTAINS_DOCUMENT\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RELATIONSHIP\\' and its reverse edge \\'reverse_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVES_TO\\' and its reverse edge \\'reverse_RESOLVES_TO\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'RESOLVED_RELATIONSHIP\\' and its reverse edge \\'reverse_RESOLVED_RELATIONSHIP\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'IN_COMMUNITY\\' and its reverse edge \\'reverse_IN_COMMUNITY\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'LINKS_TO\\' and its reverse edge \\'reverse_LINKS_TO\\' to the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add local edge \\'HAS_PARENT\\' and its reverse edge \\'reverse_HAS_PARENT\\' to 
the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 1\\\\nThe job add_supportai_schema completes in 1.845 seconds!\\\\nLocal schema change succeeded.\"',\n", - " 'index_creation_status': '\"Using graph \\'GraphRAG_pytgdocs\\'\\\\nSuccessfully created schema change jobs: [add_supportai_indexes].\\\\nWARNING: When modifying the graph schema, reinstalling all affected queries is required, and the duration of this process may vary based on the number and complexity of the queries. To skip query reinstallation, you can run with the \\'-N\\' option, but manual reinstallation of queries will be necessary afterwards.\\\\nKick off schema change job add_supportai_indexes\\\\nDoing schema change on graph \\'GraphRAG_pytgdocs\\' (current version: 1)\\\\nTrying to add index \\'doc_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_epoch_processing_indexepoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Document\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'doc_chunk_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'DocumentChunk\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_added_index\\' on the attribute \\'epoch_added\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processing_index\\' on the attribute \\'epoch_processing\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\nTrying to add index \\'concept_epoch_processed_index\\' on the attribute \\'epoch_processed\\' of local vertex \\'Concept\\' on the graph \\'GraphRAG_pytgdocs\\'.\\\\n\\\\nGraph GraphRAG_pytgdocs updated to new version 2\\\\nThe job add_supportai_indexes completes in 1.085 seconds!\\\\nLocal schema change succeeded.\"'}" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# # And then add CoPilot's address to the connection. 
This address\n", - "# # is the host's address where the CoPilot container is running.\n", - "conn.ai.configureCoPilotHost(\"http://localhost:8000\")\n", - "conn.ai.initializeSupportAI()" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [], - "source": [ - "access = os.environ[\"AWS_ACCESS_KEY_ID\"]\n", - "sec = os.environ[\"AWS_SECRET_ACCESS_KEY\"]\n", - "res = conn.ai.createDocumentIngest(\n", - " data_source=\"s3\",\n", - " data_source_config={\"aws_access_key\": access, \"aws_secret_key\": sec},\n", - " loader_config={\"doc_id_field\": \"url\", \"content_field\": \"content\"},\n", - " file_format=\"json\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'job_name': 'load_documents_content_json_cde7e4db979b4ba8a0b6ec5eb927f875',\n", - " 'job_id': 'GraphRAG_pytgdocs.load_documents_content_json_cde7e4db979b4ba8a0b6ec5eb927f875.stream.SupportAI_GraphRAG_pytgdocs_48ee36da7b7644e4995722a6e057d446.1723494758507',\n", - " 'log_location': '/home/tigergraph/tigergraph/log/kafkaLoader/GraphRAG_pytgdocs.load_documents_content_json_cde7e4db979b4ba8a0b6ec5eb927f875.stream.SupportAI_GraphRAG_pytgdocs_48ee36da7b7644e4995722a6e057d446.1723494758507'}" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "conn.ai.runDocumentIngest(\n", - " res[\"load_job_id\"],\n", - " res[\"data_source_id\"],\n", - " \"s3://tg-documentation/pytg_current/pytg_current.jsonl\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "# import httpx\n", - "# import base64\n", - "\n", - "\n", - "# def make_headers(conn: TigerGraphConnection):\n", - "# tkn = base64.b64encode(f\"{conn.username}:{conn.password}\".encode()).decode()\n", - "# headers = {\"Authorization\": f\"Basic {tkn}\"}\n", - "# return headers\n", - "\n", - "\n", - "# httpx.get(\n", - "# \"http://localhost:8001/GraphRAG_pytgdocs/consistency_status/graphrag\",\n", - "# headers=make_headers(conn),\n", - "# timeout=None,\n", - "# )\n", - "# # conn.ai.forceConsistencyUpdate()" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'asdf' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[32], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43masdf\u001b[49m\n", - "\u001b[0;31mNameError\u001b[0m: name 'asdf' is not defined" - ] - } - ], - "source": [ - "asdf" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for v in [\"Community\"]:\n", - " try:\n", - " conn.delVertices(v)\n", - " except:\n", - " pass\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sleep\n" - ] - }, - { - "data": { - "text/plain": [ - "{'job_name': 'load_documents_content_json_8a4ea730f21c43abbb58d818b9dd4d5a',\n", - " 'job_id': 'GraphRAG_pytgdocs.load_documents_content_json_8a4ea730f21c43abbb58d818b9dd4d5a.stream.SupportAI_GraphRAG_pytgdocs_7aed8a01c9c1432b8026ea6c708bf08b.1723490129603',\n", - " 'log_location': 
'/home/tigergraph/tigergraph/log/kafkaLoader/GraphRAG_pytgdocs.load_documents_content_json_8a4ea730f21c43abbb58d818b9dd4d5a.stream.SupportAI_GraphRAG_pytgdocs_7aed8a01c9c1432b8026ea6c708bf08b.1723490129603'}" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "for v in [\"Document\", \"Content\", \"DocumentChunk\", \"Entity\",\"ResolvedEntity\",\"Community\"]:\n", - "# for v in [\"ResolvedEntity\"]:\n", - " try:\n", - " conn.delVertices(v)\n", - " except:\n", - " pass\n", - "\n", - "import time\n", - "\n", - "print('sleep')\n", - "time.sleep(3)\n", - "conn.ai.runDocumentIngest(\n", - " res[\"load_job_id\"],\n", - " res[\"data_source_id\"],\n", - " \"s3://tg-documentation/pytg_current/pytg_current.jsonl\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "conn.gsql(f\"\"\"\n", - "USE GRAPH {conn.graphname}\n", - "DROP QUERY ResolveRelationships\n", - "\"\"\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import base64\n", - "import json\n", - "import httpx\n", - "import logging\n", - "\n", - "_ = logging.getLogger(__name__)\n", - "\n", - "\n", - "http_timeout = None\n", - "\n", - "\n", - "def make_headers(conn: TigerGraphConnection):\n", - " if conn.apiToken is None or conn.apiToken == \"\":\n", - " tkn = base64.b64encode(f\"{conn.username}:{conn.password}\".encode()).decode()\n", - " headers = {\"Authorization\": f\"Basic {tkn}\"}\n", - " else:\n", - " headers = {\"Authorization\": f\"Bearer {conn.apiToken}\"}\n", - "\n", - " return headers\n", - "\n", - "\n", - "def check_vertex_exists(conn, id):\n", - " headers = make_headers(conn)\n", - " with httpx.Client(timeout=http_timeout) as client:\n", - " res = client.get(\n", - " f\"{conn.restppUrl}/graph/{conn.graphname}/vertices/Entity/{id}\",\n", - " headers=headers,\n", - " )\n", - "\n", - " res.raise_for_status()\n", - " return res.json()\n", - "\n", - "\n", - "# r = check_vertex_exists(conn, \"asdfTigergraphexception\")\n", - "# print(json.dumps(r, indent=2), r[\"error\"])\n", - "r = check_vertex_exists(conn, \"Tigergraphexception\")\n", - "print(json.dumps(r, indent=2), r[\"error\"])\n", - "r[\"results\"][0][\"attributes\"][\"description\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "def check_vertex_has_desc(conn, comm: str):\n", - " headers = make_headers(conn)\n", - " with httpx.Client(timeout=None) as client:\n", - " resp = client.get(\n", - " f\"{conn.restppUrl}/graph/{conn.graphname}/vertices/Community/{comm}\",\n", - " headers=headers,\n", - " )\n", - " resp.raise_for_status()\n", - "\n", - " print(json.dumps(resp.json(),indent=2))\n", - " desc = resp.json()[\"results\"][0][\"attributes\"][\"description\"]\n", - " print(f\">>>*****{comm}:{desc}********\", flush=True)\n", - "\n", - " return len(desc) > 0\n", - "check_vertex_has_desc(conn,'Value_Property_1_2')\n", - "conn.upsertVertex(\"Community\",\"Rmse_1_2\",{\n", - " \"description\":\"asdf\"\n", - "})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def map_attrs(attributes: dict):\n", - " # map attrs\n", - " attrs = {}\n", - " for k, v in attributes.items():\n", - " if isinstance(v, tuple):\n", - " attrs[k] = {\"value\": v[0], \"op\": v[1]}\n", - " elif isinstance(v, dict):\n", - " attrs[k] = {\n", - " \"value\": 
{\"keylist\": list(v.keys()), \"valuelist\": list(v.values())}\n", - " }\n", - " else:\n", - " attrs[k] = {\"value\": v}\n", - " return attrs\n", - "\n", - "\n", - "def process_id(v_id: str):\n", - " return v_id.replace(\" \", \"_\").replace(\"/\", \"\")\n", - "\n", - "\n", - "def a(vertex_id=\"Post /Requesttoken\"):\n", - " vertex_id = process_id(vertex_id)\n", - " attributes = { # attrs\n", - " \"description\": [\"test\"],\n", - " \"epoch_added\": int(time.time()),\n", - " }\n", - "\n", - " vertex_id = vertex_id.replace(\" \", \"_\")\n", - " attrs = map_attrs(attributes)\n", - " data = json.dumps({\"vertices\": {\"Entity\": {vertex_id: attrs}}})\n", - " headers = make_headers(conn)\n", - " with httpx.Client(timeout=http_timeout) as client:\n", - " res = client.post(\n", - " f\"{conn.restppUrl}/graph/{conn.graphname}\", data=data, headers=headers\n", - " )\n", - "\n", - " res.raise_for_status()\n", - "\n", - " return res.json()\n", - "\n", - "\n", - "a()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from urllib import parse\n", - "\n", - "v_id = \"Post_/Requesttoken\"\n", - "v_id = process_id(v_id)\n", - "print(v_id)\n", - "\n", - "r = check_vertex_exists(conn, v_id)\n", - "print(json.dumps(r, indent=2), r[\"error\"])\n", - "r[\"results\"][0][\"attributes\"][\"description\"]" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "ml", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/eventual-consistency-service/app/main.py b/eventual-consistency-service/app/main.py index 701e363e..34403f1e 100644 --- a/eventual-consistency-service/app/main.py +++ b/eventual-consistency-service/app/main.py @@ -188,11 +188,10 @@ def consistency_status( LogWriter.info(f"Returning consistency status for {graphname}: {status}") case SupportAIMethod.GRAPHRAG: background.add_task(graphrag.run, graphname, conn) - # asyncio.run(graphrag.run(graphname, conn)) import time ecc_status = ( - f"hi from graph rag ecc: {conn.graphname} ({graphname}) {time.ctime()}" + f"GraphRAG initialization: {conn.graphname} ({graphname}) {time.ctime()}" ) case _: response.status_code = status.HTTP_404_NOT_FOUND From 2d1e98b16a759100686e115f35c3c479ad537ddb Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 12:34:16 -0400 Subject: [PATCH 15/53] reqs to fix unit tests --- copilot/requirements.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/copilot/requirements.txt b/copilot/requirements.txt index 7a8bd83f..03157f17 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -68,15 +68,15 @@ joblib==1.3.2 jq==1.6.0 jsonpatch==1.33 jsonpointer==2.4 -langchain==0.2.11 -langchain-community==0.2.10 -langchain-core==0.2.25 -langchain-experimental==0.0.63 +langchain==0.2.12 +langchain-community==0.2.11 +langchain-core==0.2.29 +langchain-experimental==0.0.64 langchain-groq==0.1.8 langchain-ibm==0.1.11 langchain-text-splitters==0.2.2 langchain_milvus==0.1.3 -langchain_openai==0.1.19 +langchain-openai==0.1.20 langchainhub==0.1.20 langdetect==1.0.9 langgraph==0.1.16 @@ -152,4 +152,4 @@ wandb==0.15.12 watchfiles==0.20.0 websockets==11.0.3 
yarl==1.9.2 -zipp==3.19.2 \ No newline at end of file +zipp==3.19.2 From e0065ee60b85b42b483e28ce0603c4ef2451c05b Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 12:43:42 -0400 Subject: [PATCH 16/53] reqs to fix unit test --- copilot/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/copilot/requirements.txt b/copilot/requirements.txt index 03157f17..3035d7c1 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -70,7 +70,6 @@ jsonpatch==1.33 jsonpointer==2.4 langchain==0.2.12 langchain-community==0.2.11 -langchain-core==0.2.29 langchain-experimental==0.0.64 langchain-groq==0.1.8 langchain-ibm==0.1.11 From 2a5434abd2dcffac69e689d097e232f25be1ca09 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 12:47:01 -0400 Subject: [PATCH 17/53] reqs to fix unit test --- copilot/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/copilot/requirements.txt b/copilot/requirements.txt index 3035d7c1..302c9b44 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -79,7 +79,6 @@ langchain-openai==0.1.20 langchainhub==0.1.20 langdetect==1.0.9 langgraph==0.1.16 -langsmith==0.1.94 lomond==0.3.3 lxml==4.9.3 marshmallow==3.20.1 From a43490a852729c076a7aa0a11d685298ce66c3da Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 12:51:28 -0400 Subject: [PATCH 18/53] reqs to fix unit test --- copilot/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/copilot/requirements.txt b/copilot/requirements.txt index 302c9b44..7df43165 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -68,17 +68,17 @@ joblib==1.3.2 jq==1.6.0 jsonpatch==1.33 jsonpointer==2.4 -langchain==0.2.12 langchain-community==0.2.11 langchain-experimental==0.0.64 langchain-groq==0.1.8 langchain-ibm==0.1.11 langchain-text-splitters==0.2.2 langchain_milvus==0.1.3 -langchain-openai==0.1.20 langchainhub==0.1.20 langdetect==1.0.9 langgraph==0.1.16 +langchain==0.2.12 +langchain-openai==0.1.20 lomond==0.3.3 lxml==4.9.3 marshmallow==3.20.1 From 4b76e73d5a284b90bb923707e52e72dae3c1d040 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 12:57:39 -0400 Subject: [PATCH 19/53] reqs to fix unit test --- copilot/requirements.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/copilot/requirements.txt b/copilot/requirements.txt index 7df43165..ba1f04e3 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -68,17 +68,19 @@ joblib==1.3.2 jq==1.6.0 jsonpatch==1.33 jsonpointer==2.4 +langchain==0.2.12 langchain-community==0.2.11 +langchain-core==0.2.3 langchain-experimental==0.0.64 langchain-groq==0.1.8 langchain-ibm==0.1.11 langchain-text-splitters==0.2.2 langchain_milvus==0.1.3 +langchain-openai==0.1.20 langchainhub==0.1.20 langdetect==1.0.9 langgraph==0.1.16 -langchain==0.2.12 -langchain-openai==0.1.20 +langsmith==0.1.94 lomond==0.3.3 lxml==4.9.3 marshmallow==3.20.1 From 115b1b3f9f5c046b1f8d03761dccdf43a9b32320 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 13:02:42 -0400 Subject: [PATCH 20/53] reqs to fix unit test --- copilot/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copilot/requirements.txt 
b/copilot/requirements.txt index ba1f04e3..5e475767 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -68,7 +68,7 @@ joblib==1.3.2 jq==1.6.0 jsonpatch==1.33 jsonpointer==2.4 -langchain==0.2.12 +langchain==0.2.13 langchain-community==0.2.11 langchain-core==0.2.3 langchain-experimental==0.0.64 From 58b5cbe6694f24f46f5e669e85b2e3abde0a1598 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 13:04:33 -0400 Subject: [PATCH 21/53] reqs to fix unit test --- copilot/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copilot/requirements.txt b/copilot/requirements.txt index 5e475767..7b30e5b5 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -70,7 +70,7 @@ jsonpatch==1.33 jsonpointer==2.4 langchain==0.2.13 langchain-community==0.2.11 -langchain-core==0.2.3 +langchain-core==0.2.30 langchain-experimental==0.0.64 langchain-groq==0.1.8 langchain-ibm==0.1.11 From fa960394b2acb3f88ef9171218445c5c57915b84 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 13:11:50 -0400 Subject: [PATCH 22/53] reqs to fix unit test --- copilot/requirements.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/copilot/requirements.txt b/copilot/requirements.txt index 7b30e5b5..632a6eba 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -68,15 +68,15 @@ joblib==1.3.2 jq==1.6.0 jsonpatch==1.33 jsonpointer==2.4 -langchain==0.2.13 -langchain-community==0.2.11 -langchain-core==0.2.30 +langchain==0.2.11 +langchain-community==0.2.10 +langchain-core==0.2.25 langchain-experimental==0.0.64 langchain-groq==0.1.8 langchain-ibm==0.1.11 langchain-text-splitters==0.2.2 langchain_milvus==0.1.3 -langchain-openai==0.1.20 +langchain_openai==0.1.19 langchainhub==0.1.20 langdetect==1.0.9 langgraph==0.1.16 From 905d5cfa324d373af3dd7f9266c6d795ec122b1c Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 13:30:37 -0400 Subject: [PATCH 23/53] reqs to fix unit test --- copilot/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copilot/requirements.txt b/copilot/requirements.txt index 632a6eba..e69f2be6 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -70,7 +70,7 @@ jsonpatch==1.33 jsonpointer==2.4 langchain==0.2.11 langchain-community==0.2.10 -langchain-core==0.2.25 +langchain-core==0.2.29 langchain-experimental==0.0.64 langchain-groq==0.1.8 langchain-ibm==0.1.11 From 5e8b0aeaf569ffa9570ac871fd804dce21b89414 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 14:09:27 -0400 Subject: [PATCH 24/53] reqs to fix unit test --- copilot/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/copilot/requirements.txt b/copilot/requirements.txt index e69f2be6..e6fb3718 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -81,6 +81,7 @@ langchainhub==0.1.20 langdetect==1.0.9 langgraph==0.1.16 langsmith==0.1.94 +Levenshtein==0.25.1 lomond==0.3.3 lxml==4.9.3 marshmallow==3.20.1 @@ -118,7 +119,7 @@ pyTigerDriver==1.0.15 pyTigerGraph==1.6.2 pytz==2023.3.post1 PyYAML==6.0.1 -rapidfuzz==3.4.0 +rapidfuzz==3.8.0 regex==2023.10.3 requests==2.32.2 rsa==4.9 From be0177e9b5d9dd50231d07fb43c8c5b6dd69b377 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG 
<165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 14:29:45 -0400 Subject: [PATCH 25/53] reqs to fix unit test --- copilot/requirements.txt | 237 +++++++++++++++++++++------------------ 1 file changed, 129 insertions(+), 108 deletions(-) diff --git a/copilot/requirements.txt b/copilot/requirements.txt index e6fb3718..af45c357 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -1,156 +1,177 @@ -aiohttp==3.9.3 +aiohappyeyeballs==2.3.5 +aiohttp==3.10.3 aiosignal==1.3.1 -annotated-types==0.5.0 -anyio==3.7.1 +annotated-types==0.7.0 +anyio==4.4.0 appdirs==1.4.4 argon2-cffi==23.1.0 argon2-cffi-bindings==21.2.0 async-timeout==4.0.3 asyncer==0.0.7 -attrs==23.1.0 -azure-core==1.30.1 -azure-storage-blob==12.19.1 +attrs==24.2.0 +azure-core==1.30.2 +azure-storage-blob==12.22.0 backoff==2.2.1 -beautifulsoup4==4.12.2 -boto3==1.28.83 -botocore==1.31.83 -cachetools==5.3.2 -certifi==2023.7.22 -cffi==1.16.0 +beautifulsoup4==4.12.3 +boto3==1.34.159 +botocore==1.34.159 +cachetools==5.4.0 +certifi==2024.7.4 +cffi==1.17.0 chardet==5.2.0 -charset-normalizer==3.2.0 +charset-normalizer==3.3.2 click==8.1.7 -cryptography==42.0.5 -dataclasses-json==0.5.14 -distro==1.8.0 +contourpy==1.2.1 +cryptography==43.0.0 +cycler==0.12.1 +dataclasses-json==0.6.7 +deepdiff==7.0.1 +distro==1.9.0 docker-pycreds==0.4.0 docstring_parser==0.16 -emoji==2.8.0 +emoji==2.12.1 environs==9.5.0 -exceptiongroup==1.1.3 -fastapi==0.103.1 +exceptiongroup==1.2.2 +fastapi==0.112.0 filelock==3.15.4 filetype==1.2.0 -frozenlist==1.4.0 -fsspec==2024.6.0 +fonttools==4.53.1 +frozenlist==1.4.1 +fsspec==2024.6.1 gitdb==4.0.11 -GitPython==3.1.40 -google-api-core==2.14.0 -google-auth==2.23.4 -google-cloud-aiplatform==1.52.0 -google-cloud-bigquery==3.13.0 -google-cloud-core==2.3.3 -google-cloud-resource-manager==1.10.4 -google-cloud-storage==2.13.0 +GitPython==3.1.43 +google-api-core==2.19.1 +google-auth==2.33.0 +google-cloud-aiplatform==1.61.0 +google-cloud-bigquery==3.25.0 +google-cloud-core==2.4.1 +google-cloud-resource-manager==1.12.5 +google-cloud-storage==2.18.2 google-crc32c==1.5.0 -google-resumable-media==2.6.0 -googleapis-common-protos==1.61.0 -greenlet==2.0.2 -groq==0.5.0 -grpc-google-iam-v1==0.12.7 -grpcio==1.59.2 -grpcio-status==1.59.2 +google-resumable-media==2.7.2 +googleapis-common-protos==1.63.2 +greenlet==3.0.3 +groq==0.9.0 +grpc-google-iam-v1==0.13.1 +grpcio==1.63.0 +grpcio-status==1.63.0 h11==0.14.0 -httpcore==0.18.0 -httptools==0.6.0 -httpx==0.25.0 -huggingface-hub==0.23.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +huggingface-hub==0.24.5 ibm-cos-sdk==2.13.6 ibm-cos-sdk-core==2.13.6 ibm-cos-sdk-s3transfer==2.13.6 -ibm_watsonx_ai==1.0.11 -idna==3.4 -importlib_metadata==8.0.0 +ibm_watsonx_ai==1.1.5 +idna==3.7 +importlib_metadata==8.2.0 iniconfig==2.0.0 isodate==0.6.1 +jiter==0.5.0 jmespath==1.0.1 -joblib==1.3.2 -jq==1.6.0 +joblib==1.4.2 +jq==1.7.0 jsonpatch==1.33 -jsonpointer==2.4 -langchain==0.2.11 -langchain-community==0.2.10 -langchain-core==0.2.29 +jsonpath-python==1.0.6 +jsonpointer==3.0.0 +kiwisolver==1.4.5 +langchain==0.2.13 +langchain-community==0.2.12 +langchain-core==0.2.30 langchain-experimental==0.0.64 -langchain-groq==0.1.8 -langchain-ibm==0.1.11 +langchain-groq==0.1.9 +langchain-ibm==0.1.12 +langchain-milvus==0.1.4 +langchain-openai==0.1.21 langchain-text-splitters==0.2.2 -langchain_milvus==0.1.3 -langchain_openai==0.1.19 -langchainhub==0.1.20 +langchainhub==0.1.21 langdetect==1.0.9 -langgraph==0.1.16 -langsmith==0.1.94 +langgraph==0.2.3 +langgraph-checkpoint==1.0.2 
+langsmith==0.1.99 Levenshtein==0.25.1 lomond==0.3.3 -lxml==4.9.3 -marshmallow==3.20.1 -matplotlib==3.9.1 -minio==7.2.5 -multidict==6.0.4 +lxml==5.3.0 +marshmallow==3.21.3 +matplotlib==3.9.2 +milvus-lite==2.4.9 +minio==7.2.7 +multidict==6.0.5 mypy-extensions==1.0.0 -nltk==3.8.1 +nest-asyncio==1.6.0 +nltk==3.8.2 numpy==1.26.4 -openai==1.37.1 -orjson==3.9.15 -packaging==23.2 -pandas==2.1.1 +openai==1.40.6 +ordered-set==4.1.0 +orjson==3.10.7 +packaging==24.1 +pandas==2.1.4 pathtools==0.1.2 +pillow==10.4.0 +platformdirs==4.2.2 pluggy==1.5.0 prometheus_client==0.20.0 -proto-plus==1.22.3 -protobuf==4.24.4 -psutil==5.9.6 -pyarrow==15.0.1 -pyasn1==0.5.0 -pyasn1-modules==0.3.0 -pycparser==2.21 +proto-plus==1.24.0 +protobuf==5.27.3 +psutil==6.0.0 +pyarrow==17.0.0 +pyasn1==0.6.0 +pyasn1_modules==0.4.0 +pycparser==2.22 pycryptodome==3.20.0 -pydantic==2.3.0 -pydantic_core==2.6.3 -pygit2==1.13.2 -pymilvus==2.4.4 -pytest==8.2.0 +pydantic==2.8.2 +pydantic_core==2.20.1 +pygit2==1.15.1 +pymilvus==2.4.5 +pyparsing==3.1.2 +pypdf==4.3.1 +pytest==8.3.2 python-dateutil==2.9.0.post0 -python-dotenv==1.0.0 -python-iso639==2023.6.15 +python-dotenv==1.0.1 +python-iso639==2024.4.27 python-magic==0.4.27 pyTigerDriver==1.0.15 -pyTigerGraph==1.6.2 -pytz==2023.3.post1 -PyYAML==6.0.1 -rapidfuzz==3.8.0 -regex==2023.10.3 +pyTigerGraph==1.6.5 +pytz==2024.1 +PyYAML==6.0.2 +rapidfuzz==3.9.6 +regex==2024.7.24 requests==2.32.2 +requests-toolbelt==1.0.0 rsa==4.9 -s3transfer==0.7.0 +s3transfer==0.10.2 scikit-learn==1.5.1 -sentry-sdk==1.32.0 +scipy==1.14.0 +sentry-sdk==2.13.0 setproctitle==1.3.3 -shapely==2.0.2 +shapely==2.0.5 six==1.16.0 smmap==5.0.1 -sniffio==1.3.0 -soupsieve==2.5 -SQLAlchemy==2.0.20 -starlette==0.27.0 +sniffio==1.3.1 +soupsieve==2.6 +SQLAlchemy==2.0.32 +starlette==0.37.2 tabulate==0.9.0 -tenacity==8.2.3 +tenacity==8.5.0 +threadpoolctl==3.5.0 tiktoken==0.7.0 -tqdm==4.66.1 -types-requests==2.31.0.6 +tqdm==4.66.5 +types-requests==2.32.0.20240712 types-urllib3==1.26.25.14 typing-inspect==0.9.0 -typing_extensions==4.8.0 -tzdata==2023.3 -ujson==5.9.0 -unstructured==0.10.23 -urllib3==1.26.18 -uvicorn==0.23.2 -uvloop==0.17.0 -validators==0.22.0 -wandb==0.15.12 -watchfiles==0.20.0 -websockets==11.0.3 -yarl==1.9.2 -zipp==3.19.2 +typing_extensions==4.12.2 +tzdata==2024.1 +ujson==5.10.0 +unstructured==0.15.1 +unstructured-client==0.25.5 +urllib3==2.2.2 +uvicorn==0.30.6 +uvloop==0.19.0 +validators==0.33.0 +wandb==0.17.6 +watchfiles==0.23.0 +websockets==12.0 +wrapt==1.16.0 +yarl==1.9.4 +zipp==3.20.0 From cb43815468caf756311d087c03b25dc2395184fb Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 14:35:57 -0400 Subject: [PATCH 26/53] reqs to fix unit test --- common/requirements.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/common/requirements.txt b/common/requirements.txt index bb20e5b9..a8cc3d51 100644 --- a/common/requirements.txt +++ b/common/requirements.txt @@ -70,15 +70,14 @@ joblib==1.3.2 jq==1.6.0 jsonpatch==1.33 jsonpointer==2.4 -langchain==0.2.12 +langchain==0.2.13 langchain-community==0.2.11 -langchain-core==0.2.29 +langchain-core==0.2.3 langchain-experimental==0.0.64 langchain-openai==0.1.20 langchain-text-splitters==0.2.2 langsmith==0.1.98 Levenshtein==0.25.1 -langchain==0.2.11 langchain-groq==0.1.8 langchain-ibm==0.1.11 langchain_milvus==0.1.3 From ac6d3fe8d910eee102af6bab204437fc45626486 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 
14:52:41 -0400 Subject: [PATCH 27/53] reqs to fix unit test --- .github/workflows/pull-test-merge.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull-test-merge.yaml b/.github/workflows/pull-test-merge.yaml index 654703d8..3a61ecaf 100644 --- a/.github/workflows/pull-test-merge.yaml +++ b/.github/workflows/pull-test-merge.yaml @@ -33,7 +33,7 @@ jobs: python -m venv venv source venv/bin/activate python -m pip install --upgrade pip - pip install -r copilot/requirements.txt + pip install --no-cache-dir -r copilot/requirements.txt pip install pytest - name: Create db config From 60aa569ef12749af9b36c09684c62b12fda7231a Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 15:07:02 -0400 Subject: [PATCH 28/53] reqs to fix unit test --- copilot/requirements.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/copilot/requirements.txt b/copilot/requirements.txt index af45c357..7ee3073f 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -78,7 +78,6 @@ jsonpointer==3.0.0 kiwisolver==1.4.5 langchain==0.2.13 langchain-community==0.2.12 -langchain-core==0.2.30 langchain-experimental==0.0.64 langchain-groq==0.1.9 langchain-ibm==0.1.12 @@ -89,7 +88,6 @@ langchainhub==0.1.21 langdetect==1.0.9 langgraph==0.2.3 langgraph-checkpoint==1.0.2 -langsmith==0.1.99 Levenshtein==0.25.1 lomond==0.3.3 lxml==5.3.0 From 2d377569d5aadac4396abe456320de39d4106966 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 15:17:56 -0400 Subject: [PATCH 29/53] reqs to fix unit test --- copilot/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copilot/requirements.txt b/copilot/requirements.txt index 7ee3073f..7f6269f2 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -82,7 +82,7 @@ langchain-experimental==0.0.64 langchain-groq==0.1.9 langchain-ibm==0.1.12 langchain-milvus==0.1.4 -langchain-openai==0.1.21 +langchain-openai langchain-text-splitters==0.2.2 langchainhub==0.1.21 langdetect==1.0.9 From 1929aa22cba052b004a335d61941fe7f5deb0d9b Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 15:51:26 -0400 Subject: [PATCH 30/53] reqs to fix unit test --- .github/workflows/pull-test-merge.yaml | 2 +- common/requirements.txt | 4 +- copilot/requirements.txt | 239 ++++++++++++------------- 3 files changed, 113 insertions(+), 132 deletions(-) diff --git a/.github/workflows/pull-test-merge.yaml b/.github/workflows/pull-test-merge.yaml index 3a61ecaf..654703d8 100644 --- a/.github/workflows/pull-test-merge.yaml +++ b/.github/workflows/pull-test-merge.yaml @@ -33,7 +33,7 @@ jobs: python -m venv venv source venv/bin/activate python -m pip install --upgrade pip - pip install --no-cache-dir -r copilot/requirements.txt + pip install -r copilot/requirements.txt pip install pytest - name: Create db config diff --git a/common/requirements.txt b/common/requirements.txt index a8cc3d51..2d9a90ba 100644 --- a/common/requirements.txt +++ b/common/requirements.txt @@ -70,9 +70,9 @@ joblib==1.3.2 jq==1.6.0 jsonpatch==1.33 jsonpointer==2.4 -langchain==0.2.13 +langchain==0.2.12 langchain-community==0.2.11 -langchain-core==0.2.3 +langchain-core==0.2.29 langchain-experimental==0.0.64 langchain-openai==0.1.20 langchain-text-splitters==0.2.2 diff --git a/copilot/requirements.txt b/copilot/requirements.txt index 7f6269f2..df06f401 
100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -1,175 +1,156 @@ -aiohappyeyeballs==2.3.5 -aiohttp==3.10.3 +aiohttp==3.9.3 aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.4.0 +annotated-types==0.5.0 +anyio==3.7.1 appdirs==1.4.4 argon2-cffi==23.1.0 argon2-cffi-bindings==21.2.0 async-timeout==4.0.3 asyncer==0.0.7 -attrs==24.2.0 -azure-core==1.30.2 -azure-storage-blob==12.22.0 +attrs==23.1.0 +azure-core==1.30.1 +azure-storage-blob==12.19.1 backoff==2.2.1 -beautifulsoup4==4.12.3 -boto3==1.34.159 -botocore==1.34.159 -cachetools==5.4.0 -certifi==2024.7.4 -cffi==1.17.0 +beautifulsoup4==4.12.2 +boto3==1.28.83 +botocore==1.31.83 +cachetools==5.3.2 +certifi==2023.7.22 +cffi==1.16.0 chardet==5.2.0 -charset-normalizer==3.3.2 +charset-normalizer==3.2.0 click==8.1.7 -contourpy==1.2.1 -cryptography==43.0.0 -cycler==0.12.1 -dataclasses-json==0.6.7 -deepdiff==7.0.1 -distro==1.9.0 +cryptography==42.0.5 +dataclasses-json==0.5.14 +distro==1.8.0 docker-pycreds==0.4.0 docstring_parser==0.16 -emoji==2.12.1 +emoji==2.8.0 environs==9.5.0 -exceptiongroup==1.2.2 -fastapi==0.112.0 +exceptiongroup==1.1.3 +fastapi==0.103.1 filelock==3.15.4 filetype==1.2.0 -fonttools==4.53.1 -frozenlist==1.4.1 -fsspec==2024.6.1 +frozenlist==1.4.0 +fsspec==2024.6.0 gitdb==4.0.11 -GitPython==3.1.43 -google-api-core==2.19.1 -google-auth==2.33.0 -google-cloud-aiplatform==1.61.0 -google-cloud-bigquery==3.25.0 -google-cloud-core==2.4.1 -google-cloud-resource-manager==1.12.5 -google-cloud-storage==2.18.2 +GitPython==3.1.40 +google-api-core==2.14.0 +google-auth==2.23.4 +google-cloud-aiplatform==1.52.0 +google-cloud-bigquery==3.13.0 +google-cloud-core==2.3.3 +google-cloud-resource-manager==1.10.4 +google-cloud-storage==2.13.0 google-crc32c==1.5.0 -google-resumable-media==2.7.2 -googleapis-common-protos==1.63.2 -greenlet==3.0.3 -groq==0.9.0 -grpc-google-iam-v1==0.13.1 -grpcio==1.63.0 -grpcio-status==1.63.0 +google-resumable-media==2.6.0 +googleapis-common-protos==1.61.0 +greenlet==2.0.2 +groq==0.5.0 +grpc-google-iam-v1==0.12.7 +grpcio==1.59.2 +grpcio-status==1.59.2 h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -huggingface-hub==0.24.5 +httpcore==0.18.0 +httptools==0.6.0 +httpx==0.25.0 +huggingface-hub==0.23.0 ibm-cos-sdk==2.13.6 ibm-cos-sdk-core==2.13.6 ibm-cos-sdk-s3transfer==2.13.6 -ibm_watsonx_ai==1.1.5 -idna==3.7 -importlib_metadata==8.2.0 +ibm_watsonx_ai==1.0.11 +idna==3.4 +importlib_metadata==8.0.0 iniconfig==2.0.0 isodate==0.6.1 -jiter==0.5.0 jmespath==1.0.1 -joblib==1.4.2 -jq==1.7.0 +joblib==1.3.2 +jq==1.6.0 jsonpatch==1.33 -jsonpath-python==1.0.6 -jsonpointer==3.0.0 -kiwisolver==1.4.5 -langchain==0.2.13 -langchain-community==0.2.12 -langchain-experimental==0.0.64 -langchain-groq==0.1.9 -langchain-ibm==0.1.12 -langchain-milvus==0.1.4 -langchain-openai +jsonpointer==2.4 +langchain==0.2.11 +langchain-community==0.2.10 +langchain-core==0.2.25 +langchain-experimental==0.0.63 +langchain-groq==0.1.8 +langchain-ibm==0.1.11 langchain-text-splitters==0.2.2 -langchainhub==0.1.21 -langdetect==1.0.9 -langgraph==0.2.3 -langgraph-checkpoint==1.0.2 +langchain_milvus==0.1.3 +langchain_openai==0.1.19 +langchainhub==0.1.20 Levenshtein==0.25.1 +langdetect==1.0.9 +langgraph==0.1.16 +langsmith==0.1.94 lomond==0.3.3 -lxml==5.3.0 -marshmallow==3.21.3 -matplotlib==3.9.2 -milvus-lite==2.4.9 -minio==7.2.7 -multidict==6.0.5 +lxml==4.9.3 +marshmallow==3.20.1 +matplotlib==3.9.1 +minio==7.2.5 +multidict==6.0.4 mypy-extensions==1.0.0 -nest-asyncio==1.6.0 -nltk==3.8.2 +nltk==3.8.1 numpy==1.26.4 -openai==1.40.6 
-ordered-set==4.1.0 -orjson==3.10.7 -packaging==24.1 -pandas==2.1.4 +openai==1.37.1 +orjson==3.9.15 +packaging==23.2 +pandas==2.1.1 pathtools==0.1.2 -pillow==10.4.0 -platformdirs==4.2.2 pluggy==1.5.0 prometheus_client==0.20.0 -proto-plus==1.24.0 -protobuf==5.27.3 -psutil==6.0.0 -pyarrow==17.0.0 -pyasn1==0.6.0 -pyasn1_modules==0.4.0 -pycparser==2.22 +proto-plus==1.22.3 +protobuf==4.24.4 +psutil==5.9.6 +pyarrow==15.0.1 +pyasn1==0.5.0 +pyasn1-modules==0.3.0 +pycparser==2.21 pycryptodome==3.20.0 -pydantic==2.8.2 -pydantic_core==2.20.1 -pygit2==1.15.1 -pymilvus==2.4.5 -pyparsing==3.1.2 -pypdf==4.3.1 -pytest==8.3.2 +pydantic==2.3.0 +pydantic_core==2.6.3 +pygit2==1.13.2 +pymilvus==2.4.4 +pytest==8.2.0 python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -python-iso639==2024.4.27 +python-dotenv==1.0.0 +python-iso639==2023.6.15 python-magic==0.4.27 pyTigerDriver==1.0.15 -pyTigerGraph==1.6.5 -pytz==2024.1 -PyYAML==6.0.2 -rapidfuzz==3.9.6 -regex==2024.7.24 +pyTigerGraph==1.6.2 +pytz==2023.3.post1 +PyYAML==6.0.1 +rapidfuzz==3.4.0 +regex==2023.10.3 requests==2.32.2 -requests-toolbelt==1.0.0 rsa==4.9 -s3transfer==0.10.2 +s3transfer==0.7.0 scikit-learn==1.5.1 -scipy==1.14.0 -sentry-sdk==2.13.0 +sentry-sdk==1.32.0 setproctitle==1.3.3 -shapely==2.0.5 +shapely==2.0.2 six==1.16.0 smmap==5.0.1 -sniffio==1.3.1 -soupsieve==2.6 -SQLAlchemy==2.0.32 -starlette==0.37.2 +sniffio==1.3.0 +soupsieve==2.5 +SQLAlchemy==2.0.20 +starlette==0.27.0 tabulate==0.9.0 -tenacity==8.5.0 -threadpoolctl==3.5.0 +tenacity==8.2.3 tiktoken==0.7.0 -tqdm==4.66.5 -types-requests==2.32.0.20240712 +tqdm==4.66.1 +types-requests==2.31.0.6 types-urllib3==1.26.25.14 typing-inspect==0.9.0 -typing_extensions==4.12.2 -tzdata==2024.1 -ujson==5.10.0 -unstructured==0.15.1 -unstructured-client==0.25.5 -urllib3==2.2.2 -uvicorn==0.30.6 -uvloop==0.19.0 -validators==0.33.0 -wandb==0.17.6 -watchfiles==0.23.0 -websockets==12.0 -wrapt==1.16.0 -yarl==1.9.4 -zipp==3.20.0 +typing_extensions==4.8.0 +tzdata==2023.3 +ujson==5.9.0 +unstructured==0.10.23 +urllib3==1.26.18 +uvicorn==0.23.2 +uvloop==0.17.0 +validators==0.22.0 +wandb==0.15.12 +watchfiles==0.20.0 +websockets==11.0.3 +yarl==1.9.2 +zipp==3.19.2 From f33ddef95e855bf1b400966ff4615ae64b89cdbc Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 15:52:49 -0400 Subject: [PATCH 31/53] reqs to fix unit test --- copilot/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copilot/requirements.txt b/copilot/requirements.txt index df06f401..fad0e729 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -119,7 +119,7 @@ pyTigerDriver==1.0.15 pyTigerGraph==1.6.2 pytz==2023.3.post1 PyYAML==6.0.1 -rapidfuzz==3.4.0 +rapidfuzz==3.9.6 regex==2023.10.3 requests==2.32.2 rsa==4.9 From 1a971813609fb9eca7849826025f1d04ea1e85b4 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 15:57:19 -0400 Subject: [PATCH 32/53] langchain-openai conflicts --- common/llm_services/openai_service.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/common/llm_services/openai_service.py b/common/llm_services/openai_service.py index 22b32380..7b166398 100644 --- a/common/llm_services/openai_service.py +++ b/common/llm_services/openai_service.py @@ -1,6 +1,7 @@ import logging import os -from langchain_openai import ChatOpenAI + +from langchain_community.chat_models.openai import ChatOpenAI from common.llm_services import LLM_Model from 
common.logs.log import req_id_cv From e9f7468e44ec311e1621fb91d4abe7b4665137f1 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 16:31:30 -0400 Subject: [PATCH 33/53] reqs to fix unit test --- common/requirements.txt | 8 +- copilot/requirements.txt | 239 +++++++++++++++++++++------------------ 2 files changed, 134 insertions(+), 113 deletions(-) diff --git a/common/requirements.txt b/common/requirements.txt index 2d9a90ba..122b1b73 100644 --- a/common/requirements.txt +++ b/common/requirements.txt @@ -70,11 +70,11 @@ joblib==1.3.2 jq==1.6.0 jsonpatch==1.33 jsonpointer==2.4 -langchain==0.2.12 -langchain-community==0.2.11 -langchain-core==0.2.29 +langchain==0.2.13 +langchain-community==0.2.12 +langchain-core==0.2.30 langchain-experimental==0.0.64 -langchain-openai==0.1.20 +langchain-openai==0.1.21 langchain-text-splitters==0.2.2 langsmith==0.1.98 Levenshtein==0.25.1 diff --git a/copilot/requirements.txt b/copilot/requirements.txt index fad0e729..af45c357 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -1,156 +1,177 @@ -aiohttp==3.9.3 +aiohappyeyeballs==2.3.5 +aiohttp==3.10.3 aiosignal==1.3.1 -annotated-types==0.5.0 -anyio==3.7.1 +annotated-types==0.7.0 +anyio==4.4.0 appdirs==1.4.4 argon2-cffi==23.1.0 argon2-cffi-bindings==21.2.0 async-timeout==4.0.3 asyncer==0.0.7 -attrs==23.1.0 -azure-core==1.30.1 -azure-storage-blob==12.19.1 +attrs==24.2.0 +azure-core==1.30.2 +azure-storage-blob==12.22.0 backoff==2.2.1 -beautifulsoup4==4.12.2 -boto3==1.28.83 -botocore==1.31.83 -cachetools==5.3.2 -certifi==2023.7.22 -cffi==1.16.0 +beautifulsoup4==4.12.3 +boto3==1.34.159 +botocore==1.34.159 +cachetools==5.4.0 +certifi==2024.7.4 +cffi==1.17.0 chardet==5.2.0 -charset-normalizer==3.2.0 +charset-normalizer==3.3.2 click==8.1.7 -cryptography==42.0.5 -dataclasses-json==0.5.14 -distro==1.8.0 +contourpy==1.2.1 +cryptography==43.0.0 +cycler==0.12.1 +dataclasses-json==0.6.7 +deepdiff==7.0.1 +distro==1.9.0 docker-pycreds==0.4.0 docstring_parser==0.16 -emoji==2.8.0 +emoji==2.12.1 environs==9.5.0 -exceptiongroup==1.1.3 -fastapi==0.103.1 +exceptiongroup==1.2.2 +fastapi==0.112.0 filelock==3.15.4 filetype==1.2.0 -frozenlist==1.4.0 -fsspec==2024.6.0 +fonttools==4.53.1 +frozenlist==1.4.1 +fsspec==2024.6.1 gitdb==4.0.11 -GitPython==3.1.40 -google-api-core==2.14.0 -google-auth==2.23.4 -google-cloud-aiplatform==1.52.0 -google-cloud-bigquery==3.13.0 -google-cloud-core==2.3.3 -google-cloud-resource-manager==1.10.4 -google-cloud-storage==2.13.0 +GitPython==3.1.43 +google-api-core==2.19.1 +google-auth==2.33.0 +google-cloud-aiplatform==1.61.0 +google-cloud-bigquery==3.25.0 +google-cloud-core==2.4.1 +google-cloud-resource-manager==1.12.5 +google-cloud-storage==2.18.2 google-crc32c==1.5.0 -google-resumable-media==2.6.0 -googleapis-common-protos==1.61.0 -greenlet==2.0.2 -groq==0.5.0 -grpc-google-iam-v1==0.12.7 -grpcio==1.59.2 -grpcio-status==1.59.2 +google-resumable-media==2.7.2 +googleapis-common-protos==1.63.2 +greenlet==3.0.3 +groq==0.9.0 +grpc-google-iam-v1==0.13.1 +grpcio==1.63.0 +grpcio-status==1.63.0 h11==0.14.0 -httpcore==0.18.0 -httptools==0.6.0 -httpx==0.25.0 -huggingface-hub==0.23.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +huggingface-hub==0.24.5 ibm-cos-sdk==2.13.6 ibm-cos-sdk-core==2.13.6 ibm-cos-sdk-s3transfer==2.13.6 -ibm_watsonx_ai==1.0.11 -idna==3.4 -importlib_metadata==8.0.0 +ibm_watsonx_ai==1.1.5 +idna==3.7 +importlib_metadata==8.2.0 iniconfig==2.0.0 isodate==0.6.1 +jiter==0.5.0 jmespath==1.0.1 -joblib==1.3.2 
-jq==1.6.0 +joblib==1.4.2 +jq==1.7.0 jsonpatch==1.33 -jsonpointer==2.4 -langchain==0.2.11 -langchain-community==0.2.10 -langchain-core==0.2.25 -langchain-experimental==0.0.63 -langchain-groq==0.1.8 -langchain-ibm==0.1.11 +jsonpath-python==1.0.6 +jsonpointer==3.0.0 +kiwisolver==1.4.5 +langchain==0.2.13 +langchain-community==0.2.12 +langchain-core==0.2.30 +langchain-experimental==0.0.64 +langchain-groq==0.1.9 +langchain-ibm==0.1.12 +langchain-milvus==0.1.4 +langchain-openai==0.1.21 langchain-text-splitters==0.2.2 -langchain_milvus==0.1.3 -langchain_openai==0.1.19 -langchainhub==0.1.20 -Levenshtein==0.25.1 +langchainhub==0.1.21 langdetect==1.0.9 -langgraph==0.1.16 -langsmith==0.1.94 +langgraph==0.2.3 +langgraph-checkpoint==1.0.2 +langsmith==0.1.99 +Levenshtein==0.25.1 lomond==0.3.3 -lxml==4.9.3 -marshmallow==3.20.1 -matplotlib==3.9.1 -minio==7.2.5 -multidict==6.0.4 +lxml==5.3.0 +marshmallow==3.21.3 +matplotlib==3.9.2 +milvus-lite==2.4.9 +minio==7.2.7 +multidict==6.0.5 mypy-extensions==1.0.0 -nltk==3.8.1 +nest-asyncio==1.6.0 +nltk==3.8.2 numpy==1.26.4 -openai==1.37.1 -orjson==3.9.15 -packaging==23.2 -pandas==2.1.1 +openai==1.40.6 +ordered-set==4.1.0 +orjson==3.10.7 +packaging==24.1 +pandas==2.1.4 pathtools==0.1.2 +pillow==10.4.0 +platformdirs==4.2.2 pluggy==1.5.0 prometheus_client==0.20.0 -proto-plus==1.22.3 -protobuf==4.24.4 -psutil==5.9.6 -pyarrow==15.0.1 -pyasn1==0.5.0 -pyasn1-modules==0.3.0 -pycparser==2.21 +proto-plus==1.24.0 +protobuf==5.27.3 +psutil==6.0.0 +pyarrow==17.0.0 +pyasn1==0.6.0 +pyasn1_modules==0.4.0 +pycparser==2.22 pycryptodome==3.20.0 -pydantic==2.3.0 -pydantic_core==2.6.3 -pygit2==1.13.2 -pymilvus==2.4.4 -pytest==8.2.0 +pydantic==2.8.2 +pydantic_core==2.20.1 +pygit2==1.15.1 +pymilvus==2.4.5 +pyparsing==3.1.2 +pypdf==4.3.1 +pytest==8.3.2 python-dateutil==2.9.0.post0 -python-dotenv==1.0.0 -python-iso639==2023.6.15 +python-dotenv==1.0.1 +python-iso639==2024.4.27 python-magic==0.4.27 pyTigerDriver==1.0.15 -pyTigerGraph==1.6.2 -pytz==2023.3.post1 -PyYAML==6.0.1 +pyTigerGraph==1.6.5 +pytz==2024.1 +PyYAML==6.0.2 rapidfuzz==3.9.6 -regex==2023.10.3 +regex==2024.7.24 requests==2.32.2 +requests-toolbelt==1.0.0 rsa==4.9 -s3transfer==0.7.0 +s3transfer==0.10.2 scikit-learn==1.5.1 -sentry-sdk==1.32.0 +scipy==1.14.0 +sentry-sdk==2.13.0 setproctitle==1.3.3 -shapely==2.0.2 +shapely==2.0.5 six==1.16.0 smmap==5.0.1 -sniffio==1.3.0 -soupsieve==2.5 -SQLAlchemy==2.0.20 -starlette==0.27.0 +sniffio==1.3.1 +soupsieve==2.6 +SQLAlchemy==2.0.32 +starlette==0.37.2 tabulate==0.9.0 -tenacity==8.2.3 +tenacity==8.5.0 +threadpoolctl==3.5.0 tiktoken==0.7.0 -tqdm==4.66.1 -types-requests==2.31.0.6 +tqdm==4.66.5 +types-requests==2.32.0.20240712 types-urllib3==1.26.25.14 typing-inspect==0.9.0 -typing_extensions==4.8.0 -tzdata==2023.3 -ujson==5.9.0 -unstructured==0.10.23 -urllib3==1.26.18 -uvicorn==0.23.2 -uvloop==0.17.0 -validators==0.22.0 -wandb==0.15.12 -watchfiles==0.20.0 -websockets==11.0.3 -yarl==1.9.2 -zipp==3.19.2 +typing_extensions==4.12.2 +tzdata==2024.1 +ujson==5.10.0 +unstructured==0.15.1 +unstructured-client==0.25.5 +urllib3==2.2.2 +uvicorn==0.30.6 +uvloop==0.19.0 +validators==0.33.0 +wandb==0.17.6 +watchfiles==0.23.0 +websockets==12.0 +wrapt==1.16.0 +yarl==1.9.4 +zipp==3.20.0 From c8248d72e614d055f4fc1e5969373fa25d134f1e Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 16:48:48 -0400 Subject: [PATCH 34/53] reqs to fix unit test --- copilot/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/copilot/requirements.txt b/copilot/requirements.txt index af45c357..e1a28c91 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -82,7 +82,7 @@ langchain-core==0.2.30 langchain-experimental==0.0.64 langchain-groq==0.1.9 langchain-ibm==0.1.12 -langchain-milvus==0.1.4 +langchain-milvus==0.1.3 langchain-openai==0.1.21 langchain-text-splitters==0.2.2 langchainhub==0.1.21 From 210d0fc74c57bb7919f36798822286461402fd95 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 16:58:24 -0400 Subject: [PATCH 35/53] reqs to fix unit test --- common/requirements.txt | 223 ++++++++++++++++++++++------------------ 1 file changed, 121 insertions(+), 102 deletions(-) diff --git a/common/requirements.txt b/common/requirements.txt index 122b1b73..af45c357 100644 --- a/common/requirements.txt +++ b/common/requirements.txt @@ -1,158 +1,177 @@ -aiochannel==1.2.1 -aiohttp==3.9.3 +aiohappyeyeballs==2.3.5 +aiohttp==3.10.3 aiosignal==1.3.1 -annotated-types==0.5.0 -anyio==3.7.1 +annotated-types==0.7.0 +anyio==4.4.0 appdirs==1.4.4 argon2-cffi==23.1.0 argon2-cffi-bindings==21.2.0 async-timeout==4.0.3 asyncer==0.0.7 -attrs==23.1.0 -azure-core==1.30.1 -azure-storage-blob==12.19.1 +attrs==24.2.0 +azure-core==1.30.2 +azure-storage-blob==12.22.0 backoff==2.2.1 -beautifulsoup4==4.12.2 -boto3==1.28.83 -botocore==1.31.83 -cachetools==5.3.2 -certifi==2023.7.22 -cffi==1.16.0 +beautifulsoup4==4.12.3 +boto3==1.34.159 +botocore==1.34.159 +cachetools==5.4.0 +certifi==2024.7.4 +cffi==1.17.0 chardet==5.2.0 -charset-normalizer==3.2.0 +charset-normalizer==3.3.2 click==8.1.7 -cryptography==42.0.5 -dataclasses-json==0.5.14 -distro==1.8.0 +contourpy==1.2.1 +cryptography==43.0.0 +cycler==0.12.1 +dataclasses-json==0.6.7 +deepdiff==7.0.1 +distro==1.9.0 docker-pycreds==0.4.0 docstring_parser==0.16 -emoji==2.8.0 +emoji==2.12.1 environs==9.5.0 -exceptiongroup==1.1.3 -fastapi==0.103.1 +exceptiongroup==1.2.2 +fastapi==0.112.0 filelock==3.15.4 filetype==1.2.0 -frozenlist==1.4.0 +fonttools==4.53.1 +frozenlist==1.4.1 fsspec==2024.6.1 gitdb==4.0.11 -GitPython==3.1.40 -google-api-core==2.14.0 -google-auth==2.23.4 -google-cloud-aiplatform==1.52.0 -google-cloud-bigquery==3.13.0 -google-cloud-core==2.3.3 -google-cloud-resource-manager==1.10.4 -google-cloud-storage==2.13.0 +GitPython==3.1.43 +google-api-core==2.19.1 +google-auth==2.33.0 +google-cloud-aiplatform==1.61.0 +google-cloud-bigquery==3.25.0 +google-cloud-core==2.4.1 +google-cloud-resource-manager==1.12.5 +google-cloud-storage==2.18.2 google-crc32c==1.5.0 -google-resumable-media==2.6.0 -googleapis-common-protos==1.61.0 -greenlet==2.0.2 -groq==0.5.0 -grpc-google-iam-v1==0.12.7 -grpcio==1.59.2 -grpcio-status==1.59.2 +google-resumable-media==2.7.2 +googleapis-common-protos==1.63.2 +greenlet==3.0.3 +groq==0.9.0 +grpc-google-iam-v1==0.13.1 +grpcio==1.63.0 +grpcio-status==1.63.0 h11==0.14.0 -httpcore==0.18.0 -httptools==0.6.0 -httpx==0.25.0 -huggingface-hub==0.23.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +huggingface-hub==0.24.5 ibm-cos-sdk==2.13.6 ibm-cos-sdk-core==2.13.6 ibm-cos-sdk-s3transfer==2.13.6 -ibm_watsonx_ai==1.0.11 -idna==3.4 -importlib_metadata==8.0.0 +ibm_watsonx_ai==1.1.5 +idna==3.7 +importlib_metadata==8.2.0 iniconfig==2.0.0 isodate==0.6.1 jiter==0.5.0 jmespath==1.0.1 -joblib==1.3.2 -jq==1.6.0 +joblib==1.4.2 +jq==1.7.0 jsonpatch==1.33 -jsonpointer==2.4 +jsonpath-python==1.0.6 +jsonpointer==3.0.0 +kiwisolver==1.4.5 langchain==0.2.13 langchain-community==0.2.12 langchain-core==0.2.30 
langchain-experimental==0.0.64 +langchain-groq==0.1.9 +langchain-ibm==0.1.12 +langchain-milvus==0.1.4 langchain-openai==0.1.21 langchain-text-splitters==0.2.2 -langsmith==0.1.98 -Levenshtein==0.25.1 -langchain-groq==0.1.8 -langchain-ibm==0.1.11 -langchain_milvus==0.1.3 -langchainhub==0.1.20 +langchainhub==0.1.21 langdetect==1.0.9 -langgraph==0.1.16 +langgraph==0.2.3 +langgraph-checkpoint==1.0.2 +langsmith==0.1.99 +Levenshtein==0.25.1 lomond==0.3.3 -lxml==4.9.3 -marshmallow==3.20.1 -matplotlib==3.9.1 -minio==7.2.5 -multidict==6.0.4 +lxml==5.3.0 +marshmallow==3.21.3 +matplotlib==3.9.2 +milvus-lite==2.4.9 +minio==7.2.7 +multidict==6.0.5 mypy-extensions==1.0.0 -nltk==3.8.1 +nest-asyncio==1.6.0 +nltk==3.8.2 numpy==1.26.4 -openai==1.40.2 -orjson==3.9.15 -packaging==23.2 -pandas==2.1.1 +openai==1.40.6 +ordered-set==4.1.0 +orjson==3.10.7 +packaging==24.1 +pandas==2.1.4 pathtools==0.1.2 +pillow==10.4.0 +platformdirs==4.2.2 pluggy==1.5.0 prometheus_client==0.20.0 -proto-plus==1.22.3 -protobuf==4.24.4 -psutil==5.9.6 -pyarrow==15.0.1 -pyasn1==0.5.0 -pyasn1-modules==0.3.0 -pycparser==2.21 +proto-plus==1.24.0 +protobuf==5.27.3 +psutil==6.0.0 +pyarrow==17.0.0 +pyasn1==0.6.0 +pyasn1_modules==0.4.0 +pycparser==2.22 pycryptodome==3.20.0 -pydantic==2.3.0 -pydantic_core==2.6.3 -pygit2==1.13.2 -pymilvus==2.4.4 -pytest==8.2.0 +pydantic==2.8.2 +pydantic_core==2.20.1 +pygit2==1.15.1 +pymilvus==2.4.5 +pyparsing==3.1.2 +pypdf==4.3.1 +pytest==8.3.2 python-dateutil==2.9.0.post0 -python-dotenv==1.0.0 -python-iso639==2023.6.15 +python-dotenv==1.0.1 +python-iso639==2024.4.27 python-magic==0.4.27 pyTigerDriver==1.0.15 pyTigerGraph==1.6.5 -pytz==2023.3.post1 -PyYAML==6.0.1 +pytz==2024.1 +PyYAML==6.0.2 rapidfuzz==3.9.6 -regex==2023.10.3 +regex==2024.7.24 requests==2.32.2 +requests-toolbelt==1.0.0 rsa==4.9 -s3transfer==0.7.0 +s3transfer==0.10.2 scikit-learn==1.5.1 -sentry-sdk==1.32.0 +scipy==1.14.0 +sentry-sdk==2.13.0 setproctitle==1.3.3 -shapely==2.0.2 +shapely==2.0.5 six==1.16.0 smmap==5.0.1 -sniffio==1.3.0 -soupsieve==2.5 -SQLAlchemy==2.0.20 -starlette==0.27.0 +sniffio==1.3.1 +soupsieve==2.6 +SQLAlchemy==2.0.32 +starlette==0.37.2 tabulate==0.9.0 -tenacity==8.2.3 +tenacity==8.5.0 +threadpoolctl==3.5.0 tiktoken==0.7.0 -tqdm==4.66.1 -types-requests==2.31.0.6 +tqdm==4.66.5 +types-requests==2.32.0.20240712 types-urllib3==1.26.25.14 typing-inspect==0.9.0 typing_extensions==4.12.2 -tzdata==2023.3 -ujson==5.9.0 -unstructured==0.10.23 -urllib3==1.26.18 -uvicorn==0.23.2 -uvloop==0.17.0 -validators==0.22.0 -wandb==0.15.12 -watchfiles==0.20.0 -websockets==11.0.3 -yarl==1.9.2 -zipp==3.19.2 +tzdata==2024.1 +ujson==5.10.0 +unstructured==0.15.1 +unstructured-client==0.25.5 +urllib3==2.2.2 +uvicorn==0.30.6 +uvloop==0.19.0 +validators==0.33.0 +wandb==0.17.6 +watchfiles==0.23.0 +websockets==12.0 +wrapt==1.16.0 +yarl==1.9.4 +zipp==3.20.0 From 9c8b183273e5649d36c4a6e8bdd0f42c198df77c Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 17:06:59 -0400 Subject: [PATCH 36/53] reqs to fix unit test --- common/requirements.txt | 2 -- copilot/requirements.txt | 2 -- 2 files changed, 4 deletions(-) diff --git a/common/requirements.txt b/common/requirements.txt index af45c357..97fe5736 100644 --- a/common/requirements.txt +++ b/common/requirements.txt @@ -95,7 +95,6 @@ lomond==0.3.3 lxml==5.3.0 marshmallow==3.21.3 matplotlib==3.9.2 -milvus-lite==2.4.9 minio==7.2.7 multidict==6.0.5 mypy-extensions==1.0.0 @@ -123,7 +122,6 @@ pycryptodome==3.20.0 pydantic==2.8.2 
pydantic_core==2.20.1 pygit2==1.15.1 -pymilvus==2.4.5 pyparsing==3.1.2 pypdf==4.3.1 pytest==8.3.2 diff --git a/copilot/requirements.txt b/copilot/requirements.txt index e1a28c91..d2426a03 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -95,7 +95,6 @@ lomond==0.3.3 lxml==5.3.0 marshmallow==3.21.3 matplotlib==3.9.2 -milvus-lite==2.4.9 minio==7.2.7 multidict==6.0.5 mypy-extensions==1.0.0 @@ -123,7 +122,6 @@ pycryptodome==3.20.0 pydantic==2.8.2 pydantic_core==2.20.1 pygit2==1.15.1 -pymilvus==2.4.5 pyparsing==3.1.2 pypdf==4.3.1 pytest==8.3.2 From e4d8168dfe4d3c44ba57844a3e6abbe2472ac8a2 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 17:11:28 -0400 Subject: [PATCH 37/53] reqs to fix unit test --- common/requirements.txt | 2 ++ copilot/requirements.txt | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/common/requirements.txt b/common/requirements.txt index 97fe5736..af45c357 100644 --- a/common/requirements.txt +++ b/common/requirements.txt @@ -95,6 +95,7 @@ lomond==0.3.3 lxml==5.3.0 marshmallow==3.21.3 matplotlib==3.9.2 +milvus-lite==2.4.9 minio==7.2.7 multidict==6.0.5 mypy-extensions==1.0.0 @@ -122,6 +123,7 @@ pycryptodome==3.20.0 pydantic==2.8.2 pydantic_core==2.20.1 pygit2==1.15.1 +pymilvus==2.4.5 pyparsing==3.1.2 pypdf==4.3.1 pytest==8.3.2 diff --git a/copilot/requirements.txt b/copilot/requirements.txt index d2426a03..af45c357 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -82,7 +82,7 @@ langchain-core==0.2.30 langchain-experimental==0.0.64 langchain-groq==0.1.9 langchain-ibm==0.1.12 -langchain-milvus==0.1.3 +langchain-milvus==0.1.4 langchain-openai==0.1.21 langchain-text-splitters==0.2.2 langchainhub==0.1.21 @@ -95,6 +95,7 @@ lomond==0.3.3 lxml==5.3.0 marshmallow==3.21.3 matplotlib==3.9.2 +milvus-lite==2.4.9 minio==7.2.7 multidict==6.0.5 mypy-extensions==1.0.0 @@ -122,6 +123,7 @@ pycryptodome==3.20.0 pydantic==2.8.2 pydantic_core==2.20.1 pygit2==1.15.1 +pymilvus==2.4.5 pyparsing==3.1.2 pypdf==4.3.1 pytest==8.3.2 From 538653f2f09c3abd0d1df456d501758b776e9f57 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 17:24:20 -0400 Subject: [PATCH 38/53] reqs to fix unit tests --- common/embeddings/milvus_embedding_store.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/common/embeddings/milvus_embedding_store.py b/common/embeddings/milvus_embedding_store.py index 7169379e..de7812fd 100644 --- a/common/embeddings/milvus_embedding_store.py +++ b/common/embeddings/milvus_embedding_store.py @@ -7,7 +7,8 @@ from asyncer import asyncify from langchain_community.vectorstores import Milvus from langchain_core.documents.base import Document -from langchain_milvus.vectorstores import Milvus +# from langchain_milvus.vectorstores import Milvus +from langchain_community.vectorstores.milvus import Milvus from pymilvus import MilvusException, connections, utility from pymilvus.exceptions import MilvusException From a63d3768971f6ddd01dfc59e7cd33a15ec073aa4 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 17:28:32 -0400 Subject: [PATCH 39/53] reqs to fix unit tests --- common/embeddings/embedding_services.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/common/embeddings/embedding_services.py b/common/embeddings/embedding_services.py index 13c2cfd0..8020b97f 100644 --- 
a/common/embeddings/embedding_services.py +++ b/common/embeddings/embedding_services.py @@ -134,7 +134,8 @@ def __init__(self, config): super().__init__( config, model_name=config.get("model_name", "OpenAI gpt-4-0613") ) - from langchain_openai import OpenAIEmbeddings + # from langchain_openai import OpenAIEmbeddings + from langchain_community.embeddings.openai import OpenAIEmbeddings self.embeddings = OpenAIEmbeddings() From fe6643c79af599316cc6bf397c3ffac4053fa361 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 19:55:04 -0400 Subject: [PATCH 40/53] smoke test --- .../graphRAG/louvain/graphrag_louvain_communities.gsql | 2 +- common/gsql/supportai/Scan_For_Updates.gsql | 8 ++++---- common/llm_services/openai_service.py | 2 +- copilot/app/routers/supportai.py | 7 ------- copilot/requirements.txt | 1 + eventual-consistency-service/app/graphrag/util.py | 2 +- 6 files changed, 8 insertions(+), 14 deletions(-) diff --git a/common/gsql/graphRAG/louvain/graphrag_louvain_communities.gsql b/common/gsql/graphRAG/louvain/graphrag_louvain_communities.gsql index 4137ca68..241ccaf0 100644 --- a/common/gsql/graphRAG/louvain/graphrag_louvain_communities.gsql +++ b/common/gsql/graphRAG/louvain/graphrag_louvain_communities.gsql @@ -172,7 +172,7 @@ CREATE DISTRIBUTED QUERY graphrag_louvain_communities(UINT iteration=1, UINT max POST-ACCUM // Write the results to a new community vertex (iteration + 1) // ID , iter, edges within the community - INSERT INTO Community VALUES (s.id+"_"+to_string(iteration+1), iteration+1), ""), + INSERT INTO Community VALUES (s.id+"_"+to_string(iteration+1), iteration+1, ""), INSERT INTO HAS_PARENT VALUES (s, s.@community_vid+"_"+to_string(iteration+1)) // link Community's child/parent community ; diff --git a/common/gsql/supportai/Scan_For_Updates.gsql b/common/gsql/supportai/Scan_For_Updates.gsql index ba5444bd..7d9d1b83 100644 --- a/common/gsql/supportai/Scan_For_Updates.gsql +++ b/common/gsql/supportai/Scan_For_Updates.gsql @@ -24,10 +24,10 @@ CREATE DISTRIBUTED QUERY Scan_For_Updates(STRING v_type = "Document", res = SELECT s FROM start:s -(HAS_CONTENT)-> Content:c ACCUM @@v_and_text += (s.id -> c.text) POST-ACCUM s.epoch_processing = datetime_to_epoch(now()); - ELSE IF v_type == "Concept" THEN - res = SELECT s FROM start:s - POST-ACCUM @@v_and_text += (s.id -> s.description), - s.epoch_processing = datetime_to_epoch(now()); + // ELSE IF v_type == "Concept" THEN + // res = SELECT s FROM start:s + // POST-ACCUM @@v_and_text += (s.id -> s.description), + // s.epoch_processing = datetime_to_epoch(now()); ELSE IF v_type == "Entity" THEN res = SELECT s FROM start:s POST-ACCUM @@v_and_text += (s.id -> s.definition), diff --git a/common/llm_services/openai_service.py b/common/llm_services/openai_service.py index 7b166398..4f70b8cf 100644 --- a/common/llm_services/openai_service.py +++ b/common/llm_services/openai_service.py @@ -1,7 +1,7 @@ import logging import os -from langchain_community.chat_models.openai import ChatOpenAI +from langchain_openai.chat_models import ChatOpenAI from common.llm_services import LLM_Model from common.logs.log import req_id_cv diff --git a/copilot/app/routers/supportai.py b/copilot/app/routers/supportai.py index 7b09acc9..0eff3c41 100644 --- a/copilot/app/routers/supportai.py +++ b/copilot/app/routers/supportai.py @@ -18,13 +18,6 @@ HNSWSiblingRetriever, ) -from common.config import ( - db_config, - embedding_service, - embedding_store, - get_llm_service, - llm_config, -) from 
common.config import ( db_config, embedding_service, diff --git a/copilot/requirements.txt b/copilot/requirements.txt index af45c357..d287660f 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -1,3 +1,4 @@ +aiochannel==1.2.1 aiohappyeyeballs==2.3.5 aiohttp==3.10.3 aiosignal==1.3.1 diff --git a/eventual-consistency-service/app/graphrag/util.py b/eventual-consistency-service/app/graphrag/util.py index bcf1befe..186ab11a 100644 --- a/eventual-consistency-service/app/graphrag/util.py +++ b/eventual-consistency-service/app/graphrag/util.py @@ -111,7 +111,7 @@ async def init( vector_field=milvus_config.get("vector_field", "document_vector"), text_field=milvus_config.get("text_field", "document_content"), vertex_field=vertex_field, - drop_old=True, + drop_old=False, ) LogWriter.info(f"Initializing {name}") From 64b3998e3d1a3838e46848eb9d69954ccf12b763 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 20:03:03 -0400 Subject: [PATCH 41/53] smoke test --- .github/workflows/pull-test-merge.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pull-test-merge.yaml b/.github/workflows/pull-test-merge.yaml index 654703d8..a7c93c7e 100644 --- a/.github/workflows/pull-test-merge.yaml +++ b/.github/workflows/pull-test-merge.yaml @@ -34,6 +34,7 @@ jobs: source venv/bin/activate python -m pip install --upgrade pip pip install -r copilot/requirements.txt + pip install -U langchain-core pip install pytest - name: Create db config From e08d42a5d498615679b9859e93b3f67e94d70d0f Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 20:07:55 -0400 Subject: [PATCH 42/53] smoke test --- .github/workflows/pull-test-merge.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull-test-merge.yaml b/.github/workflows/pull-test-merge.yaml index a7c93c7e..e7cdd5a1 100644 --- a/.github/workflows/pull-test-merge.yaml +++ b/.github/workflows/pull-test-merge.yaml @@ -34,7 +34,7 @@ jobs: source venv/bin/activate python -m pip install --upgrade pip pip install -r copilot/requirements.txt - pip install -U langchain-core + pip install -U langchain-core langchain pip install pytest - name: Create db config From 17b09df8611321363012db71a75d3fc404ee0e54 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 20:12:16 -0400 Subject: [PATCH 43/53] smoke test --- copilot/requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/copilot/requirements.txt b/copilot/requirements.txt index d287660f..662ec077 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -79,12 +79,11 @@ jsonpointer==3.0.0 kiwisolver==1.4.5 langchain==0.2.13 langchain-community==0.2.12 -langchain-core==0.2.30 langchain-experimental==0.0.64 langchain-groq==0.1.9 langchain-ibm==0.1.12 langchain-milvus==0.1.4 -langchain-openai==0.1.21 +langchain-openai==0.1.20 langchain-text-splitters==0.2.2 langchainhub==0.1.21 langdetect==1.0.9 From 6ce885f341bef5ae1fa5b1216d7dfa7254d3a17c Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 20:22:12 -0400 Subject: [PATCH 44/53] smoke test --- .github/workflows/pull-test-merge.yaml | 1 - copilot/requirements.txt | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull-test-merge.yaml 
b/.github/workflows/pull-test-merge.yaml index e7cdd5a1..654703d8 100644 --- a/.github/workflows/pull-test-merge.yaml +++ b/.github/workflows/pull-test-merge.yaml @@ -34,7 +34,6 @@ jobs: source venv/bin/activate python -m pip install --upgrade pip pip install -r copilot/requirements.txt - pip install -U langchain-core langchain pip install pytest - name: Create db config diff --git a/copilot/requirements.txt b/copilot/requirements.txt index 662ec077..e4da4613 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -87,6 +87,7 @@ langchain-openai==0.1.20 langchain-text-splitters==0.2.2 langchainhub==0.1.21 langdetect==1.0.9 +langchain-core==0.2.29 langgraph==0.2.3 langgraph-checkpoint==1.0.2 langsmith==0.1.99 From 442564bde03c69974ed8a953c5baa04d68681964 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 20:25:14 -0400 Subject: [PATCH 45/53] smoke test --- copilot/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/copilot/requirements.txt b/copilot/requirements.txt index e4da4613..56b5f71f 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -77,7 +77,7 @@ jsonpatch==1.33 jsonpath-python==1.0.6 jsonpointer==3.0.0 kiwisolver==1.4.5 -langchain==0.2.13 +langchain==0.2.12 langchain-community==0.2.12 langchain-experimental==0.0.64 langchain-groq==0.1.9 From 2d8675eb540caa4fca57283f24f4ded62ec90752 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 20:32:40 -0400 Subject: [PATCH 46/53] smoke test --- copilot/requirements.txt | 240 ++++++++++++++++++--------------------- 1 file changed, 109 insertions(+), 131 deletions(-) diff --git a/copilot/requirements.txt b/copilot/requirements.txt index 56b5f71f..f737ed1f 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -1,178 +1,156 @@ -aiochannel==1.2.1 -aiohappyeyeballs==2.3.5 -aiohttp==3.10.3 +aiohttp==3.9.3 aiosignal==1.3.1 -annotated-types==0.7.0 -anyio==4.4.0 +annotated-types==0.5.0 +anyio==3.7.1 appdirs==1.4.4 argon2-cffi==23.1.0 argon2-cffi-bindings==21.2.0 async-timeout==4.0.3 asyncer==0.0.7 -attrs==24.2.0 -azure-core==1.30.2 -azure-storage-blob==12.22.0 +attrs==23.1.0 +azure-core==1.30.1 +azure-storage-blob==12.19.1 backoff==2.2.1 -beautifulsoup4==4.12.3 -boto3==1.34.159 -botocore==1.34.159 -cachetools==5.4.0 -certifi==2024.7.4 -cffi==1.17.0 +beautifulsoup4==4.12.2 +boto3==1.28.83 +botocore==1.31.83 +cachetools==5.3.2 +certifi==2023.7.22 +cffi==1.16.0 chardet==5.2.0 -charset-normalizer==3.3.2 +charset-normalizer==3.2.0 click==8.1.7 -contourpy==1.2.1 -cryptography==43.0.0 -cycler==0.12.1 -dataclasses-json==0.6.7 -deepdiff==7.0.1 -distro==1.9.0 +cryptography==42.0.5 +dataclasses-json==0.5.14 +distro==1.8.0 docker-pycreds==0.4.0 docstring_parser==0.16 -emoji==2.12.1 +emoji==2.8.0 environs==9.5.0 -exceptiongroup==1.2.2 -fastapi==0.112.0 +exceptiongroup==1.1.3 +fastapi==0.103.1 filelock==3.15.4 filetype==1.2.0 -fonttools==4.53.1 -frozenlist==1.4.1 -fsspec==2024.6.1 +frozenlist==1.4.0 +fsspec==2024.6.0 gitdb==4.0.11 -GitPython==3.1.43 -google-api-core==2.19.1 -google-auth==2.33.0 -google-cloud-aiplatform==1.61.0 -google-cloud-bigquery==3.25.0 -google-cloud-core==2.4.1 -google-cloud-resource-manager==1.12.5 -google-cloud-storage==2.18.2 +GitPython==3.1.40 +google-api-core==2.14.0 +google-auth==2.23.4 +google-cloud-aiplatform==1.52.0 +google-cloud-bigquery==3.13.0 +google-cloud-core==2.3.3 
+google-cloud-resource-manager==1.10.4 +google-cloud-storage==2.13.0 google-crc32c==1.5.0 -google-resumable-media==2.7.2 -googleapis-common-protos==1.63.2 -greenlet==3.0.3 -groq==0.9.0 -grpc-google-iam-v1==0.13.1 -grpcio==1.63.0 -grpcio-status==1.63.0 +google-resumable-media==2.6.0 +googleapis-common-protos==1.61.0 +greenlet==2.0.2 +groq==0.5.0 +grpc-google-iam-v1==0.12.7 +grpcio==1.59.2 +grpcio-status==1.59.2 h11==0.14.0 -httpcore==1.0.5 -httptools==0.6.1 -httpx==0.27.0 -huggingface-hub==0.24.5 +httpcore==0.18.0 +httptools==0.6.0 +httpx==0.25.0 +huggingface-hub==0.23.0 ibm-cos-sdk==2.13.6 ibm-cos-sdk-core==2.13.6 ibm-cos-sdk-s3transfer==2.13.6 -ibm_watsonx_ai==1.1.5 -idna==3.7 -importlib_metadata==8.2.0 +ibm_watsonx_ai==1.0.11 +idna==3.4 +importlib_metadata==8.0.0 iniconfig==2.0.0 isodate==0.6.1 -jiter==0.5.0 jmespath==1.0.1 -joblib==1.4.2 -jq==1.7.0 +joblib==1.3.2 +jq==1.6.0 jsonpatch==1.33 -jsonpath-python==1.0.6 -jsonpointer==3.0.0 -kiwisolver==1.4.5 -langchain==0.2.12 -langchain-community==0.2.12 -langchain-experimental==0.0.64 -langchain-groq==0.1.9 -langchain-ibm==0.1.12 -langchain-milvus==0.1.4 -langchain-openai==0.1.20 +jsonpointer==2.4 +langchain==0.2.11 +langchain-community==0.2.10 +langchain-core==0.2.25 +langchain-experimental==0.0.63 +langchain-groq==0.1.8 +langchain-ibm==0.1.11 langchain-text-splitters==0.2.2 -langchainhub==0.1.21 +langchain_milvus==0.1.3 +langchain_openai==0.1.19 +langchainhub==0.1.20 langdetect==1.0.9 -langchain-core==0.2.29 -langgraph==0.2.3 -langgraph-checkpoint==1.0.2 -langsmith==0.1.99 +langgraph==0.1.16 +langsmith==0.1.94 Levenshtein==0.25.1 lomond==0.3.3 -lxml==5.3.0 -marshmallow==3.21.3 -matplotlib==3.9.2 -milvus-lite==2.4.9 -minio==7.2.7 -multidict==6.0.5 +lxml==4.9.3 +marshmallow==3.20.1 +matplotlib==3.9.1 +minio==7.2.5 +multidict==6.0.4 mypy-extensions==1.0.0 -nest-asyncio==1.6.0 -nltk==3.8.2 +nltk==3.8.1 numpy==1.26.4 -openai==1.40.6 -ordered-set==4.1.0 -orjson==3.10.7 -packaging==24.1 -pandas==2.1.4 +openai==1.37.1 +orjson==3.9.15 +packaging==23.2 +pandas==2.1.1 pathtools==0.1.2 -pillow==10.4.0 -platformdirs==4.2.2 pluggy==1.5.0 prometheus_client==0.20.0 -proto-plus==1.24.0 -protobuf==5.27.3 -psutil==6.0.0 -pyarrow==17.0.0 -pyasn1==0.6.0 -pyasn1_modules==0.4.0 -pycparser==2.22 +proto-plus==1.22.3 +protobuf==4.24.4 +psutil==5.9.6 +pyarrow==15.0.1 +pyasn1==0.5.0 +pyasn1-modules==0.3.0 +pycparser==2.21 pycryptodome==3.20.0 -pydantic==2.8.2 -pydantic_core==2.20.1 -pygit2==1.15.1 -pymilvus==2.4.5 -pyparsing==3.1.2 -pypdf==4.3.1 -pytest==8.3.2 +pydantic==2.3.0 +pydantic_core==2.6.3 +pygit2==1.13.2 +pymilvus==2.4.4 +pytest==8.2.0 python-dateutil==2.9.0.post0 -python-dotenv==1.0.1 -python-iso639==2024.4.27 +python-dotenv==1.0.0 +python-iso639==2023.6.15 python-magic==0.4.27 pyTigerDriver==1.0.15 -pyTigerGraph==1.6.5 -pytz==2024.1 -PyYAML==6.0.2 -rapidfuzz==3.9.6 -regex==2024.7.24 +pyTigerGraph==1.6.2 +pytz==2023.3.post1 +PyYAML==6.0.1 +rapidfuzz==3.8.0 +regex==2023.10.3 requests==2.32.2 -requests-toolbelt==1.0.0 rsa==4.9 -s3transfer==0.10.2 +s3transfer==0.7.0 scikit-learn==1.5.1 -scipy==1.14.0 -sentry-sdk==2.13.0 +sentry-sdk==1.32.0 setproctitle==1.3.3 -shapely==2.0.5 +shapely==2.0.2 six==1.16.0 smmap==5.0.1 -sniffio==1.3.1 -soupsieve==2.6 -SQLAlchemy==2.0.32 -starlette==0.37.2 +sniffio==1.3.0 +soupsieve==2.5 +SQLAlchemy==2.0.20 +starlette==0.27.0 tabulate==0.9.0 -tenacity==8.5.0 -threadpoolctl==3.5.0 +tenacity==8.2.3 tiktoken==0.7.0 -tqdm==4.66.5 -types-requests==2.32.0.20240712 +tqdm==4.66.1 +types-requests==2.31.0.6 types-urllib3==1.26.25.14 
typing-inspect==0.9.0 -typing_extensions==4.12.2 -tzdata==2024.1 -ujson==5.10.0 -unstructured==0.15.1 -unstructured-client==0.25.5 -urllib3==2.2.2 -uvicorn==0.30.6 -uvloop==0.19.0 -validators==0.33.0 -wandb==0.17.6 -watchfiles==0.23.0 -websockets==12.0 -wrapt==1.16.0 -yarl==1.9.4 -zipp==3.20.0 +typing_extensions==4.8.0 +tzdata==2023.3 +ujson==5.9.0 +unstructured==0.10.23 +urllib3==1.26.18 +uvicorn==0.23.2 +uvloop==0.17.0 +validators==0.22.0 +wandb==0.15.12 +watchfiles==0.20.0 +websockets==11.0.3 +yarl==1.9.2 +zipp==3.19.2 From e9f5e9d2719e68022f742f5dd2d50943669eb051 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 20:42:56 -0400 Subject: [PATCH 47/53] smoke test --- common/embeddings/milvus_embedding_store.py | 4 ++-- copilot/requirements.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/common/embeddings/milvus_embedding_store.py b/common/embeddings/milvus_embedding_store.py index de7812fd..c60a8e2e 100644 --- a/common/embeddings/milvus_embedding_store.py +++ b/common/embeddings/milvus_embedding_store.py @@ -7,8 +7,8 @@ from asyncer import asyncify from langchain_community.vectorstores import Milvus from langchain_core.documents.base import Document -# from langchain_milvus.vectorstores import Milvus -from langchain_community.vectorstores.milvus import Milvus +from langchain_milvus.vectorstores import Milvus +# from langchain_community.vectorstores.milvus import Milvus from pymilvus import MilvusException, connections, utility from pymilvus.exceptions import MilvusException diff --git a/copilot/requirements.txt b/copilot/requirements.txt index f737ed1f..98af8b4b 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -70,13 +70,13 @@ jsonpatch==1.33 jsonpointer==2.4 langchain==0.2.11 langchain-community==0.2.10 -langchain-core==0.2.25 +# langchain-core==0.2.25 langchain-experimental==0.0.63 langchain-groq==0.1.8 langchain-ibm==0.1.11 langchain-text-splitters==0.2.2 langchain_milvus==0.1.3 -langchain_openai==0.1.19 +langchain_openai==0.1.20 langchainhub==0.1.20 langdetect==1.0.9 langgraph==0.1.16 From 0ca73a31dfb653b8cab07111a4db019e000171d9 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 20:50:02 -0400 Subject: [PATCH 48/53] smoke test --- .github/workflows/pull-test-merge.yaml | 2 ++ copilot/requirements.txt | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pull-test-merge.yaml b/.github/workflows/pull-test-merge.yaml index 654703d8..20024b34 100644 --- a/.github/workflows/pull-test-merge.yaml +++ b/.github/workflows/pull-test-merge.yaml @@ -64,6 +64,8 @@ jobs: - name: Run pytest run: | source venv/bin/activate + pip install -r copilot/requirements.txt + pip install -U langchain langchain-core cp -r copilot/tests/*test* copilot/tests/create_wandb_report.py copilot/app/ cd copilot/app python -m pytest --disable-warnings diff --git a/copilot/requirements.txt b/copilot/requirements.txt index 98af8b4b..ac0a6ba6 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -70,7 +70,6 @@ jsonpatch==1.33 jsonpointer==2.4 langchain==0.2.11 langchain-community==0.2.10 -# langchain-core==0.2.25 langchain-experimental==0.0.63 langchain-groq==0.1.8 langchain-ibm==0.1.11 From 8252c1ecb193f11d570f27263afa1ff990814806 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 20:58:48 -0400 Subject: 
[PATCH 49/53] smoke test --- copilot/requirements.txt | 247 +++++++++++++++++++++------------------ 1 file changed, 135 insertions(+), 112 deletions(-) diff --git a/copilot/requirements.txt b/copilot/requirements.txt index ac0a6ba6..5aed6147 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -1,155 +1,178 @@ -aiohttp==3.9.3 +aiohappyeyeballs==2.3.5 +aiohttp==3.10.3 aiosignal==1.3.1 -annotated-types==0.5.0 -anyio==3.7.1 +annotated-types==0.7.0 +anyio==4.4.0 appdirs==1.4.4 argon2-cffi==23.1.0 argon2-cffi-bindings==21.2.0 async-timeout==4.0.3 asyncer==0.0.7 -attrs==23.1.0 -azure-core==1.30.1 -azure-storage-blob==12.19.1 +attrs==24.2.0 +azure-core==1.30.2 +azure-storage-blob==12.22.0 backoff==2.2.1 -beautifulsoup4==4.12.2 -boto3==1.28.83 -botocore==1.31.83 -cachetools==5.3.2 -certifi==2023.7.22 -cffi==1.16.0 +beautifulsoup4==4.12.3 +boto3==1.34.160 +botocore==1.34.160 +cachetools==5.4.0 +certifi==2024.7.4 +cffi==1.17.0 chardet==5.2.0 -charset-normalizer==3.2.0 +charset-normalizer==3.3.2 click==8.1.7 -cryptography==42.0.5 -dataclasses-json==0.5.14 -distro==1.8.0 +contourpy==1.2.1 +cryptography==43.0.0 +cycler==0.12.1 +dataclasses-json==0.6.7 +deepdiff==7.0.1 +distro==1.9.0 docker-pycreds==0.4.0 -docstring_parser==0.16 -emoji==2.8.0 +docstring-parser==0.16 +emoji==2.12.1 environs==9.5.0 -exceptiongroup==1.1.3 -fastapi==0.103.1 +exceptiongroup==1.2.2 +fastapi==0.112.0 filelock==3.15.4 filetype==1.2.0 -frozenlist==1.4.0 -fsspec==2024.6.0 +fonttools==4.53.1 +frozenlist==1.4.1 +fsspec==2024.6.1 gitdb==4.0.11 -GitPython==3.1.40 -google-api-core==2.14.0 -google-auth==2.23.4 -google-cloud-aiplatform==1.52.0 -google-cloud-bigquery==3.13.0 -google-cloud-core==2.3.3 -google-cloud-resource-manager==1.10.4 -google-cloud-storage==2.13.0 +gitpython==3.1.43 +google-api-core==2.19.1 +google-auth==2.33.0 +google-cloud-aiplatform==1.62.0 +google-cloud-bigquery==3.25.0 +google-cloud-core==2.4.1 +google-cloud-resource-manager==1.12.5 +google-cloud-storage==2.18.2 google-crc32c==1.5.0 -google-resumable-media==2.6.0 -googleapis-common-protos==1.61.0 -greenlet==2.0.2 -groq==0.5.0 -grpc-google-iam-v1==0.12.7 -grpcio==1.59.2 -grpcio-status==1.59.2 +google-resumable-media==2.7.2 +googleapis-common-protos==1.63.2 +greenlet==3.0.3 +groq==0.9.0 +grpc-google-iam-v1==0.13.1 +grpcio==1.63.0 +grpcio-status==1.63.0 h11==0.14.0 -httpcore==0.18.0 -httptools==0.6.0 -httpx==0.25.0 -huggingface-hub==0.23.0 +httpcore==1.0.5 +httptools==0.6.1 +httpx==0.27.0 +huggingface-hub==0.24.5 ibm-cos-sdk==2.13.6 ibm-cos-sdk-core==2.13.6 ibm-cos-sdk-s3transfer==2.13.6 -ibm_watsonx_ai==1.0.11 -idna==3.4 -importlib_metadata==8.0.0 +ibm-watsonx-ai==1.1.5 +idna==3.7 +importlib-metadata==8.2.0 iniconfig==2.0.0 isodate==0.6.1 +jiter==0.5.0 jmespath==1.0.1 -joblib==1.3.2 -jq==1.6.0 +joblib==1.4.2 +jq==1.7.0 jsonpatch==1.33 -jsonpointer==2.4 -langchain==0.2.11 -langchain-community==0.2.10 -langchain-experimental==0.0.63 -langchain-groq==0.1.8 -langchain-ibm==0.1.11 +jsonpath-python==1.0.6 +jsonpointer==3.0.0 +kiwisolver==1.4.5 +langchain==0.2.13 +langchain-community==0.2.12 +langchain-core==0.2.30 +langchain-experimental==0.0.64 +langchain-groq==0.1.9 +langchain-ibm==0.1.12 +langchain-milvus==0.1.4 +langchain-openai==0.1.21 langchain-text-splitters==0.2.2 -langchain_milvus==0.1.3 -langchain_openai==0.1.20 -langchainhub==0.1.20 +langchainhub==0.1.21 langdetect==1.0.9 -langgraph==0.1.16 -langsmith==0.1.94 -Levenshtein==0.25.1 +langgraph==0.2.3 +langgraph-checkpoint==1.0.2 +langsmith==0.1.99 +levenshtein==0.25.1 lomond==0.3.3 
-lxml==4.9.3 -marshmallow==3.20.1 -matplotlib==3.9.1 -minio==7.2.5 -multidict==6.0.4 +lxml==5.3.0 +marshmallow==3.21.3 +matplotlib==3.9.2 +milvus-lite==2.4.9 +minio==7.2.7 +multidict==6.0.5 mypy-extensions==1.0.0 -nltk==3.8.1 +nest-asyncio==1.6.0 +nltk==3.8.2 numpy==1.26.4 -openai==1.37.1 -orjson==3.9.15 -packaging==23.2 -pandas==2.1.1 +openai==1.40.6 +ordered-set==4.1.0 +orjson==3.10.7 +packaging==24.1 +pandas==2.1.4 pathtools==0.1.2 +pillow==10.4.0 +platformdirs==4.2.2 pluggy==1.5.0 -prometheus_client==0.20.0 -proto-plus==1.22.3 -protobuf==4.24.4 -psutil==5.9.6 -pyarrow==15.0.1 -pyasn1==0.5.0 -pyasn1-modules==0.3.0 -pycparser==2.21 +prometheus-client==0.20.0 +proto-plus==1.24.0 +protobuf==5.27.3 +psutil==6.0.0 +pyarrow==17.0.0 +pyasn1==0.6.0 +pyasn1-modules==0.4.0 +pycparser==2.22 pycryptodome==3.20.0 -pydantic==2.3.0 -pydantic_core==2.6.3 -pygit2==1.13.2 -pymilvus==2.4.4 -pytest==8.2.0 +pydantic==2.8.2 +pydantic-core==2.20.1 +pygit2==1.15.1 +pymilvus==2.4.5 +pyparsing==3.1.2 +pypdf==4.3.1 +pytest==8.3.2 python-dateutil==2.9.0.post0 -python-dotenv==1.0.0 -python-iso639==2023.6.15 +python-dotenv==1.0.1 +python-iso639==2024.4.27 python-magic==0.4.27 -pyTigerDriver==1.0.15 -pyTigerGraph==1.6.2 -pytz==2023.3.post1 -PyYAML==6.0.1 -rapidfuzz==3.8.0 -regex==2023.10.3 +pytigerdriver==1.0.15 +pytigergraph==1.6.5 +pytz==2024.1 +pyyaml==6.0.2 +rapidfuzz==3.9.6 +regex==2024.7.24 requests==2.32.2 +requests-toolbelt==1.0.0 rsa==4.9 -s3transfer==0.7.0 +s3transfer==0.10.2 scikit-learn==1.5.1 -sentry-sdk==1.32.0 +scipy==1.14.0 +sentry-sdk==2.13.0 setproctitle==1.3.3 -shapely==2.0.2 +setuptools==72.2.0 +shapely==2.0.5 six==1.16.0 smmap==5.0.1 -sniffio==1.3.0 -soupsieve==2.5 -SQLAlchemy==2.0.20 -starlette==0.27.0 +sniffio==1.3.1 +soupsieve==2.6 +sqlalchemy==2.0.32 +starlette==0.37.2 tabulate==0.9.0 -tenacity==8.2.3 +tenacity==8.5.0 +threadpoolctl==3.5.0 tiktoken==0.7.0 -tqdm==4.66.1 -types-requests==2.31.0.6 +tqdm==4.66.5 +types-requests==2.32.0.20240712 types-urllib3==1.26.25.14 +typing-extensions==4.12.2 typing-inspect==0.9.0 -typing_extensions==4.8.0 -tzdata==2023.3 -ujson==5.9.0 -unstructured==0.10.23 -urllib3==1.26.18 -uvicorn==0.23.2 -uvloop==0.17.0 -validators==0.22.0 -wandb==0.15.12 -watchfiles==0.20.0 -websockets==11.0.3 -yarl==1.9.2 -zipp==3.19.2 +tzdata==2024.1 +ujson==5.10.0 +unstructured==0.15.1 +unstructured-client==0.25.5 +urllib3==2.2.2 +uvicorn==0.30.6 +uvloop==0.19.0 +validators==0.33.0 +wandb==0.17.6 +watchfiles==0.23.0 +websockets==12.0 +wrapt==1.16.0 +yarl==1.9.4 +zipp==3.20.0 From 8777b3c0927348a5021b437da0614ee10de02c00 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 21:35:37 -0400 Subject: [PATCH 50/53] smoke test --- .github/workflows/pull-test-merge.yaml | 16 ++++++++-------- common/embeddings/milvus_embedding_store.py | 1 - common/llm_services/openai_service.py | 5 ++++- eventual-consistency-service/app/main.py | 7 ++++--- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/.github/workflows/pull-test-merge.yaml b/.github/workflows/pull-test-merge.yaml index 20024b34..2c032524 100644 --- a/.github/workflows/pull-test-merge.yaml +++ b/.github/workflows/pull-test-merge.yaml @@ -12,12 +12,12 @@ jobs: test: runs-on: [ self-hosted, dind ] - services: - milvus: - image: milvusdb/milvus:latest - ports: - - 19530:19530 - - 19121:19121 + # services: + # milvus: + # image: milvusdb/milvus:latest + # ports: + # - 19530:19530 + # - 19121:19121 steps: - name: Checkout code @@ -30,6 +30,8 @@ jobs: - name: 
Install and Check Python Setup run: | + pip install uv + alias pip='uv pip' python -m venv venv source venv/bin/activate python -m pip install --upgrade pip @@ -64,8 +66,6 @@ jobs: - name: Run pytest run: | source venv/bin/activate - pip install -r copilot/requirements.txt - pip install -U langchain langchain-core cp -r copilot/tests/*test* copilot/tests/create_wandb_report.py copilot/app/ cd copilot/app python -m pytest --disable-warnings diff --git a/common/embeddings/milvus_embedding_store.py b/common/embeddings/milvus_embedding_store.py index c60a8e2e..7169379e 100644 --- a/common/embeddings/milvus_embedding_store.py +++ b/common/embeddings/milvus_embedding_store.py @@ -8,7 +8,6 @@ from langchain_community.vectorstores import Milvus from langchain_core.documents.base import Document from langchain_milvus.vectorstores import Milvus -# from langchain_community.vectorstores.milvus import Milvus from pymilvus import MilvusException, connections, utility from pymilvus.exceptions import MilvusException diff --git a/common/llm_services/openai_service.py b/common/llm_services/openai_service.py index 4f70b8cf..aad5d44f 100644 --- a/common/llm_services/openai_service.py +++ b/common/llm_services/openai_service.py @@ -1,7 +1,10 @@ import logging import os -from langchain_openai.chat_models import ChatOpenAI +if os.getenv("ECC"): + from langchain_openai.chat_models import ChatOpenAI +else: + from langchain_community.chat_models import ChatOpenAI from common.llm_services import LLM_Model from common.logs.log import req_id_cv diff --git a/eventual-consistency-service/app/main.py b/eventual-consistency-service/app/main.py index 34403f1e..2c308074 100644 --- a/eventual-consistency-service/app/main.py +++ b/eventual-consistency-service/app/main.py @@ -1,3 +1,6 @@ +import os + +os.environ["ECC"] = True import json import logging from contextlib import asynccontextmanager @@ -190,9 +193,7 @@ def consistency_status( background.add_task(graphrag.run, graphname, conn) import time - ecc_status = ( - f"GraphRAG initialization: {conn.graphname} ({graphname}) {time.ctime()}" - ) + ecc_status = f"GraphRAG initialization: {conn.graphname} ({graphname}) {time.ctime()}" case _: response.status_code = status.HTTP_404_NOT_FOUND return f"Method unsupported, must be {SupportAIMethod.SUPPORTAI}, {SupportAIMethod.GRAPHRAG}" From 69a7db449135d2a9413d53238b3a23906043da54 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 21:38:32 -0400 Subject: [PATCH 51/53] smoke test --- .github/workflows/pull-test-merge.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pull-test-merge.yaml b/.github/workflows/pull-test-merge.yaml index 2c032524..19e1ab08 100644 --- a/.github/workflows/pull-test-merge.yaml +++ b/.github/workflows/pull-test-merge.yaml @@ -12,12 +12,12 @@ jobs: test: runs-on: [ self-hosted, dind ] - # services: - # milvus: - # image: milvusdb/milvus:latest - # ports: - # - 19530:19530 - # - 19121:19121 + services: + milvus: + image: milvusdb/milvus:latest + ports: + - 19530:19530 + - 19121:19121 steps: - name: Checkout code From 4dfa51cefb6b5364894920efb58ecedc54760ef6 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 21:41:41 -0400 Subject: [PATCH 52/53] smoke test --- .github/workflows/pull-test-merge.yaml | 2 -- common/embeddings/milvus_embedding_store.py | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git 
a/.github/workflows/pull-test-merge.yaml b/.github/workflows/pull-test-merge.yaml index 19e1ab08..654703d8 100644 --- a/.github/workflows/pull-test-merge.yaml +++ b/.github/workflows/pull-test-merge.yaml @@ -30,8 +30,6 @@ jobs: - name: Install and Check Python Setup run: | - pip install uv - alias pip='uv pip' python -m venv venv source venv/bin/activate python -m pip install --upgrade pip diff --git a/common/embeddings/milvus_embedding_store.py b/common/embeddings/milvus_embedding_store.py index 7169379e..de7812fd 100644 --- a/common/embeddings/milvus_embedding_store.py +++ b/common/embeddings/milvus_embedding_store.py @@ -7,7 +7,8 @@ from asyncer import asyncify from langchain_community.vectorstores import Milvus from langchain_core.documents.base import Document -from langchain_milvus.vectorstores import Milvus +# from langchain_milvus.vectorstores import Milvus +from langchain_community.vectorstores.milvus import Milvus from pymilvus import MilvusException, connections, utility from pymilvus.exceptions import MilvusException From 56f8e16bc72fa5dbb0985db81bef71419c274ec2 Mon Sep 17 00:00:00 2001 From: RobRossmiller-TG <165701656+RobRossmiller-TG@users.noreply.github.com> Date: Tue, 13 Aug 2024 22:10:57 -0400 Subject: [PATCH 53/53] working --- copilot/requirements.txt | 1 + eventual-consistency-service/app/graphrag/graph_rag.py | 2 +- eventual-consistency-service/app/main.py | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/copilot/requirements.txt b/copilot/requirements.txt index 5aed6147..4a5ac3d1 100644 --- a/copilot/requirements.txt +++ b/copilot/requirements.txt @@ -1,3 +1,4 @@ +aiochannel==1.2.1 aiohappyeyeballs==2.3.5 aiohttp==3.10.3 aiosignal==1.3.1 diff --git a/eventual-consistency-service/app/graphrag/graph_rag.py b/eventual-consistency-service/app/graphrag/graph_rag.py index 86f172b8..ecca36b2 100644 --- a/eventual-consistency-service/app/graphrag/graph_rag.py +++ b/eventual-consistency-service/app/graphrag/graph_rag.py @@ -437,5 +437,5 @@ async def run(graphname: str, conn: TigerGraphConnection): end = time.perf_counter() logger.info(f"DONE. graphrag system initializer dT: {init_end-init_start}") logger.info(f"DONE. graphrag entity resolution dT: {entity_end-entity_start}") - logger.info(f"DONE. graphrag initializer dT: {community_end-community_start}") + logger.info(f"DONE. graphrag community initializer dT: {community_end-community_start}") logger.info(f"DONE. graphrag.run() total time elaplsed: {end-init_start}") diff --git a/eventual-consistency-service/app/main.py b/eventual-consistency-service/app/main.py index 2c308074..2ccc10e2 100644 --- a/eventual-consistency-service/app/main.py +++ b/eventual-consistency-service/app/main.py @@ -1,6 +1,6 @@ import os -os.environ["ECC"] = True +os.environ["ECC"] = "true" import json import logging from contextlib import asynccontextmanager @@ -193,7 +193,7 @@ def consistency_status( background.add_task(graphrag.run, graphname, conn) import time - ecc_status = f"GraphRAG initialization: {conn.graphname} ({graphname}) {time.ctime()}" + ecc_status = f"GraphRAG initialization on {conn.graphname} {time.ctime()}" case _: response.status_code = status.HTTP_404_NOT_FOUND return f"Method unsupported, must be {SupportAIMethod.SUPPORTAI}, {SupportAIMethod.GRAPHRAG}"
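
Note on the ECC flag introduced in PATCH 50/53 and corrected in PATCH 53/53: os.environ only accepts string values, so the earlier os.environ["ECC"] = True raises TypeError at import time, which is why the final patch stores the flag as the string "true". The snippet below is a minimal, standalone sketch of that flag pattern, not part of the patch series; it uses only the standard library and merely prints which import path would be taken instead of importing the real langchain packages.

import os

# os.environ values must be strings; assigning a bool raises TypeError.
# This is the bug the last patch fixes (True -> "true").
try:
    os.environ["ECC"] = True  # type: ignore[assignment]
except TypeError as err:
    print(f"bool rejected by os.environ: {err}")

os.environ["ECC"] = "true"  # store the flag as a string instead

# Downstream code can then branch on the flag, as openai_service.py does in
# PATCH 50/53. The real module chooses between langchain_openai and
# langchain_community imports here; this sketch only reports the chosen path.
if os.getenv("ECC"):
    print("ECC set: eventual-consistency-service import path")
else:
    print("ECC unset: copilot import path")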