From 633ca2fb3ce5d2e7486db1af98daa207ece55951 Mon Sep 17 00:00:00 2001 From: Jeremy Nelson Date: Wed, 15 Nov 2023 15:13:23 -0800 Subject: [PATCH] Sinopia-to-FOLIO workflow --- index.html | 38 ++++++++++++++----- src/controls.py | 2 +- src/folio.py | 49 +++++++++++++++++++++++- src/workflows.py | 99 +++++++++++++++++++++++++++++++----------------- 4 files changed, 141 insertions(+), 47 deletions(-) diff --git a/index.html b/index.html index 9d3177c..ab7a57d 100644 --- a/index.html +++ b/index.html @@ -67,7 +67,7 @@ run_prompt ) - from folio import Okapi, get_instance + from folio import Okapi, get_instance, logout_folio from folio import login as okapi_login from workflows import AssignLCSH, MARC21toFOLIO, NewResource @@ -179,7 +179,7 @@

Chatting with Catalogs

AI4LAM Hex Sticker and Logo
- Bugs? Ideas for improvement? Please add an issue here. + Bugs? Ideas for improvement? Please add a ticket here.
@@ -435,7 +435,8 @@

Workflows

Workflows in this app are inspired by Apache Airflow and - Prefect.io. + Prefect.io. A workflow wraps LLM API calls together with calls to + specific FOLIO Okapi and Sinopia APIs, building context and examples specific to the use case.

Basic Cataloging Workflow Flowchart
@@ -448,14 +449,33 @@

Roadmap

    -
  • Support for more Large Language Models (Llama2, Claude, Bard, etc.)
  • - +
  • Support for more Large Language Models (Llama2, Claude, Bard, etc.) ticket #1
  • +
  • Provide API for LCSH Vector datastore
  • +
  • Implement Vector datastore for FOLIO
  • +
  • Implement Vector datastore for Sinopia
  • + +

Documentation

+
+
+

About this App

+
+
+ The Chatting with Catalogs application is built as a static website using + PyScript, which loads a Python 3.1x environment into your + web browser through WebAssembly. We use the following Python packages: +
+

OpenAI API

@@ -503,10 +523,10 @@

ai4lam Metadata Working Group

-
+
@@ -620,7 +640,7 @@
-
This Edge AI API connets to either a local or remote API endpoint.
+
This Edge AI API connects to either a local or remote API endpoint.
""" + modal_body.appendChild(div_logout) # Goal 1: Automate loading of vendor MARC records @@ -110,6 +115,9 @@ async def login(okapi: Okapi): folio_button = document.getElementById("folioButton") folio_button.classList.remove("btn-outline-danger") folio_button.classList.add("btn-outline-success") + default_folio = document.getElementById("folio-default") + if not "d-none" in default_folio.classList: + default_folio.classList.add("d-none") localStorage.setItem("okapi", okapi.json()) services() else: @@ -127,6 +135,7 @@ async def load_marc_record(marc_file): return str(marc_record) + async def get_instance(okapi, uuid): kwargs = {"headers": okapi.headers()} @@ -216,5 +225,41 @@ async def get_instance_types() -> dict: return output def load_instance(url): + default_folio = document.getElementById("folio-default") + if not "d-none" in default_folio.classList: + default_folio.classList.add("d-none") folio_iframe = document.getElementById("folio-system-frame") - folio_iframe.src = url + folio_iframe.src = url + +def logout_folio(): + modal_body = document.getElementById("folioModalBody") + modal_label = document.getElementById("folioModalLabel") + modal_label.innerHTML = "Login to FOLIO" + modal_body ="""
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+ +
""" + return None + diff --git a/src/workflows.py b/src/workflows.py index 8d4811a..a46ba2f 100644 --- a/src/workflows.py +++ b/src/workflows.py @@ -81,6 +81,23 @@ async def get_types(self): if self.instance_types is None: self.instance_types = await get_instance_types() + def __update_record__(self, record): + record["instanceTypeId"] = self.instance_types.get("unspecified") + for identifier in record.get("identifiers", []): + if "identifierTypeName" in identifier: + ident_name = identifier.pop("identifierTypeName") + if ident_name.upper().startswith("OCLC"): + ident_name = "OCLC" + identifier["identifierTypeId"] = self.identifier_types.get(ident_name) + + for contributor in record.get("contributors", []): + contributor["contributorTypeId"] = self.contributor_types.get( + contributor.get("contributorTypeText", "Contributor") + ) + contributor[ + "contributorNameTypeId" + ] = self.contributor_name_types.get("Personal name") + class AssignLCSH(WorkFlow): name = "Assign Library of Congress Subject Heading to record" @@ -176,22 +193,6 @@ def __init__(self, zero_shot=False): super().__init__() self.zero_shot = zero_shot - def __update_record__(self, record): - record["instanceTypeId"] = self.instance_types.get("unspecified") - for identifier in record.get("identifiers", []): - if "identifierTypeName" in identifier: - ident_name = identifier.pop("identifierTypeName") - if ident_name.upper().startswith("OCLC"): - ident_name = "OCLC" - identifier["identifierTypeId"] = self.identifier_types.get(ident_name) - - for contributor in record.get("contributors", []): - contributor["contributorTypeId"] = self.contributor_types.get( - contributor.get("contributorTypeText", "Contributor") - ) - contributor[ - "contributorNameTypeId" - ] = self.contributor_name_types.get("Personal name") @@ -263,17 +264,9 @@ async def __handle_func__(self, function_call): match function_name: case "add_instance": record = json.loads(args.get("record")) - record["instanceTypeId"] = 
self.instance_types.get("unspecified") - for contributor in record.get("contributors", []): - contributor["contributorTypeId"] = self.contributor_types.get( - contributor.get("contributorTypeText"), - "Contributor" - ) - contributor[ - "contributorNameTypeId" - ] = self.contributor_name_types.get("Personal name") - instance_url = await add_instance(json.dumps(record)) - output = instance_url + self.__update_record__(record) + instance_url = await add_instance(json.dumps(record)) + output = instance_url case "load_instance": instance_url = json.loads(args.get("instance_url")) @@ -357,9 +350,43 @@ def __init__(self, zero_shot=False): super().__init__() self.zero_shot = zero_shot + + + async def __handle_func__(self, function_call) -> str: + function_name = function_call.get("name") + args = json.loads(function_call.get("arguments")) + output = None + match function_name: + + case "add_instance": + + record = json.loads(args.get("record")) + self.__update_record__(record) + console.log(f"SinopiaToFOLIO {record}") + instance_url = await add_instance(json.dumps(record)) + console.log(f"After SinopiaToFOLIO func call {instance_url}") + output = instance_url + + case "load_sinopia": + sinopia_rdf = await load_sinopia(args.get("resource_url")) + prompt = "Create a FOLIO Instance JSON record from" + add_history(f"{prompt}
{sinopia_rdf}
", "prompt") + output = f"{prompt}\n{sinopia_rdf}" + + case _: + output = f"Unknown function {function_name}" + + + return output + + async def system(self): system_prompt = SinopiaToFOLIO.system_prompt + if self.instance_types is None: + await self.get_types() + + if self.zero_shot is False: system_prompt = f"{system_prompt}\n\nExamples" system_prompt += "\n".join(self.examples) @@ -373,11 +400,13 @@ async def run(self, chat_instance: ChatGPT, initial_prompt: str): add_history(chat_result, "response") function_call = chat_result["choices"][0]["message"].get("function_call") if function_call: - function_name = function_call.get("name") - args = json.loads(function_call.get("arguments")) - if function_name == "load_sinopia": - sinopia_rdf = await load_sinopia(args.get("resource_url")) - add_history(f"
{sinopia_rdf}
", "prompt") - chat_result_rdf = await chat_instance(sinopia_rdf) - add_history(chat_result_rdf, "response") - + first_result = await self.__handle_func__(function_call) + chat_result_rdf = await chat_instance(first_result) + final_func_call = chat_result_rdf["choices"][0]["message"].get("function_call") + final_result = await self.__handle_func__(final_func_call) + console.log(f"The final result {final_result}") + add_history(final_result, "response") + load_instance(final_result) + + return final_result + return "Workflow finished without completing"