Skip to content

Commit

Permalink
Sinopia-to-FOLIO workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
jermnelson committed Nov 15, 2023
1 parent 70176f8 commit 633ca2f
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 47 deletions.
38 changes: 29 additions & 9 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
run_prompt
)

from folio import Okapi, get_instance
from folio import Okapi, get_instance, logout_folio
from folio import login as okapi_login

from workflows import AssignLCSH, MARC21toFOLIO, NewResource
Expand Down Expand Up @@ -179,7 +179,7 @@ <h1>Chatting with Catalogs</h1>
<div class="col-2">
<img src="imgs/hexsticker.png" alt="AI4LAM Hex Sticker and Logo" style="height: 100px;">
<div>
<small class="align-middle">Bugs? Ideas for improvement? Please add an issue <a href="https://github.com/AI4LAM/catalog-chat/issues" target="_blank">here</a>.</small>
<small class="align-middle">Bugs? Ideas for improving? Please add a ticket <a href="https://github.com/AI4LAM/catalog-chat/issues" target="_blank">here</a>.</small>
</div>
</div>
</div>
Expand Down Expand Up @@ -435,7 +435,8 @@ <h3>Workflows</h3>
</header>
<p>
Workflows in this app are inspired by <a href="https://airflow.apache.org/">Apache Airflow</a> and
<a href="https://www.prefect.io/">Prefect.io</a>.
<a href="https://www.prefect.io/">Prefect.io</a>. A workflow wraps LLM API calls together with calls to
specific FOLIO Okapi and Sinopia APIs, building context and examples specific to the use case.
</p>
<img src="imgs/workflow-flowchart.png" alt="Basic Cataloging Workflow Flowchart" />
<section>
Expand All @@ -448,14 +449,33 @@ <h3>Roadmap</h3>
</header>
<section>
<ul>
<li>Support for more Large Language Models (Llama2, Claude, Bard, etc.)</li>

<li>Support for more Large Language Models (Llama2, Claude, Bard, etc.) ticket <a href="https://github.com/AI4LAM/catalog-chat/issues/1">#1</a></li>
<li>Provide API for LCSH Vector datastore</li>
<li>Implement Vector datastore for FOLIO</li>
<li>Implement Vector datastore for Sinopia</li>


</ul>
</section>
</article>
</div>
<div class="col">
<h2>Documentation</h2>
<article id="about" class="container">
<header>
<h3>About this App</h3>
</header>
<section>
The <em>Chatting with Catalogs</em> application is built as a static website using
<a href="https://pyscript.net/">PyScript</a>, which loads a Python 3.1x environment into your
web browser through WebAssembly. We use the following Python packages:
<ul>
<li><a href="https://docs.pydantic.dev/latest/">Pydantic</a></li>
<li><a href="https://pymarc.readthedocs.io/en/latest/">Pymarc</a></li>
<li><a href="https://rdflib.readthedocs.io/en/stable/">RDFLib</a></li>
</ul>
</section>
</article>
<article id="openai-api" class="container">
<header>
<h3>OpenAI API</h3>
Expand Down Expand Up @@ -503,10 +523,10 @@ <h3>ai4lam Metadata Working Group</h3>
</ul>
</section>
</article>
<article class="container">
<!-- <article class="container">
<header>
<h3>Retrieval Augmented Generation (RAG)</h3>
</header>
</header>

</article> //-->

Expand Down Expand Up @@ -620,7 +640,7 @@ <h5 class="modal-title" id="edgeAIModalLabel">Enter Edge AI API URI</h5>
<div class="mb-3">
<label for="edgeAIURI" class="form-label">Edge AI API</label>
<input type="text" class="form-control" id="edgeAIURI" placeholder="Enter URI to the Edge AI API" aria-describedby="edgeAIapiHelp">
<div id="edgeAIapiHelp" class="form-text">This Edge AI API connets to either a local or remote API endpoint.</div>
<div id="edgeAIapiHelp" class="form-text">This Edge AI API connects to either a local or remote API endpoint.</div>
</div>
<button type="button"
data-bs-dismiss="modal"
Expand Down Expand Up @@ -794,7 +814,7 @@ <h5 class="offcanvas-title" id="pythonOffCanvasLabel">
</div>

</div>
<footer class="p-3 mb-5 rounded">
<footer class="p-2 rounded bg-secondary-subtle">
<small>
<p>
An experimental Chat application by AI4LAM Metadata Working Group.
Expand Down
2 changes: 1 addition & 1 deletion src/controls.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ async def run_prompt(workflow, chat_gpt_instance):
run_result = await workflow.run(chat_gpt_instance, current)
loading_spinner.classList.add("d-none")

# console.log(f"Run result {run_result}")
console.log(f"Run result {run_result}")
main_chat_textarea.value = ""


Expand Down
49 changes: 47 additions & 2 deletions src/folio.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,12 @@ def services():
# auto_vendor_marc_li = document.createElement("li")
# auto_vendor_marc_li.innerHTML = "Load Vendor MARC Records"
# folio_services_ol.appendChild(auto_vendor_marc_li)
modal_body.innerHTML = "Logged into FOLIO"
h4 = document.createElement("h4")
h4.innerHTML = "Logged into FOLIO"
modal_body.appendChild(h4)
div_logout = document.createElement("div")
div_logout.innerHTML = """<button class="btn btn-primary" py-script="okapi=logout_folio()">Log out</button>"""
modal_body.appendChild(div_logout)


# Goal 1: Automate loading of vendor MARC records
Expand Down Expand Up @@ -110,6 +115,9 @@ async def login(okapi: Okapi):
folio_button = document.getElementById("folioButton")
folio_button.classList.remove("btn-outline-danger")
folio_button.classList.add("btn-outline-success")
default_folio = document.getElementById("folio-default")
if not "d-none" in default_folio.classList:
default_folio.classList.add("d-none")
localStorage.setItem("okapi", okapi.json())
services()
else:
Expand All @@ -127,6 +135,7 @@ async def load_marc_record(marc_file):
return str(marc_record)



async def get_instance(okapi, uuid):
kwargs = {"headers": okapi.headers()}

Expand Down Expand Up @@ -216,5 +225,41 @@ async def get_instance_types() -> dict:
return output

def load_instance(url):
    """Point the embedded FOLIO iframe at *url*.

    Adds the ``d-none`` class to the ``folio-default`` element when it is
    not already present (presumably hiding the placeholder content), then
    sets the ``src`` of the ``folio-system-frame`` iframe to *url*.

    Args:
        url: Address assigned to the iframe's ``src`` attribute.
    """
    default_folio = document.getElementById("folio-default")
    if "d-none" not in default_folio.classList:
        default_folio.classList.add("d-none")
    folio_iframe = document.getElementById("folio-system-frame")
    # Assign exactly once: a second identical assignment only triggers a
    # redundant navigation of the frame.
    folio_iframe.src = url

def logout_folio():
    """Reset the FOLIO modal back to its login form.

    Sets the modal title (``folioModalLabel``) to "Login to FOLIO" and
    replaces the markup of the modal body (``folioModalBody``) with the
    login form: FOLIO URI, Okapi URI, tenant, username and password fields
    plus a Login button.

    Returns:
        None
    """
    modal_body = document.getElementById("folioModalBody")
    modal_label = document.getElementById("folioModalLabel")
    modal_label.innerHTML = "Login to FOLIO"
    # The markup must be written to ``innerHTML``; rebinding the local name
    # ``modal_body`` to the string would leave the DOM element untouched.
    # NOTE(review): login() stores the session in localStorage under "okapi"
    # and switches folioButton to "btn-outline-success"; neither is reset
    # here -- confirm whether logging out should also clear them.
    modal_body.innerHTML = """<form>
<div class="mb-3">
<label for="folioURI" class="form-label">FOLIO URI</label>
<input class="form-control" id="folioURI">
</div>
<div class="mb-3">
<label for="okapiURI" class="form-label">Okapi URI</label>
<input class="form-control" id="okapiURI">
</div>
<div class="mb-3">
<label for="folioTenant" class="form-label">Tenant</label>
<input class="form-control" id="folioTenant">
</div>
<div class="mb-3">
<label for="folioUser" class="form-label">Username</label>
<input class="form-control" id="folioUser">
</div>
<div class="mb-3">
<label for="folioPassword" class="form-label">Password</label>
<input type="password" class="form-control" id="folioPassword">
</div>
<button type="button"
data-bs-dismiss="modal"
class="btn btn-primary"
py-click="asyncio.ensure_future(login_okapi())">Login</button>
</form>"""
    return None

99 changes: 64 additions & 35 deletions src/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,23 @@ async def get_types(self):
if self.instance_types is None:
self.instance_types = await get_instance_types()

def __update_record__(self, record):
    """Fill in FOLIO type ids on an instance *record*, in place.

    * ``instanceTypeId`` is set from the "unspecified" entry of
      ``self.instance_types``.
    * Every identifier carrying ``identifierTypeName`` has that key removed
      and replaced by ``identifierTypeId`` looked up in
      ``self.identifier_types``; any name starting with "OCLC"
      (case-insensitive) is looked up as plain "OCLC".
    * Every contributor gets ``contributorTypeId`` (looked up from its
      ``contributorTypeText``, defaulting to "Contributor") and
      ``contributorNameTypeId`` (always the "Personal name" entry).

    Lookups use ``.get`` and therefore yield ``None`` for unknown names.

    Args:
        record: Dict-like instance record; mutated in place.
    """
    record["instanceTypeId"] = self.instance_types.get("unspecified")

    for entry in record.get("identifiers", []):
        if "identifierTypeName" not in entry:
            continue
        type_name = entry.pop("identifierTypeName")
        # Collapse variants such as "OCLC number" onto the single "OCLC" key.
        type_name = "OCLC" if type_name.upper().startswith("OCLC") else type_name
        entry["identifierTypeId"] = self.identifier_types.get(type_name)

    for person in record.get("contributors", []):
        type_text = person.get("contributorTypeText", "Contributor")
        person["contributorTypeId"] = self.contributor_types.get(type_text)
        person["contributorNameTypeId"] = self.contributor_name_types.get(
            "Personal name"
        )


class AssignLCSH(WorkFlow):
name = "Assign Library of Congress Subject Heading to record"
Expand Down Expand Up @@ -176,22 +193,6 @@ def __init__(self, zero_shot=False):
super().__init__()
self.zero_shot = zero_shot

def __update_record__(self, record):
    """Fill in FOLIO type ids on an instance *record*, in place.

    Sets ``instanceTypeId`` from the "unspecified" instance type, swaps each
    identifier's ``identifierTypeName`` for an ``identifierTypeId`` (names
    starting with "OCLC", case-insensitive, are looked up as "OCLC"), and
    gives every contributor a ``contributorTypeId`` (from its
    ``contributorTypeText``, defaulting to "Contributor") and a
    ``contributorNameTypeId`` (the "Personal name" entry).

    Args:
        record: Dict-like instance record; mutated in place.
    """
    record["instanceTypeId"] = self.instance_types.get("unspecified")

    for entry in record.get("identifiers", []):
        if "identifierTypeName" not in entry:
            continue
        type_name = entry.pop("identifierTypeName")
        # Collapse variants such as "OCLC number" onto the single "OCLC" key.
        type_name = "OCLC" if type_name.upper().startswith("OCLC") else type_name
        entry["identifierTypeId"] = self.identifier_types.get(type_name)

    for person in record.get("contributors", []):
        type_text = person.get("contributorTypeText", "Contributor")
        person["contributorTypeId"] = self.contributor_types.get(type_text)
        person["contributorNameTypeId"] = self.contributor_name_types.get(
            "Personal name"
        )



Expand Down Expand Up @@ -263,17 +264,9 @@ async def __handle_func__(self, function_call):
match function_name:
case "add_instance":
record = json.loads(args.get("record"))
record["instanceTypeId"] = self.instance_types.get("unspecified")
for contributor in record.get("contributors", []):
contributor["contributorTypeId"] = self.contributor_types.get(
contributor.get("contributorTypeText"),
"Contributor"
)
contributor[
"contributorNameTypeId"
] = self.contributor_name_types.get("Personal name")
instance_url = await add_instance(json.dumps(record))
output = instance_url
self.__update_record__(record)
instance_url = await add_instance(json.dumps(record))
output = instance_url

case "load_instance":
instance_url = json.loads(args.get("instance_url"))
Expand Down Expand Up @@ -357,9 +350,43 @@ def __init__(self, zero_shot=False):
super().__init__()
self.zero_shot = zero_shot



async def __handle_func__(self, function_call) -> str:
    """Run the function requested by a chat ``function_call`` payload.

    Reads the function name from ``function_call["name"]`` and decodes the
    JSON text in ``function_call["arguments"]``, then dispatches:

    * ``add_instance`` -- decodes the ``record`` argument, fills in its type
      ids via ``__update_record__``, submits it with ``add_instance`` and
      returns the value that call produced.
    * ``load_sinopia`` -- loads the resource at ``resource_url``, records a
      prompt in the chat history and returns that prompt followed by the
      loaded RDF.
    * anything else -- returns an "Unknown function ..." message.

    Args:
        function_call: Mapping with ``name`` and JSON-encoded ``arguments``.

    Returns:
        The result of the dispatched branch as described above.
    """
    function_name = function_call.get("name")
    args = json.loads(function_call.get("arguments"))

    if function_name == "add_instance":
        record = json.loads(args.get("record"))
        self.__update_record__(record)
        console.log(f"SinopiaToFOLIO {record}")
        instance_url = await add_instance(json.dumps(record))
        console.log(f"After SinopiaToFOLIO func call {instance_url}")
        return instance_url

    if function_name == "load_sinopia":
        sinopia_rdf = await load_sinopia(args.get("resource_url"))
        prompt = "Create a FOLIO Instance JSON record from"
        add_history(f"{prompt}<pre>{sinopia_rdf}</pre>", "prompt")
        return f"{prompt}\n{sinopia_rdf}"

    return f"Unknown function {function_name}"


async def system(self):
system_prompt = SinopiaToFOLIO.system_prompt

if self.instance_types is None:
await self.get_types()


if self.zero_shot is False:
system_prompt = f"{system_prompt}\n\nExamples"
system_prompt += "\n".join(self.examples)
Expand All @@ -373,11 +400,13 @@ async def run(self, chat_instance: ChatGPT, initial_prompt: str):
add_history(chat_result, "response")
function_call = chat_result["choices"][0]["message"].get("function_call")
if function_call:
function_name = function_call.get("name")
args = json.loads(function_call.get("arguments"))
if function_name == "load_sinopia":
sinopia_rdf = await load_sinopia(args.get("resource_url"))
add_history(f"<pre>{sinopia_rdf}</pre>", "prompt")
chat_result_rdf = await chat_instance(sinopia_rdf)
add_history(chat_result_rdf, "response")

first_result = await self.__handle_func__(function_call)
chat_result_rdf = await chat_instance(first_result)
final_func_call = chat_result_rdf["choices"][0]["message"].get("function_call")
final_result = await self.__handle_func__(final_func_call)
console.log(f"The final result {final_result}")
add_history(final_result, "response")
load_instance(final_result)

return final_result
return "Workflow finished without completing"

0 comments on commit 633ca2f

Please sign in to comment.