From 792c8ac331ad366031ccc47402503c9843a95193 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 11 Dec 2023 12:54:26 -0500 Subject: [PATCH] add checks and workarounds for flaky breaking ipywidgets --- nlp/notebooks/NLU_demo.ipynb | 278 ++++++++++++++++++++++------------- nlp/notebooks/nlu_demo.py | 28 ++-- 2 files changed, 194 insertions(+), 112 deletions(-) diff --git a/nlp/notebooks/NLU_demo.ipynb b/nlp/notebooks/NLU_demo.ipynb index 222ce02..2dd2c4f 100644 --- a/nlp/notebooks/NLU_demo.ipynb +++ b/nlp/notebooks/NLU_demo.ipynb @@ -28,8 +28,8 @@ "execution_count": 1, "metadata": { "ExecuteTime": { - "end_time": "2023-12-09T00:05:30.390925184Z", - "start_time": "2023-12-09T00:05:25.166125745Z" + "end_time": "2023-12-11T17:07:45.834529730Z", + "start_time": "2023-12-11T17:07:41.428211954Z" } }, "outputs": [ @@ -37,9 +37,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "/home/francis/dev/miniconda/envs/pavics-jupyter-images-nlp/lib/python3.9/site-packages/torch/cuda/__init__.py:138: UserWarning: CUDA initialization: The NVIDIA driver on your system is too old (found version 11040). Please update your GPU driver by downloading and installing a new version from the URL: http://www.nvidia.com/Download/index.aspx Alternatively, go to: https://pytorch.org to install a PyTorch version that has been compiled with your version of the CUDA driver. (Triggered internally at ../c10/cuda/CUDAFunctions.cpp:108.)\n", + "/home/francis/.conda/envs/pavics-jupyter-images-nlp/lib/python3.9/site-packages/torch/cuda/__init__.py:138: UserWarning: CUDA initialization: The NVIDIA driver on your system is too old (found version 11040). Please update your GPU driver by downloading and installing a new version from the URL: http://www.nvidia.com/Download/index.aspx Alternatively, go to: https://pytorch.org to install a PyTorch version that has been compiled with your version of the CUDA driver. (Triggered internally at ../c10/cuda/CUDAFunctions.cpp:108.)\n", " return torch._C._cuda_getDeviceCount() > 0\n", - "/home/francis/dev/miniconda/envs/pavics-jupyter-images-nlp/lib/python3.9/site-packages/torch/cuda/__init__.py:611: UserWarning: Can't initialize NVML\n", + "/home/francis/.conda/envs/pavics-jupyter-images-nlp/lib/python3.9/site-packages/torch/cuda/__init__.py:611: UserWarning: Can't initialize NVML\n", " warnings.warn(\"Can't initialize NVML\")\n" ] }, @@ -54,7 +54,7 @@ "source": [ "# initialize the demo class\n", "from nlu_demo import NLU_demo\n", - "nlu = NLU_demo()" + "nlu = NLU_demo(verbose=True)" ] }, { @@ -76,21 +76,31 @@ "execution_count": 2, "metadata": { "ExecuteTime": { - "end_time": "2023-12-09T00:05:30.443216958Z", - "start_time": "2023-12-09T00:05:30.395143932Z" + "end_time": "2023-12-11T17:07:45.946102302Z", + "start_time": "2023-12-11T17:07:45.826572915Z" } }, "outputs": [ { "data": { + "text/plain": "interactive(children=(Output(),), _dom_classes=('widget-interact',))", "application/vnd.jupyter.widget-view+json": { - "model_id": "19607a6379a04bac9cf9fdc0f3ed6356", "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Dropdown(description='Query:', index=11, layout=Layout(width='max-content'), options=('10 years rainfall data …" - ] + "version_minor": 0, + "model_id": "cb1f41bfd91d434f8e0c09b88de733ac" + } + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": "Dropdown(description='Query:', index=11, layout=Layout(width='max-content'), options=('10 years rainfall data …", + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "5be24dc171544f4d955b51b7f3d8e717" + } }, "execution_count": 2, "metadata": {}, @@ -107,30 +117,14 @@ "execution_count": 3, "metadata": { "ExecuteTime": { - "end_time": "2023-12-09T00:05:30.443782882Z", - "start_time": "2023-12-09T00:05:30.442203088Z" + "end_time": "2023-12-11T17:07:45.948085079Z", + "start_time": "2023-12-11T17:07:45.944861966Z" } }, "outputs": [ { "data": { - "text/plain": [ - "{'query': 'CO2 concentration in atmosphere in london',\n", - " 'annotations': [{'text': 'london',\n", - " 'position': [35, 41],\n", - " 'type': 'location',\n", - " 'matchingType': 'overlap',\n", - " 'name': 'London, England',\n", - " 'value': {'type': 'Polygon', 'coordinates': [[]]}},\n", - " {'text': 'CO2 concentration in atmosphere',\n", - " 'position': [0, 31],\n", - " 'type': 'target',\n", - " 'name': ['mole fraction of carbon dioxide in air',\n", - " 'co2s',\n", - " 'co2',\n", - " 'Atmosphere CO2',\n", - " 'Mole Fraction of CO2']}]}" - ] + "text/plain": "{'query': 'CO2 concentration in atmosphere in london',\n 'annotations': [{'text': 'london',\n 'position': [35, 41],\n 'type': 'location',\n 'matchingType': 'overlap',\n 'name': 'London, England',\n 'value': {'type': 'Polygon', 'coordinates': [[]]}},\n {'text': 'CO2 concentration in atmosphere',\n 'position': [0, 31],\n 'type': 'target',\n 'name': ['mole fraction of carbon dioxide in air',\n 'co2s',\n 'co2',\n 'Atmosphere CO2',\n 'Mole Fraction of CO2']}]}" }, "execution_count": 3, "metadata": {}, @@ -151,33 +145,43 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 19, "metadata": { "ExecuteTime": { - "end_time": "2023-12-09T00:05:30.444371462Z", - "start_time": "2023-12-09T00:05:30.442479251Z" + "end_time": "2023-12-11T17:48:51.297072995Z", + "start_time": "2023-12-11T17:48:51.253863460Z" } }, "outputs": [ { "data": { + "text/plain": "interactive(children=(Output(),), _dom_classes=('widget-interact',))", "application/vnd.jupyter.widget-view+json": { - "model_id": "f702eadcd9eb4040988a5095781ddd11", "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Textarea(value='', description='Your query:', layout=Layout(height='auto', width='800px'), placeholder='yearly…" - ] + "version_minor": 0, + "model_id": "287df93c24ae410a8a3330bd6efaf59e" + } }, - "execution_count": 4, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": "Textarea(value='CO2 concentration in atmosphere in london', description='Your query:', layout=Layout(height='a…", + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "511f12e8d89b4540865ea2d60a44b9af" + } + }, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# write your query here\n", - "nlu.custom_query()" + "nlu.custom_query(value=\"CO2 concentration in atmosphere in london\")" ] }, { @@ -193,26 +197,36 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 20, "metadata": { "ExecuteTime": { - "end_time": "2023-12-09T00:05:30.444705052Z", - "start_time": "2023-12-09T00:05:30.442665547Z" + "end_time": "2023-12-11T17:48:54.353972826Z", + "start_time": "2023-12-11T17:48:54.301123751Z" } }, "outputs": [ { "data": { + "text/plain": "interactive(children=(Output(),), _dom_classes=('widget-interact',))", + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "25176d1192764e638ddcdd23b894ab38" + } + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": "Select(description='System Version:', index=2, options=('V1', 'V2', 'V3'), rows=3, style=DescriptionStyle(desc…", "application/vnd.jupyter.widget-view+json": { - "model_id": "5dd7b5acf606497794f45a303285e62b", "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Select(description='System Version:', index=2, options=('V1', 'V2', 'V3'), rows=3, style=DescriptionStyle(desc…" - ] + "version_minor": 0, + "model_id": "d45cabd2d5b640afac247ea34f923554" + } }, - "execution_count": 5, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -248,11 +262,11 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 21, "metadata": { "ExecuteTime": { - "end_time": "2023-12-09T00:05:30.845764362Z", - "start_time": "2023-12-09T00:05:30.442913869Z" + "end_time": "2023-12-11T17:49:22.879983089Z", + "start_time": "2023-12-11T17:48:57.753240269Z" } }, "outputs": [ @@ -260,22 +274,39 @@ "name": "stdout", "output_type": "stream", "text": [ - "Reading config file: /home/francis/dev/daccs/pavics-jupyter-images/nlp/notebooks/nl2query/V1/v1_config.cfg\n", - "Reading config file: nl2query/V1/spacy_config.cfg\n", - "Reading config file: nl2query/V1/flair_config.cfg\n", - "2023-12-08 23:27:44,424 SequenceTagger predicts: Dictionary with 20 tags: , O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, , \n", - "Reading config file: /home/francis/dev/daccs/pavics-jupyter-images/nlp/notebooks/nl2query/V2/v2_config.cfg\n", - "Loading Chroma Vdb from... nl2query/V2/prop_vdb\n", - "Loading Chroma Vdb from... nl2query/V2/target_vdb\n" + "New query: CO2 concentration in atmosphere in london\n", + "SPACY:\n", + " london 35 41 GPE\n", + "FLAIR:\n", + " london 35 41 LOC 0.9999834299087524\n", + "LOCATION - V2:\n", + " {\n", + " \"text\": \"r\",\n", + " \"position\": [\n", + " 11,\n", + " 12\n", + " ],\n", + " \"type\": \"location\",\n", + " \"name\": \"Rio de Janeiro, Regi\\u00e3o Geogr\\u00e1fica Imediata do Rio de Janeiro, Regi\\u00e3o Metropolitana do Rio de Janeiro, Regi\\u00e3o Geogr\\u00e1fica Intermedi\\u00e1ria do Rio de Janeiro, Rio de Janeiro, Southeast Region, Brazil\",\n", + " \"value\": {\n", + " \"type\": \"Polygon\",\n", + " \"coordinates\": [\n", + " -22.7460878,\n", + " -23.0827051,\n", + " -43.0990811,\n", + " -43.796252\n", + " ]\n", + " },\n", + " \"matchingType\": \"overlap\"\n", + "}\n", + "New query: CO2 concentation in atmosphere in london\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/home/francis/dev/miniconda/envs/pavics-jupyter-images-nlp/lib/python3.9/site-packages/langchain_core/vectorstores.py:325: UserWarning: No relevant docs were retrieved using the relevance score threshold 0.72\n", - " warnings.warn(\n", - "/home/francis/dev/miniconda/envs/pavics-jupyter-images-nlp/lib/python3.9/site-packages/langchain_core/vectorstores.py:325: UserWarning: No relevant docs were retrieved using the relevance score threshold 0.72\n", + "/home/francis/.conda/envs/pavics-jupyter-images-nlp/lib/python3.9/site-packages/langchain_core/vectorstores.py:325: UserWarning: No relevant docs were retrieved using the relevance score threshold 0.72\n", " warnings.warn(\n" ] }, @@ -283,6 +314,32 @@ "name": "stdout", "output_type": "stream", "text": [ + "LOCATION - V1+V2:\n", + " {\n", + " \"text\": \"london\",\n", + " \"position\": [\n", + " 35,\n", + " 41\n", + " ],\n", + " \"type\": \"location\",\n", + " \"name\": \"London, Middlesex, Ontario (City)\",\n", + " \"value\": {\n", + " \"type\": \"Polygon\",\n", + " \"coordinates\": [\n", + " [\n", + " -81.4117051,\n", + " 42.8051495,\n", + " -81.0775729,\n", + " 43.0935564\n", + " ]\n", + " ]\n", + " },\n", + " \"matchingType\": \"overlap\"\n", + "}\n", + "New query: CO2 concentation in atmosphere in \n", + "\n", + "Removing stopwords\n", + "New query: CO2 concentation atmosphere\n", "NOT FOUND IN QUERY: concentation CO2 concentration in atmosphere in london ['concentation', 'atmosphere'] concentation atmosphere\n" ] }, @@ -291,13 +348,13 @@ "evalue": "substring not found", "output_type": "error", "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[6], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# run the NLU pipeline on the natural language query\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[43mnlu\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnl2query\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/dev/daccs/pavics-jupyter-images/nlp/notebooks/nlu_demo.py:104\u001b[0m, in \u001b[0;36mNLU_demo.nl2query\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mv3_instance \u001b[38;5;241m=\u001b[39m V3_pipeline(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mv1_config, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mv2_config)\n\u001b[1;32m 103\u001b[0m \u001b[38;5;66;03m# run V3 pipeline on query\u001b[39;00m\n\u001b[0;32m--> 104\u001b[0m v3_structq \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mv3_instance\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtransform_nl2query\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnlq\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 105\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mV3 structured query: \u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 106\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstruct_query \u001b[38;5;241m=\u001b[39m v3_structq\u001b[38;5;241m.\u001b[39mto_dict()\n", - "File \u001b[0;32m~/dev/daccs/pavics-jupyter-images/nlp/notebooks/nl2query/V3/V3_pipeline.py:128\u001b[0m, in \u001b[0;36mV3_pipeline.transform_nl2query\u001b[0;34m(self, nlq, verbose)\u001b[0m\n\u001b[1;32m 126\u001b[0m targ_span, targ_results \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mv2_instance\u001b[38;5;241m.\u001b[39mvdbs\u001b[38;5;241m.\u001b[39mquery_ngram_target(newq)\n\u001b[1;32m 127\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(targ_span) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m :\n\u001b[0;32m--> 128\u001b[0m targ_spans, pos \u001b[38;5;241m=\u001b[39m \u001b[43mV2_pipeline\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfind_spans\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtarg_span\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnlq\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 129\u001b[0m targ_annotation \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcreate_target_annotation([targ_spans, pos, targ_results])\n\u001b[1;32m 130\u001b[0m combined_annotations\u001b[38;5;241m.\u001b[39mappend(targ_annotation)\n", - "File \u001b[0;32m~/dev/daccs/pavics-jupyter-images/nlp/notebooks/nl2query/V2/V2_pipeline.py:48\u001b[0m, in \u001b[0;36mfind_spans\u001b[0;34m(span, query)\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m split \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m query:\n\u001b[1;32m 47\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNOT FOUND IN QUERY:\u001b[39m\u001b[38;5;124m\"\u001b[39m, split, query, splits, span)\n\u001b[0;32m---> 48\u001b[0m start \u001b[38;5;241m=\u001b[39m \u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[43m(\u001b[49m\u001b[43msplit\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 49\u001b[0m end \u001b[38;5;241m=\u001b[39m start \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mlen\u001b[39m(split)\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m span\u001b[38;5;241m.\u001b[39mstartswith(split):\n\u001b[1;32m 51\u001b[0m \u001b[38;5;66;03m# first iteration\u001b[39;00m\n", - "\u001b[0;31mValueError\u001b[0m: substring not found" + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mValueError\u001B[0m Traceback (most recent call last)", + "Cell \u001B[0;32mIn[21], line 2\u001B[0m\n\u001B[1;32m 1\u001B[0m \u001B[38;5;66;03m# run the NLU pipeline on the natural language query\u001B[39;00m\n\u001B[0;32m----> 2\u001B[0m \u001B[43mnlu\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mnl2query\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n", + "File \u001B[0;32m~/dev/daccs/pavics-jupyter-images/nlp/notebooks/nlu_demo.py:109\u001B[0m, in \u001B[0;36mNLU_demo.nl2query\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 106\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[1;32m 107\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mv3_instance:\n\u001B[1;32m 108\u001B[0m \u001B[38;5;66;03m# initialize V3\u001B[39;00m\n\u001B[0;32m--> 109\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mv3_instance \u001B[38;5;241m=\u001B[39m V3_pipeline(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mv1_config, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mv2_config)\n\u001B[1;32m 110\u001B[0m \u001B[38;5;66;03m# run V3 pipeline on query\u001B[39;00m\n\u001B[1;32m 111\u001B[0m v3_structq \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mv3_instance\u001B[38;5;241m.\u001B[39mtransform_nl2query(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mnlq, verbose\u001B[38;5;241m=\u001B[39m\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mverbose)\n", + "File \u001B[0;32m~/dev/daccs/pavics-jupyter-images/nlp/notebooks/nl2query/V3/V3_pipeline.py:128\u001B[0m, in \u001B[0;36mV3_pipeline.transform_nl2query\u001B[0;34m(self, nlq, verbose)\u001B[0m\n\u001B[1;32m 126\u001B[0m targ_span, targ_results \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mv2_instance\u001B[38;5;241m.\u001B[39mvdbs\u001B[38;5;241m.\u001B[39mquery_ngram_target(newq)\n\u001B[1;32m 127\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mlen\u001B[39m(targ_span) \u001B[38;5;241m>\u001B[39m \u001B[38;5;241m1\u001B[39m :\n\u001B[0;32m--> 128\u001B[0m targ_spans, pos \u001B[38;5;241m=\u001B[39m \u001B[43mV2_pipeline\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfind_spans\u001B[49m\u001B[43m(\u001B[49m\u001B[43mtarg_span\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mnlq\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 129\u001B[0m targ_annotation \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcreate_target_annotation([targ_spans, pos, targ_results])\n\u001B[1;32m 130\u001B[0m combined_annotations\u001B[38;5;241m.\u001B[39mappend(targ_annotation)\n", + "File \u001B[0;32m~/dev/daccs/pavics-jupyter-images/nlp/notebooks/nl2query/V2/V2_pipeline.py:48\u001B[0m, in \u001B[0;36mfind_spans\u001B[0;34m(span, query)\u001B[0m\n\u001B[1;32m 46\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m split \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;129;01min\u001B[39;00m query:\n\u001B[1;32m 47\u001B[0m \u001B[38;5;28mprint\u001B[39m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mNOT FOUND IN QUERY:\u001B[39m\u001B[38;5;124m\"\u001B[39m, split, query, splits, span)\n\u001B[0;32m---> 48\u001B[0m start \u001B[38;5;241m=\u001B[39m \u001B[43mquery\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mindex\u001B[49m\u001B[43m(\u001B[49m\u001B[43msplit\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 49\u001B[0m end \u001B[38;5;241m=\u001B[39m start \u001B[38;5;241m+\u001B[39m \u001B[38;5;28mlen\u001B[39m(split)\n\u001B[1;32m 50\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m span\u001B[38;5;241m.\u001B[39mstartswith(split):\n\u001B[1;32m 51\u001B[0m \u001B[38;5;66;03m# first iteration\u001B[39;00m\n", + "\u001B[0;31mValueError\u001B[0m: substring not found" ] } ], @@ -335,26 +392,63 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": { "ExecuteTime": { - "start_time": "2023-12-09T00:05:30.886357687Z" + "end_time": "2023-12-11T17:10:20.250279570Z", + "start_time": "2023-12-11T17:10:20.206453Z" } }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": "Dropdown(description='Select catalog:', options=('hirondelle_crim', 'uoft', 'pavics_ouranos', 'earth_aws', 'pa…", + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "53ae287b39904e8fa0884356c2ae7024" + } + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "nlu.select_stac_catalog()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": { "ExecuteTime": { - "start_time": "2023-12-09T00:05:30.886609596Z" + "end_time": "2023-12-11T17:10:29.999055822Z", + "start_time": "2023-12-11T17:10:24.526252576Z" } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Created STAC query with the parameters:\n", + " {'bbox': [-76.3631149, 44.9445516, -75.2324963, 45.544859], 'datetime': ['2000-01-01T00:00:00Z', '2021-01-01T00:00:00Z'], 'query': ['precipitation amount', 'stratiform precipitation amount', 'large scale precipitation amount', 'stratiform precipitation flux', 'large scale precipitation flux', 'stratiform rainfall amount', 'large scale rainfall amount', 'solid precipitation flux', 'stratiform rainfall rate', 'large scale rainfall rate', 'convective precipitation amount', 'lwe stratiform precipitation rate', 'lwe large scale precipitation rate', 'tendency of specific humidity due to stratiform precipitation', 'tendency of specific humidity due to large scale precipitation', 'rainfall amount', 'stratiform graupel fall amount', 'solid precipitation flux containing single 2H', 'prsn2h', 'Precipitation Flux of Snow and Ice Containing Deuterium (1H 2H O)', 'tendency of air temperature due to stratiform precipitation', 'tendency of air temperature due to large scale precipitation', 'convective precipitation flux', 'prc', 'Convective Precipitation', 'mass fraction of rainfall falling onto surface snow', 'prrsn', 'Fraction of Rainfall on Snow'], 'collections': []}\n", + "Opening catalog: Data Analytics for Canadian Climate Services STAC API\n", + "Searching catalog: https://hirondelle.crim.ca/stac\n", + "QUERY: {'url': 'https://hirondelle.crim.ca/stac/search', 'client': , '_stac_io': , '_max_items': 10, 'method': 'GET', 'modifier': None, '_parameters': {'bbox': (-76.3631149, 44.9445516, -75.2324963, 45.544859), 'datetime': '2000-01-01T00:00:00Z/2021-01-01T00:00:00Z', 'collections': (), 'query': {}}}\n" + ] + }, + { + "data": { + "text/plain": "[]", + "text/html": "\n \n \n \n \n " + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "nlu.run_stac_query()" ] @@ -406,28 +500,6 @@ " }\n", "nlu.run_custom_stac_query(params)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "start_time": "2023-12-09T00:05:30.887024764Z" - } - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "start_time": "2023-12-09T00:05:30.887141938Z" - } - }, - "outputs": [], - "source": [] } ], "metadata": { @@ -435,9 +507,9 @@ "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" }, "kernelspec": { - "display_name": "Python [conda env:pavics-jupyter-images-nlp]", + "name": "conda-env-.conda-pavics-jupyter-images-nlp-py", "language": "python", - "name": "conda-env-pavics-jupyter-images-nlp-py" + "display_name": "Python [conda env:.conda-pavics-jupyter-images-nlp]" }, "language_info": { "codemirror_mode": { diff --git a/nlp/notebooks/nlu_demo.py b/nlp/notebooks/nlu_demo.py index dca3905..0e19b04 100644 --- a/nlp/notebooks/nlu_demo.py +++ b/nlp/notebooks/nlu_demo.py @@ -3,6 +3,7 @@ from pprint import pprint import ipywidgets as widgets +from ipywidgets import interact from nl2query.V1.V1_pipeline import V1_pipeline from nl2query.V2.V2_pipeline import V2_pipeline @@ -14,7 +15,8 @@ class NLU_demo: """class to handle all necessary widgets and functions of the NLU demo notebook""" - def __init__(self): + def __init__(self, verbose: bool = False) -> None: + self.verbose = verbose # initialize pipelines self.path = os.path.dirname(os.path.realpath(__file__)) self.v1_config = os.path.join(self.path, "nl2query/V1/v1_config.cfg") @@ -31,6 +33,7 @@ def __init__(self): self.gold_queries = goldq["queries"] self.query_list = [query['query'] for _,query in enumerate(self.gold_queries)] # setup visual widgets + # NOTE: to ensure widgets render correctly, wrap them in 'interact' before returning them for the notebook self.select_query = widgets.Dropdown( options=self.query_list, value=self.query_list[11], @@ -56,19 +59,21 @@ def __init__(self): def select_gold_query(self): """return select gold query dropdown""" - return self.select_query + return interact(self.select_query) def get_gold_annotations(self): """get the annotations for the selected gold query""" return self.gold_queries[self.select_query.index] - def custom_query(self): + def custom_query(self, value=""): """return textbox to write your own query""" - return self.write_query + if value: + self.write_query.value = value + return interact(self.write_query) def select_nlu_version(self): """return a selection widget for selecting the system version""" - return self.select_version + return interact(self.select_version) def nl2query(self): """takes the selected query (gold or custom), @@ -80,12 +85,17 @@ def nl2query(self): self.nlq = self.write_query.value else: self.nlq = self.select_query.value + if not self.nlq: + raise ValueError( + "Natural Language Query is empty. Cannot run the query pipeline. " + "Either set the value with 'write_query' or 'select_query' widgets." + ) if self.select_version.value == "V1": if not self.v1_instance: #initialize V1 self.v1_instance = V1_pipeline(self.v1_config) # run V1 pipeline on this query - v1_structq = self.v1_instance.transform_nl2query(self.nlq, verbose=False) + v1_structq = self.v1_instance.transform_nl2query(self.nlq, verbose=self.verbose) print("\nV1 structured query: ") self.struct_query = v1_structq.to_dict() elif self.select_version.value == "V2": @@ -93,7 +103,7 @@ def nl2query(self): if not self.v2_instance: self.v2_instance = V2_pipeline(self.v2_config) # run V2 pipeline on this query - v2_structq = self.v2_instance.transform_nl2query(self.nlq, verbose=False) + v2_structq = self.v2_instance.transform_nl2query(self.nlq, verbose=self.verbose) print("\nV2 structured query: ") self.struct_query = v2_structq.to_dict() else: @@ -101,7 +111,7 @@ def nl2query(self): # initialize V3 self.v3_instance = V3_pipeline(self.v1_config, self.v2_config) # run V3 pipeline on query - v3_structq = self.v3_instance.transform_nl2query(self.nlq, verbose=False) + v3_structq = self.v3_instance.transform_nl2query(self.nlq, verbose=self.verbose) print("\nV3 structured query: ") self.struct_query = v3_structq.to_dict() return pprint(self.struct_query, sort_dicts=False) @@ -111,7 +121,7 @@ def select_stac_catalog(self): def run_stac_query(self): if self.struct_query: - return self.stac_handler.handle_query(self.struct_query, verbose=True) + return self.stac_handler.handle_query(self.struct_query, verbose=self.verbose) def run_custom_stac_query(self, params): return self.stac_handler.search_query(params=params)