Merge pull request #25 from Foreverskyin0216/fix-update-allowed-tags

fix: update allowed tags and structure
Foreverskyin0216 · Dec 29, 2024 · c3d90b7 · c3d90b7
2 parents 3187456 + 33e28de
commit c3d90b7
Show file tree

Hide file tree

Showing 29 changed files with 771 additions and 731 deletions.
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -28,7 +28,7 @@
     "prepublishOnly": "npx lerna run build"
   },
   "devDependencies": {
-    "@commitlint/cli": "^19.6.0",
+    "@commitlint/cli": "^19.6.1",
     "@commitlint/config-conventional": "^19.6.0",
     "@eslint/js": "^9.17.0",
     "@types/eslint__js": "^8.42.3",
@@ -37,15 +37,15 @@
     "eslint": "^9.17.0",
     "husky": "^9.1.7",
     "lerna": "^8.1.9",
-    "lint-staged": "^15.2.11",
+    "lint-staged": "^15.3.0",
     "prettier": "^3.4.2",
     "typescript": "^5.7.2",
-    "typescript-eslint": "^8.18.0",
-    "vite": "^6.0.3",
+    "typescript-eslint": "^8.18.2",
+    "vite": "^6.0.6",
     "vitest": "^2.1.8"
   },
   "optionalDependencies": {
-    "@rollup/rollup-linux-x64-gnu": "^4.28.1"
+    "@rollup/rollup-linux-x64-gnu": "^4.29.1"
   },
   "engines": {
     "node": ">=18"

diff --git a/packages/cli/README.md b/packages/cli/README.md
@@ -33,10 +33,10 @@ npx @playword/cli test [options]
 | ------------------ | ----- | -------------- | ---------- | ------------------------------------------------------------------------------------------------------------------ |
 | `--headed`         | `-h`  | bool           | **false**  | Whether to open the browser in headed mode.                                                                        |
 | `--env-file`       | `-e`  | string         | **.env**   | Which env file to use.                                                                                             |
-| `--record`         | `-r`  | bool, string   | **false**  | Whether to record the test steps.<br>You can also specify a file path to save the recording. (Must be a JSON file) |
+| `--record`         | `-r`  | string \| bool | **false**  | Whether to record the test steps.<br>You can also specify a file path to save the recording. (Must be a JSON file) |
 | `--playback`       | `-p`  | string         | **false**  | Whether to playback the test steps from a recording file.<br>This should be used with the `--record` option.       |
 | `--use-screenshot` | `-s`  | bool           | **false**  | Whether to enable screenshot reference.                                                                            |
 | `--browser`        | `-b`  | string         | **chrome** | Which browser to use. Supported values are `chromium`, `chrome`, `msedge`, `firefox` and `webkit`.                 |
 | `--verbose`        | `-v`  | bool           | **false**  | Whether to enable verbose mode.                                                                                    |
-| `--openai-options` | `-o`  | array          | **[]**     | Additional OpenAI API options. e.g.<br>`--openai-option apiKey=sk-... baseURL=https://...`                         |
+| `--openai-options` | `-o`  | string[]       | **[]**     | Additional OpenAI API options. e.g.<br>`-o apiKey=sk-... baseURL=https://...`                                      |
 | `--help`           |       | bool           | **false**  | Show help information.                                                                                             |
diff --git a/packages/cli/package.json b/packages/cli/package.json
@@ -28,16 +28,16 @@
     "dist"
   ],
   "dependencies": {
-    "@inquirer/confirm": "^5.1.0",
-    "@inquirer/input": "^4.1.0",
+    "@inquirer/confirm": "^5.1.1",
+    "@inquirer/input": "^4.1.1",
     "@playword/core": "^0.1.4",
     "dotenv": "^16.4.7",
     "playwright-core": "^1.49.1",
     "yargs": "^17.7.2"
   },
   "devDependencies": {
     "@types/yargs": "^17.0.33",
-    "openai": "^4.76.3",
+    "openai": "^4.77.0",
     "tsup": "^8.3.5"
   },
   "publishConfig": {

diff --git a/packages/core/README.md b/packages/core/README.md
@@ -5,7 +5,7 @@
 [![CI](https://github.com/Foreverskyin0216/playword/actions/workflows/ci.yml/badge.svg)](https://github.com/Foreverskyin0216/playword/actions/workflows/ci.yml)
 [![codecov](https://codecov.io/gh/Foreverskyin0216/playword/graph/badge.svg?token=8VO1EFXKDI)](https://codecov.io/gh/Foreverskyin0216/playword)
 
-Automate browsers with AI: What you say is what you get.
+Supercharge your Playwright tests with AI-powered natural language testing.
 
 ![PlayWord](https://i.ibb.co/JtfJvXH/demo1.gif)
 

diff --git a/packages/core/package.json b/packages/core/package.json
@@ -35,11 +35,11 @@
     }
   },
   "dependencies": {
-    "@langchain/core": "^0.3.23",
-    "@langchain/langgraph": "^0.2.33",
-    "@langchain/openai": "^0.3.14",
+    "@langchain/core": "^0.3.26",
+    "@langchain/langgraph": "^0.2.36",
+    "@langchain/openai": "^0.3.16",
     "jsdom": "^25.0.1",
-    "sanitize-html": "^2.13.1",
+    "sanitize-html": "^2.14.0",
     "yocto-spinner": "^0.1.2",
     "zod": "^3.24.1"
   },

diff --git a/packages/core/src/actions.ts b/packages/core/src/actions.ts
@@ -1,7 +1,17 @@
-import type { ActionParams, PlayWordInterface } from './types'
+import { markElement, unmarkElement } from './utils'
+import { variablePattern } from './validators'
 
-import { Document } from '@langchain/core/documents'
-import { getInputVariable, markElement, unmarkElement } from './actionUtils'
+/**
+ * Resolve an input string against the process environment variables.
+ *
+ * @param input - The raw input text; it is tested against `variablePattern` (imported from './validators', definition not shown here).
+ * @returns The value of the environment variable whose name is the matched text (`match[0]`) when that variable is set and non-empty. Otherwise, return the original input.
+ */
+const getInputVariable = (input: string) => {
+  const match = input.match(variablePattern)
+  if (!match) return input // nothing in the input matches the pattern: pass it through unchanged
+  return process.env[match[0]] || input // `||` makes an unset or empty variable fall back to the input
+}
 
 /**
  * Assert that the content of an element on the page or within the current frame is equal to a specific text.
@@ -152,16 +162,15 @@ export const getAttribute = async (ref: PlayWordInterface, params: ActionParams)
  * Get all frames on the page.
  *
  * @param ref - PlayWord instance.
- * @returns The frame documents stored within the {@link Document} structure.
+ * @returns The frame list containing the name and URL of each frame.
  */
 export const getFrames = async (ref: PlayWordInterface) => {
+  const frames = [] as string[]
   await ref.page.waitForLoadState('load')
 
-  const frames = [] as Document[]
-
   for (const frame of ref.page.frames()) {
     await frame.waitForLoadState('load')
-    frames.push(new Document({ pageContent: JSON.stringify({ name: frame.name(), url: frame.url() }) }))
+    frames.push(JSON.stringify({ name: frame.name(), url: frame.url() }))
   }
 
   return frames

diff --git a/packages/core/src/ai.ts b/packages/core/src/ai.ts
@@ -5,7 +5,7 @@ import type { ClientOptions } from '@langchain/openai'
 
 import { ChatOpenAI, OpenAIEmbeddings } from '@langchain/openai'
 import { z } from 'zod'
-import { MemoryVectorStore } from './memoryVectorStore'
+import { MemoryVectorStore } from './memory'
 
 export const CANDIDATE_SCREENSHOT_REFERENCE = `I will provide you with some candidates of elements and a screenshot.
 Your goal is to find the most relevant candidate that user mentioned in the input and want to interact with.

diff --git a/packages/core/src/graph.ts b/packages/core/src/graph.ts
@@ -1,13 +1,11 @@
 import type { LangGraphRunnableConfig } from '@langchain/langgraph'
-import type { ActionState, PlayWordInterface } from './types'
 
 import { AIMessage } from '@langchain/core/messages'
 import { Annotation, MemorySaver, StateGraph, messagesStateReducer } from '@langchain/langgraph'
 import { ToolNode } from '@langchain/langgraph/prebuilt'
 
-import assertTools from './assertTools'
-import pageTools from './pageTools'
-import { assertionPattern } from './resources'
+import * as tools from './tools'
+import { assertionPattern } from './validators'
 
 /**
  * State annotation for the action graph.
@@ -21,7 +19,7 @@ const annotation = Annotation.Root({ messages: Annotation({ reducer: messagesSta
  */
 const invokeAssertAgent = async ({ messages }: ActionState, { configurable }: LangGraphRunnableConfig) => {
   const { ai, debug, logger } = configurable?.ref as PlayWordInterface
-  const response = await ai.useTools(assertTools, messages)
+  const response = await ai.useTools(tools.assertion, messages)
   if (debug && logger && response.content) logger.text = 'AI: ' + response.content.toString()
   return { messages: [response] }
 }
@@ -33,7 +31,7 @@ const invokeAssertAgent = async ({ messages }: ActionState, { configurable }: La
  */
 const invokePageAgent = async ({ messages }: ActionState, { configurable }: LangGraphRunnableConfig) => {
   const { ai, debug, logger } = configurable?.ref as PlayWordInterface
-  const response = await ai.useTools(pageTools, messages)
+  const response = await ai.useTools(tools.page, messages)
   if (debug && logger && response.content) logger.text = 'AI: ' + response.content.toString()
   return { messages: [response] }
 }
@@ -92,8 +90,8 @@ export const actionGraph = new StateGraph(annotation)
   .addNode('assert', invokeAssertAgent)
   .addNode('page', invokePageAgent)
   .addNode('result', invokeResultAgent)
-  .addNode('assertTools', new ToolNode(assertTools))
-  .addNode('pageTools', new ToolNode(pageTools))
+  .addNode('assertTools', new ToolNode(tools.assertion))
+  .addNode('pageTools', new ToolNode(tools.page))
   .addConditionalEdges('__start__', shouldInvoke, ['page', 'assert'])
   .addConditionalEdges('assert', shouldInvokeAssertTools, ['assertTools', 'result'])
   .addConditionalEdges('page', shouldInvokePageTools, ['pageTools', '__end__'])

diff --git a/packages/core/src/memoryVectorStore.ts → packages/core/src/memory.ts b/packages/core/src/memoryVectorStore.ts → packages/core/src/memory.ts
@@ -3,7 +3,6 @@
  * Reference: https://github.com/langchain-ai/langchainjs/blob/main/langchain/src/vectorstores/memory.ts
  */
 import type { EmbeddingsInterface } from '@langchain/core/embeddings'
-import type { MemoryVector } from './types'
 
 import { Document } from '@langchain/core/documents'
 import { VectorStore } from '@langchain/core/vectorstores'

diff --git a/packages/core/src/playword.ts b/packages/core/src/playword.ts
@@ -1,6 +1,5 @@
 import type { AIMessage } from '@langchain/core/messages'
 import type { Page } from 'playwright-core'
-import type { ActionResult, PlayWordInterface, PlayWordOptions, Recording } from './types'
 
 import { randomUUID } from 'crypto'
 import { access, mkdir, readFile, writeFile } from 'fs/promises'
@@ -10,8 +9,8 @@ import { HumanMessage } from '@langchain/core/messages'
 import { actionGraph } from './graph'
 import * as actions from './actions'
 import { AI } from './ai'
-import { divider, info, startLog } from './logger'
-import { aiPattern } from './resources'
+import { divider, info, startLog } from './utils'
+import { aiPattern } from './validators'
 
 /**
  * Decorator to handle the test fixture, including the setup process and teardown process.

diff --git a/packages/core/src/assertTools.ts → packages/core/src/tools/assertion.ts b/packages/core/src/assertTools.ts → packages/core/src/tools/assertion.ts
@@ -1,12 +1,11 @@
 import type { DynamicStructuredTool } from '@langchain/core/tools'
-import type { ToolConfig } from './types'
 
 import { tool } from '@langchain/core/tools'
 import { z } from 'zod'
 
-import * as actions from './actions'
-import { getElementLocations, sanitize } from './htmlUtils'
-import { genericTags } from './resources'
+import * as actions from '../actions'
+import { getElementLocations, sanitize } from '../utils'
+import { allowedTags } from '../validators'
 
 /**
  * Tools for asserting conditions on the page.
@@ -20,12 +19,12 @@ import { genericTags } from './resources'
  * - **AssertPageTitleEquals**
  * - **AssertPageUrlMatches**
  */
-export default [
+export const assertion = [
   tool(
     async ({ keywords, text }, { configurable }) => {
       const { ref, use_screenshot } = configurable as ToolConfig
       const snapshot = await actions.getSnapshot(ref)
-      const elements = getElementLocations(sanitize(snapshot), genericTags)
+      const elements = getElementLocations(sanitize(snapshot), allowedTags)
 
       if (snapshot !== ref.snapshot || elements.length !== ref.elements.length) {
         if (ref.debug && ref.logger) ref.logger.text = 'Snapshot changed. Embedding the new snapshot...'
@@ -76,7 +75,7 @@ export default [
     async ({ keywords, text }, { configurable }) => {
       const { ref, use_screenshot } = configurable as ToolConfig
       const snapshot = await actions.getSnapshot(ref)
-      const elements = getElementLocations(sanitize(snapshot), genericTags)
+      const elements = getElementLocations(sanitize(snapshot), allowedTags)
 
       if (snapshot !== ref.snapshot || elements.length !== ref.elements.length) {
         if (ref.debug && ref.logger) ref.logger.text = 'Snapshot changed. Embedding the new snapshot...'
@@ -127,7 +126,7 @@ export default [
     async ({ keywords }, { configurable }) => {
       const { ref, use_screenshot } = configurable as ToolConfig
       const snapshot = await actions.getSnapshot(ref)
-      const elements = getElementLocations(sanitize(snapshot), genericTags)
+      const elements = getElementLocations(sanitize(snapshot), allowedTags)
 
       if (snapshot !== ref.snapshot || elements.length !== ref.elements.length) {
         if (ref.debug && ref.logger) ref.logger.text = 'Snapshot changed. Embedding the new snapshot...'
@@ -177,7 +176,7 @@ export default [
     async ({ keywords }, { configurable }) => {
       const { ref, use_screenshot } = configurable as ToolConfig
       const snapshot = await actions.getSnapshot(ref)
-      const elements = getElementLocations(sanitize(snapshot), genericTags)
+      const elements = getElementLocations(sanitize(snapshot), allowedTags)
 
       if (snapshot !== ref.snapshot || elements.length !== ref.elements.length) {
         if (ref.debug && ref.logger) ref.logger.text = 'Snapshot changed. Embedding the new snapshot...'
@@ -227,7 +226,7 @@ export default [
     async ({ keywords }, { configurable }) => {
       const { ref, use_screenshot } = configurable as ToolConfig
       const snapshot = await actions.getSnapshot(ref)
-      const elements = getElementLocations(sanitize(snapshot), genericTags)
+      const elements = getElementLocations(sanitize(snapshot), allowedTags)
 
       if (snapshot !== ref.snapshot || elements.length !== ref.elements.length) {
         if (ref.debug && ref.logger) ref.logger.text = 'Snapshot changed. Embedding the new snapshot...'

diff --git a/packages/core/src/tools/index.ts b/packages/core/src/tools/index.ts
@@ -0,0 +1,2 @@
+export { assertion } from './assertion'
+export { page } from './page'
diff --git a/packages/core/src/pageTools.ts → packages/core/src/tools/page.ts b/packages/core/src/pageTools.ts → packages/core/src/tools/page.ts
@@ -1,12 +1,12 @@
 import type { DynamicStructuredTool } from '@langchain/core/tools'
-import type { ToolConfig } from './types'
 
+import { Document } from '@langchain/core/documents'
 import { tool } from '@langchain/core/tools'
 import { z } from 'zod'
 
-import * as actions from './actions'
-import { getElementLocations, sanitize } from './htmlUtils'
-import { genericTags } from './resources'
+import * as actions from '../actions'
+import { getElementLocations, sanitize } from '../utils'
+import { allowedTags } from '../validators'
 
 /**
  * Tools for interacting with the page.
@@ -26,12 +26,12 @@ import { genericTags } from './resources'
  * - **SwitchFrame**
  * - **WaitForText**
  */
-export default [
+export const page = [
   tool(
     async ({ keywords }, { configurable }) => {
       const { ref, use_screenshot } = configurable as ToolConfig
       const snapshot = await actions.getSnapshot(ref)
-      const elements = getElementLocations(sanitize(snapshot), genericTags)
+      const elements = getElementLocations(sanitize(snapshot), allowedTags)
 
       if (snapshot !== ref.snapshot || elements.length !== ref.elements.length) {
         if (ref.debug && ref.logger) ref.logger.text = 'Snapshot changed. Embedding the new snapshot...'
@@ -78,7 +78,7 @@ export default [
     async ({ attribute, keywords }, { configurable }) => {
       const { ref, use_screenshot } = configurable as ToolConfig
       const snapshot = await actions.getSnapshot(ref)
-      const elements = getElementLocations(sanitize(snapshot), genericTags)
+      const elements = getElementLocations(sanitize(snapshot), allowedTags)
 
       if (snapshot !== ref.snapshot || elements.length !== ref.elements.length) {
         if (ref.debug && ref.logger) ref.logger.text = 'Snapshot changed. Embedding the new snapshot...'
@@ -126,7 +126,7 @@ export default [
     async ({ keywords }, { configurable }) => {
       const { ref, use_screenshot } = configurable as ToolConfig
       const snapshot = await actions.getSnapshot(ref)
-      const elements = getElementLocations(sanitize(snapshot), genericTags)
+      const elements = getElementLocations(sanitize(snapshot), allowedTags)
 
       if (snapshot !== ref.snapshot || elements.length !== ref.elements.length) {
         if (ref.debug && ref.logger) ref.logger.text = 'Snapshot changed. Embedding the new snapshot...'
@@ -173,7 +173,7 @@ export default [
     async ({ keywords }, { configurable }) => {
       const { ref, use_screenshot } = configurable as ToolConfig
       const snapshot = await actions.getSnapshot(ref)
-      const elements = getElementLocations(sanitize(snapshot), genericTags)
+      const elements = getElementLocations(sanitize(snapshot), allowedTags)
 
       if (snapshot !== ref.snapshot || elements.length !== ref.elements.length) {
         if (ref.debug && ref.logger) ref.logger.text = 'Snapshot changed. Embedding the new snapshot...'
@@ -248,7 +248,7 @@ export default [
     async ({ keywords }, { configurable }) => {
       const { ref, use_screenshot } = configurable as ToolConfig
       const snapshot = await actions.getSnapshot(ref)
-      const elements = getElementLocations(sanitize(snapshot), genericTags)
+      const elements = getElementLocations(sanitize(snapshot), allowedTags)
 
       if (snapshot !== ref.snapshot || elements.length !== ref.elements.length) {
         if (ref.debug && ref.logger) ref.logger.text = 'Snapshot changed. Embedding the new snapshot...'
@@ -438,7 +438,8 @@ export default [
 
       if (enterFrame) {
         const frames = await actions.getFrames(ref)
-        const candidate = await ref.ai.getBestCandidate(ref.input, frames)
+        const docs = frames.map((pageContent) => new Document({ pageContent }))
+        const candidate = await ref.ai.getBestCandidate(ref.input, docs)
         if (ref.record)
           ref.recordings[ref.step].actions.push({ name: 'switchFrame', params: { frameNumber: candidate } })
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		export { assertion } from './assertion'
		export { page } from './page'