From 90f78333ffe4f37250220ade9b18c0a1b715e9fa Mon Sep 17 00:00:00 2001 From: Darkside Date: Mon, 17 Jun 2024 16:41:44 +0800 Subject: [PATCH] feat: finish insight text skylark --- .../applications/IngelligentInsight/index.ts | 16 +++- .../generateInsightText/skylark/index.ts | 19 ++-- .../skylark/prompt/index.ts | 18 ++-- .../skylark/prompt/template.ts | 86 +++---------------- .../generateInsightText/skylark/utils.ts | 82 ++++++------------ 5 files changed, 66 insertions(+), 155 deletions(-) diff --git a/packages/vmind/src/applications/IngelligentInsight/index.ts b/packages/vmind/src/applications/IngelligentInsight/index.ts index bcb82a71..892c171d 100644 --- a/packages/vmind/src/applications/IngelligentInsight/index.ts +++ b/packages/vmind/src/applications/IngelligentInsight/index.ts @@ -4,6 +4,7 @@ import type { InsightContext, InsightOutput } from '../types'; import DataProcessTaskNodeMeta from './taskNodes/dataProcess'; import ExtractInsightTaskNodeMeta from './taskNodes/extractInsight'; import GenerateInsightTextGPTMeta from './taskNodes/generateInsightText/GPT'; +import GenerateInsightTextSkylarkMeta from './taskNodes/generateInsightText/skylark'; const intelligentInsightGPTMeta: ApplicationMeta = { name: 'IntelligentInsight', @@ -25,7 +26,20 @@ const intelligentInsightGPTMeta: ApplicationMeta const intelligentInsightSkylarkMeta: ApplicationMeta = { name: 'IntelligentInsight', - taskNodes: [] + taskNodes: [ + { + taskNode: DataProcessTaskNodeMeta, + name: 'dataProcess' + }, + { + taskNode: ExtractInsightTaskNodeMeta, + name: 'extractInsight' + }, + { + taskNode: GenerateInsightTextSkylarkMeta, + name: 'generateInsightText' + } + ] }; const intelligentInsightMetaByModel = { diff --git a/packages/vmind/src/applications/IngelligentInsight/taskNodes/generateInsightText/skylark/index.ts b/packages/vmind/src/applications/IngelligentInsight/taskNodes/generateInsightText/skylark/index.ts index cdb30394..dc7c5bea 100644 --- a/packages/vmind/src/applications/IngelligentInsight/taskNodes/generateInsightText/skylark/index.ts +++ b/packages/vmind/src/applications/IngelligentInsight/taskNodes/generateInsightText/skylark/index.ts @@ -1,17 +1,18 @@ -import type { GetQuerySQLContext, GetQuerySQLOutput } from '../../../../../applications/dataAggregation/types'; import type { LLMBasedTaskNodeMeta } from '../../../../../base/metaTypes'; import { TaskNodeType } from '../../../../../base/taskNode/types'; import { ModelType } from '../../../../../common/typings'; -import { dataQueryRequestLLM, parseSkylarkResponseAsJSON } from './utils'; -import { SkylarkDataAggregationPrompt } from './prompt'; +import type { InsightContext } from '../../../../types'; +import type { GenerateTextOutput } from '../../../types'; +import { SkylarkInsightTextPrompt } from './prompt'; +import { parseInsightTextResponse, patchInsightText, requestInsightLLM } from './utils'; -const GetSQLTaskNodeSkylarkMeta: LLMBasedTaskNodeMeta = { +const GenerateInsightTextSkylarkMeta: LLMBasedTaskNodeMeta = { type: TaskNodeType.LLM_BASED, modelType: ModelType.SKYLARK, - parser: parseSkylarkResponseAsJSON, - patcher: [(input: GetQuerySQLContext) => input as unknown as GetQuerySQLOutput], - requester: dataQueryRequestLLM, - prompt: new SkylarkDataAggregationPrompt() + parser: parseInsightTextResponse, + patcher: [patchInsightText], + requester: requestInsightLLM, + prompt: new SkylarkInsightTextPrompt() }; -export default GetSQLTaskNodeSkylarkMeta; +export default GenerateInsightTextSkylarkMeta; diff --git a/packages/vmind/src/applications/IngelligentInsight/taskNodes/generateInsightText/skylark/prompt/index.ts b/packages/vmind/src/applications/IngelligentInsight/taskNodes/generateInsightText/skylark/prompt/index.ts index cd6a2ae7..da3ea55d 100644 --- a/packages/vmind/src/applications/IngelligentInsight/taskNodes/generateInsightText/skylark/prompt/index.ts +++ b/packages/vmind/src/applications/IngelligentInsight/taskNodes/generateInsightText/skylark/prompt/index.ts @@ -1,26 +1,18 @@ import { Prompt } from '../../../../../../base/tools/prompt'; +import { getInsightTextPrompt } from './template'; import type { GetQuerySQLContext } from '../../../../../../applications/dataAggregation/types'; -import { getQueryDatasetPrompt } from './template'; import type { DataAggregationContext } from '../../../../../../applications/types'; -const patchDataQueryInput = (userInput: string) => - userInput + ' 使用` `包裹sql中的所有列名。使用支持的聚合函数将所有的度量列聚合。'; - -export class SkylarkDataAggregationPrompt extends Prompt { +export class SkylarkInsightTextPrompt extends Prompt { constructor() { super(''); } getSystemPrompt(context: GetQuerySQLContext) { const { llmOptions } = context; - const QueryDatasetPrompt = getQueryDatasetPrompt(llmOptions.showThoughts ?? true); - return QueryDatasetPrompt; + const InsightTextPrompt = getInsightTextPrompt(llmOptions.insightTextContext); + return InsightTextPrompt; } - getUserPrompt(context: DataAggregationContext): string { - const { userInput, fieldInfo } = context; - const patchedInput = patchDataQueryInput(userInput); - - const queryDatasetMessage = `User's Command: ${patchedInput}\nColumn Information: ${JSON.stringify(fieldInfo)}`; - return queryDatasetMessage; + return ''; } } diff --git a/packages/vmind/src/applications/IngelligentInsight/taskNodes/generateInsightText/skylark/prompt/template.ts b/packages/vmind/src/applications/IngelligentInsight/taskNodes/generateInsightText/skylark/prompt/template.ts index 19ef3695..85c87c4d 100644 --- a/packages/vmind/src/applications/IngelligentInsight/taskNodes/generateInsightText/skylark/prompt/template.ts +++ b/packages/vmind/src/applications/IngelligentInsight/taskNodes/generateInsightText/skylark/prompt/template.ts @@ -1,77 +1,15 @@ /* eslint-disable max-len */ export const VMIND_DATA_SOURCE = 'VMind_data_source'; -export const getQueryDatasetPrompt = ( - showThoughts: boolean -) => `您是一位数据分析的专家。这是一个名为${VMIND_DATA_SOURCE}的原始数据集。用户会告诉您他的命令和${VMIND_DATA_SOURCE}的列信息。您的任务是根据指令生成一个sql和fieldInfo。只返回一个JSON对象。 - -# SQL语句编写要求 -- 您需要编写一个标准的sql语句。 -- 所有的度量列必须被聚合,即使用户没有要求你这样做。支持的聚合函数:["MAX()", "MIN()", "SUM()", "COUNT()", "AVG()"] -- 支持的sql关键字:["SELECT", "FROM", "WHERE", "GROUP BY", "HAVING", "ORDER BY", "LIMIT", "DISTINCT"]. -- 不要使用不支持的关键词,如:WITHIN, FIELD。不要使用不支持的聚合函数,如:PERCENTILE_CONT, PERCENTILE。不要使用不支持的操作符。我们将使用alasql执行您的sql。不支持的关键词、函数和操作符会导致系统崩溃。 -- 使用\` \`包裹sql中的所有列名 -- 让你的sql尽可能简单。 - -您需要按照以下步骤编写sql语句。 - -# 步骤 -1. 从用户的指令中提取与数据相关的部分。忽略其他与数据无关的部分。 -2. 根据列的名称和类型,推断${VMIND_DATA_SOURCE}中与用户指令有关的列,并将其添加到SELECT中。尽可能多地选择相关列,不要遗漏任何可能有关的列。请仔细考虑与时间、日期有关的列,避免遗漏。你只能使用Column Information中提到的列,不要假设不存在的列。如果现有的列不能满足用户的命令,选择Column Information中最相关的列。 -3. 不论用户指定了哪种图表类型,将所选择的度量列使用聚合函数聚合,即使你推断它们不适合被聚合,即使用户没有要求你这样做。如果你不确定使用哪个聚合函数,使用SUM()。不要使用不支持的聚合函数。 -4. 使用维度列对数据进行分组。 -5. 在您的sql中,如有必要,您也可以使用WHERE, HAVING, ORDER BY, LIMIT。使用支持的操作符完成WHERE和HAVING。只能使用如columnA = value1,sum_b > 0的二元表达式。在您的表达式中,只能使用在维度列的domain中出现的维度值。 - -让我们一步一步思考。不要忘了将所有度量列聚合。 - -用户将会直接使用JSON.parse()解析您返回的内容,只返回一个不带任何额外内容的JSON对象。您的JSON对象必须包含sql和fieldInfo。 - -请按以下格式回复: -\`\`\` -{ -${showThoughts ? 'thoughts: string //你的想法' : ''} -sql: string; //你的sql。注意,这是一个JSON对象中的字符串,所以必须是一行,不含任何\\n。 -fieldInfo: { -fieldName: string; //字段名。 -type: string; //字段类型,string,int,date或float。 -}[]; //您的sql中字段信息的数组。描述其名称和类型。 -} -\`\`\` - -#Examples: - -User's Command: Show me the change of the GDP rankings of each country. -Column Information: [{"fieldName":"country","type":"string","role":"dimension","domain":["USA", "China", "England"]},{"fieldName":"continent","type":"string","role":"dimension","domain":["North America","Asia","Europe"]},{"fieldName":"GDP","type":"float","role":"measure","domain":[2780,617030]},{"fieldName":"year","type":"int","role":"measure","domain":[1973,2018]}] - -Response: -\`\`\` -{ - ${showThoughts ? '"thoughts": string //your thoughts' : ''} - "sql": "SELECT \`country\`, \`year\`, SUM(\`GDP\`) AS \`total_GDP\` FROM ${VMIND_DATA_SOURCE} GROUP BY \`country\`, \`year\` ORDER BY \`year\`, \`total_GDP\` DESC", - "fieldInfo": [ - { - "fieldName": "country", - "type": "string" - }, - { - "fieldName": "year", - "type": "date" - }, - { - "fieldName": "total_GDP", - "type": "int" - } - ] -} -\`\`\` - -在上面这个例子中,用户想要展示不同国家GDP排名的变化,相关列有country和GDP。用户需要一个年份列才能展示“变化”,因此我们还需要选择year。GDP是一个指标列,因此我们要将它聚合。从用户输入中无法推断聚合方式,因此使用SUM()。您只需要将生成的JSON返回给用户。 - -一步完成您的任务。 - -# 约束: -- 在一行内写出您的sql语句,不要有任何\\n。您的sql必须能够由alasql执行。 -- 请不要在您的sql语句中改变或翻译列名,请保持原有的列名不变,即使他们含有空格或-。 -- 在你的sql中不要遗漏GROUP BY。 -- 直接返回JSON对象,不要有任何其他内容。确保它能够被JavaScript中的JSON.parse()直接解析。 -`; +export const getInsightTextPrompt = (context?: string) => `# 任务 +用户使用一些洞察提取算法,从数据中发现了一些数据洞察。用户想在图表中使用标注的形式将这些洞察展现出来。请你根据用户输入的json格式的洞察信息,生成能够展示在图表标注中的文本。 + +# 说明 +type: 洞察类型 +data: 出现洞察的数据项 +value: 洞察的具体值 +seriesName: 出现洞察的类别名称 +${context && context.length > 0 ? '#背景\n' + context + '\n' : '\n'} +# 要求 +1. 生成的文本要尽可能简短,但不能遗漏数据中关键的维度和指标信息,用户需要了解洞察的完整内容 +2. 生成的文本要有较高的可读性`; diff --git a/packages/vmind/src/applications/IngelligentInsight/taskNodes/generateInsightText/skylark/utils.ts b/packages/vmind/src/applications/IngelligentInsight/taskNodes/generateInsightText/skylark/utils.ts index 61f6a820..a2119bf4 100644 --- a/packages/vmind/src/applications/IngelligentInsight/taskNodes/generateInsightText/skylark/utils.ts +++ b/packages/vmind/src/applications/IngelligentInsight/taskNodes/generateInsightText/skylark/utils.ts @@ -1,66 +1,32 @@ -import type { LLMResponse } from '../../../../../common/typings'; -import { matchJSONStr, replaceAll } from '../../../../../common/utils/utils'; -import type { GetQuerySQLContext } from '../../../../../applications/dataAggregation/types'; +import { omit } from '@visactor/chart-advisor'; import type { Requester } from '../../../../../base/tools/requester'; -import JSON5 from 'json5'; import { requestSkyLark } from '../../../../../common/utils/skylark'; +import { replaceAll } from '../../../../../common/utils/utils'; +import type { VMindInsight } from '../../../types'; -export const parseJson = (JsonStr: string, prefix?: string) => { - const parseNoPrefixStr = (str: string) => { - //尝试不带前缀的解析 - try { - return JSON5.parse(str); - } catch (err) { - return { - error: true - }; - } - }; - //解析GPT返回的JSON格式 - if (prefix) { - //被某些字符包裹 - const splitArr = JsonStr.split(prefix); - const splittedStr = splitArr[splitArr.length - 2]; - const res = parseNoPrefixStr(splittedStr); - if (!res.error) { - return res; - } - } - //没有被前缀包裹,或者解析被前缀包裹的json失败,尝试直接解析返回结果 - const res2 = parseNoPrefixStr(JsonStr); - return res2; +export const parseInsightTextResponse: any = async (promises: any) => { + const responseList = await Promise.all(promises).then(response => { + return response.map(res => { + const choices = res.choices; + const insightText = replaceAll(choices[0].message.content, '\n', ' '); + return insightText; + }); + }); + return { insightTextList: responseList }; }; -export const parseSkylarkResponseAsJSON = (skylarkRes: LLMResponse) => { - try { - if (skylarkRes.error) { - return { - error: true, - ...skylarkRes.error - }; - } - const choices = skylarkRes.choices; - const content = replaceAll(choices[0].message.content, '\n', ' '); - const jsonStr = matchJSONStr(content); - const resJson = parseJson(jsonStr, '```'); - const { sql, fieldInfo: responseFiledInfo } = resJson; - return { sql, llmFieldInfo: responseFiledInfo, usage: skylarkRes.usage }; - } catch (err: any) { - return { - error: true, - message: err.message - }; - } +export const patchInsightText = (context: any) => { + const { insights, insightTextList } = context; + const insightsNew = insights.map((insight: any, index: number) => ({ ...insight, text: insightTextList[index] })); + return { insights: insightsNew }; }; -export const dataQueryRequestLLM: Requester = async ( - prompt: string, - queryDatasetMessage: string, - context: GetQuerySQLContext -) => { - const { llmOptions } = context; - const requestFunc = llmOptions.customRequestFunc?.dataQuery ?? requestSkyLark; - const QueryDatasetPrompt = prompt; - const dataProcessRes = await requestFunc(QueryDatasetPrompt, queryDatasetMessage, llmOptions); - return dataProcessRes; +export const requestInsightLLM: Requester = async (prompt: string, message: string, context: any) => { + const { llmOptions, insights } = context; + const requestFunc = llmOptions.customRequestFunc?.IntelligentInsight ?? requestSkyLark; + const insightTextPromises = insights.map((insight: VMindInsight) => { + const userMessage = JSON.stringify(omit(insight, ['significant']), null, 4); + return requestFunc(prompt, userMessage, llmOptions); + }); + return insightTextPromises; };