-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 5 changed files with 66 additions and 155 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
19 changes: 10 additions & 9 deletions
19
.../vmind/src/applications/IngelligentInsight/taskNodes/generateInsightText/skylark/index.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,18 @@ | ||
import type { GetQuerySQLContext, GetQuerySQLOutput } from '../../../../../applications/dataAggregation/types'; | ||
import type { LLMBasedTaskNodeMeta } from '../../../../../base/metaTypes'; | ||
import { TaskNodeType } from '../../../../../base/taskNode/types'; | ||
import { ModelType } from '../../../../../common/typings'; | ||
import { dataQueryRequestLLM, parseSkylarkResponseAsJSON } from './utils'; | ||
import { SkylarkDataAggregationPrompt } from './prompt'; | ||
import type { InsightContext } from '../../../../types'; | ||
import type { GenerateTextOutput } from '../../../types'; | ||
import { SkylarkInsightTextPrompt } from './prompt'; | ||
import { parseInsightTextResponse, patchInsightText, requestInsightLLM } from './utils'; | ||
|
||
const GetSQLTaskNodeSkylarkMeta: LLMBasedTaskNodeMeta<GetQuerySQLContext, GetQuerySQLOutput> = { | ||
const GenerateInsightTextSkylarkMeta: LLMBasedTaskNodeMeta<InsightContext, GenerateTextOutput> = { | ||
type: TaskNodeType.LLM_BASED, | ||
modelType: ModelType.SKYLARK, | ||
parser: parseSkylarkResponseAsJSON, | ||
patcher: [(input: GetQuerySQLContext) => input as unknown as GetQuerySQLOutput], | ||
requester: dataQueryRequestLLM, | ||
prompt: new SkylarkDataAggregationPrompt() | ||
parser: parseInsightTextResponse, | ||
patcher: [patchInsightText], | ||
requester: requestInsightLLM, | ||
prompt: new SkylarkInsightTextPrompt() | ||
}; | ||
|
||
export default GetSQLTaskNodeSkylarkMeta; | ||
export default GenerateInsightTextSkylarkMeta; |
18 changes: 5 additions & 13 deletions
18
...src/applications/IngelligentInsight/taskNodes/generateInsightText/skylark/prompt/index.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,26 +1,18 @@ | ||
import { Prompt } from '../../../../../../base/tools/prompt'; | ||
import { getInsightTextPrompt } from './template'; | ||
import type { GetQuerySQLContext } from '../../../../../../applications/dataAggregation/types'; | ||
import { getQueryDatasetPrompt } from './template'; | ||
import type { DataAggregationContext } from '../../../../../../applications/types'; | ||
|
||
const patchDataQueryInput = (userInput: string) => | ||
userInput + ' 使用` `包裹sql中的所有列名。使用支持的聚合函数将所有的度量列聚合。'; | ||
|
||
export class SkylarkDataAggregationPrompt extends Prompt<GetQuerySQLContext> { | ||
export class SkylarkInsightTextPrompt extends Prompt<GetQuerySQLContext> { | ||
constructor() { | ||
super(''); | ||
} | ||
getSystemPrompt(context: GetQuerySQLContext) { | ||
const { llmOptions } = context; | ||
const QueryDatasetPrompt = getQueryDatasetPrompt(llmOptions.showThoughts ?? true); | ||
return QueryDatasetPrompt; | ||
const InsightTextPrompt = getInsightTextPrompt(llmOptions.insightTextContext); | ||
return InsightTextPrompt; | ||
} | ||
|
||
getUserPrompt(context: DataAggregationContext): string { | ||
const { userInput, fieldInfo } = context; | ||
const patchedInput = patchDataQueryInput(userInput); | ||
|
||
const queryDatasetMessage = `User's Command: ${patchedInput}\nColumn Information: ${JSON.stringify(fieldInfo)}`; | ||
return queryDatasetMessage; | ||
return ''; | ||
} | ||
} |
86 changes: 12 additions & 74 deletions
86
.../applications/IngelligentInsight/taskNodes/generateInsightText/skylark/prompt/template.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,77 +1,15 @@ | ||
/* eslint-disable max-len */ | ||
export const VMIND_DATA_SOURCE = 'VMind_data_source'; | ||
|
||
export const getQueryDatasetPrompt = ( | ||
showThoughts: boolean | ||
) => `您是一位数据分析的专家。这是一个名为${VMIND_DATA_SOURCE}的原始数据集。用户会告诉您他的命令和${VMIND_DATA_SOURCE}的列信息。您的任务是根据指令生成一个sql和fieldInfo。只返回一个JSON对象。 | ||
# SQL语句编写要求 | ||
- 您需要编写一个标准的sql语句。 | ||
- 所有的度量列必须被聚合,即使用户没有要求你这样做。支持的聚合函数:["MAX()", "MIN()", "SUM()", "COUNT()", "AVG()"] | ||
- 支持的sql关键字:["SELECT", "FROM", "WHERE", "GROUP BY", "HAVING", "ORDER BY", "LIMIT", "DISTINCT"]. | ||
- 不要使用不支持的关键词,如:WITHIN, FIELD。不要使用不支持的聚合函数,如:PERCENTILE_CONT, PERCENTILE。不要使用不支持的操作符。我们将使用alasql执行您的sql。不支持的关键词、函数和操作符会导致系统崩溃。 | ||
- 使用\` \`包裹sql中的所有列名 | ||
- 让你的sql尽可能简单。 | ||
您需要按照以下步骤编写sql语句。 | ||
# 步骤 | ||
1. 从用户的指令中提取与数据相关的部分。忽略其他与数据无关的部分。 | ||
2. 根据列的名称和类型,推断${VMIND_DATA_SOURCE}中与用户指令有关的列,并将其添加到SELECT中。尽可能多地选择相关列,不要遗漏任何可能有关的列。请仔细考虑与时间、日期有关的列,避免遗漏。你只能使用Column Information中提到的列,不要假设不存在的列。如果现有的列不能满足用户的命令,选择Column Information中最相关的列。 | ||
3. 不论用户指定了哪种图表类型,将所选择的度量列使用聚合函数聚合,即使你推断它们不适合被聚合,即使用户没有要求你这样做。如果你不确定使用哪个聚合函数,使用SUM()。不要使用不支持的聚合函数。 | ||
4. 使用维度列对数据进行分组。 | ||
5. 在您的sql中,如有必要,您也可以使用WHERE, HAVING, ORDER BY, LIMIT。使用支持的操作符完成WHERE和HAVING。只能使用如columnA = value1,sum_b > 0的二元表达式。在您的表达式中,只能使用在维度列的domain中出现的维度值。 | ||
让我们一步一步思考。不要忘了将所有度量列聚合。 | ||
用户将会直接使用JSON.parse()解析您返回的内容,只返回一个不带任何额外内容的JSON对象。您的JSON对象必须包含sql和fieldInfo。 | ||
请按以下格式回复: | ||
\`\`\` | ||
{ | ||
${showThoughts ? 'thoughts: string //你的想法' : ''} | ||
sql: string; //你的sql。注意,这是一个JSON对象中的字符串,所以必须是一行,不含任何\\n。 | ||
fieldInfo: { | ||
fieldName: string; //字段名。 | ||
type: string; //字段类型,string,int,date或float。 | ||
}[]; //您的sql中字段信息的数组。描述其名称和类型。 | ||
} | ||
\`\`\` | ||
#Examples: | ||
User's Command: Show me the change of the GDP rankings of each country. | ||
Column Information: [{"fieldName":"country","type":"string","role":"dimension","domain":["USA", "China", "England"]},{"fieldName":"continent","type":"string","role":"dimension","domain":["North America","Asia","Europe"]},{"fieldName":"GDP","type":"float","role":"measure","domain":[2780,617030]},{"fieldName":"year","type":"int","role":"measure","domain":[1973,2018]}] | ||
Response: | ||
\`\`\` | ||
{ | ||
${showThoughts ? '"thoughts": string //your thoughts' : ''} | ||
"sql": "SELECT \`country\`, \`year\`, SUM(\`GDP\`) AS \`total_GDP\` FROM ${VMIND_DATA_SOURCE} GROUP BY \`country\`, \`year\` ORDER BY \`year\`, \`total_GDP\` DESC", | ||
"fieldInfo": [ | ||
{ | ||
"fieldName": "country", | ||
"type": "string" | ||
}, | ||
{ | ||
"fieldName": "year", | ||
"type": "date" | ||
}, | ||
{ | ||
"fieldName": "total_GDP", | ||
"type": "int" | ||
} | ||
] | ||
} | ||
\`\`\` | ||
在上面这个例子中,用户想要展示不同国家GDP排名的变化,相关列有country和GDP。用户需要一个年份列才能展示“变化”,因此我们还需要选择year。GDP是一个指标列,因此我们要将它聚合。从用户输入中无法推断聚合方式,因此使用SUM()。您只需要将生成的JSON返回给用户。 | ||
一步完成您的任务。 | ||
# 约束: | ||
- 在一行内写出您的sql语句,不要有任何\\n。您的sql必须能够由alasql执行。 | ||
- 请不要在您的sql语句中改变或翻译列名,请保持原有的列名不变,即使他们含有空格或-。 | ||
- 在你的sql中不要遗漏GROUP BY。 | ||
- 直接返回JSON对象,不要有任何其他内容。确保它能够被JavaScript中的JSON.parse()直接解析。 | ||
`; | ||
export const getInsightTextPrompt = (context?: string) => `# 任务 | ||
用户使用一些洞察提取算法,从数据中发现了一些数据洞察。用户想在图表中使用标注的形式将这些洞察展现出来。请你根据用户输入的json格式的洞察信息,生成能够展示在图表标注中的文本。 | ||
# 说明 | ||
type: 洞察类型 | ||
data: 出现洞察的数据项 | ||
value: 洞察的具体值 | ||
seriesName: 出现洞察的类别名称 | ||
${context && context.length > 0 ? '#背景\n' + context + '\n' : '\n'} | ||
# 要求 | ||
1. 生成的文本要尽可能简短,但不能遗漏数据中关键的维度和指标信息,用户需要了解洞察的完整内容 | ||
2. 生成的文本要有较高的可读性`; |
82 changes: 24 additions & 58 deletions
82
.../vmind/src/applications/IngelligentInsight/taskNodes/generateInsightText/skylark/utils.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,66 +1,32 @@ | ||
import type { LLMResponse } from '../../../../../common/typings'; | ||
import { matchJSONStr, replaceAll } from '../../../../../common/utils/utils'; | ||
import type { GetQuerySQLContext } from '../../../../../applications/dataAggregation/types'; | ||
import { omit } from '@visactor/chart-advisor'; | ||
import type { Requester } from '../../../../../base/tools/requester'; | ||
import JSON5 from 'json5'; | ||
import { requestSkyLark } from '../../../../../common/utils/skylark'; | ||
import { replaceAll } from '../../../../../common/utils/utils'; | ||
import type { VMindInsight } from '../../../types'; | ||
|
||
export const parseJson = (JsonStr: string, prefix?: string) => { | ||
const parseNoPrefixStr = (str: string) => { | ||
//尝试不带前缀的解析 | ||
try { | ||
return JSON5.parse(str); | ||
} catch (err) { | ||
return { | ||
error: true | ||
}; | ||
} | ||
}; | ||
//解析GPT返回的JSON格式 | ||
if (prefix) { | ||
//被某些字符包裹 | ||
const splitArr = JsonStr.split(prefix); | ||
const splittedStr = splitArr[splitArr.length - 2]; | ||
const res = parseNoPrefixStr(splittedStr); | ||
if (!res.error) { | ||
return res; | ||
} | ||
} | ||
//没有被前缀包裹,或者解析被前缀包裹的json失败,尝试直接解析返回结果 | ||
const res2 = parseNoPrefixStr(JsonStr); | ||
return res2; | ||
export const parseInsightTextResponse: any = async (promises: any) => { | ||
const responseList = await Promise.all(promises).then(response => { | ||
return response.map(res => { | ||
const choices = res.choices; | ||
const insightText = replaceAll(choices[0].message.content, '\n', ' '); | ||
return insightText; | ||
}); | ||
}); | ||
return { insightTextList: responseList }; | ||
}; | ||
|
||
export const parseSkylarkResponseAsJSON = (skylarkRes: LLMResponse) => { | ||
try { | ||
if (skylarkRes.error) { | ||
return { | ||
error: true, | ||
...skylarkRes.error | ||
}; | ||
} | ||
const choices = skylarkRes.choices; | ||
const content = replaceAll(choices[0].message.content, '\n', ' '); | ||
const jsonStr = matchJSONStr(content); | ||
const resJson = parseJson(jsonStr, '```'); | ||
const { sql, fieldInfo: responseFiledInfo } = resJson; | ||
return { sql, llmFieldInfo: responseFiledInfo, usage: skylarkRes.usage }; | ||
} catch (err: any) { | ||
return { | ||
error: true, | ||
message: err.message | ||
}; | ||
} | ||
export const patchInsightText = (context: any) => { | ||
const { insights, insightTextList } = context; | ||
const insightsNew = insights.map((insight: any, index: number) => ({ ...insight, text: insightTextList[index] })); | ||
return { insights: insightsNew }; | ||
}; | ||
|
||
export const dataQueryRequestLLM: Requester<GetQuerySQLContext> = async ( | ||
prompt: string, | ||
queryDatasetMessage: string, | ||
context: GetQuerySQLContext | ||
) => { | ||
const { llmOptions } = context; | ||
const requestFunc = llmOptions.customRequestFunc?.dataQuery ?? requestSkyLark; | ||
const QueryDatasetPrompt = prompt; | ||
const dataProcessRes = await requestFunc(QueryDatasetPrompt, queryDatasetMessage, llmOptions); | ||
return dataProcessRes; | ||
export const requestInsightLLM: Requester<any> = async (prompt: string, message: string, context: any) => { | ||
const { llmOptions, insights } = context; | ||
const requestFunc = llmOptions.customRequestFunc?.IntelligentInsight ?? requestSkyLark; | ||
const insightTextPromises = insights.map((insight: VMindInsight) => { | ||
const userMessage = JSON.stringify(omit(insight, ['significant']), null, 4); | ||
return requestFunc(prompt, userMessage, llmOptions); | ||
}); | ||
return insightTextPromises; | ||
}; |