Skip to content

Commit

Permalink
feat: finish insight text skylark
Browse files Browse the repository at this point in the history
  • Loading branch information
da730 committed Jun 17, 2024
1 parent aab2a07 commit 90f7833
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 155 deletions.
16 changes: 15 additions & 1 deletion packages/vmind/src/applications/IngelligentInsight/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import type { InsightContext, InsightOutput } from '../types';
import DataProcessTaskNodeMeta from './taskNodes/dataProcess';
import ExtractInsightTaskNodeMeta from './taskNodes/extractInsight';
import GenerateInsightTextGPTMeta from './taskNodes/generateInsightText/GPT';
import GenerateInsightTextSkylarkMeta from './taskNodes/generateInsightText/skylark';

const intelligentInsightGPTMeta: ApplicationMeta<InsightContext, InsightOutput> = {
name: 'IntelligentInsight',
Expand All @@ -25,7 +26,20 @@ const intelligentInsightGPTMeta: ApplicationMeta<InsightContext, InsightOutput>

/**
 * Application meta for the Skylark variant of IntelligentInsight.
 *
 * Task nodes are listed in this order: `dataProcess`, `extractInsight`,
 * `generateInsightText` (the Skylark-specific text generation node).
 */
const intelligentInsightSkylarkMeta: ApplicationMeta<InsightContext, InsightOutput> = {
  name: 'IntelligentInsight',
  // Exactly one `taskNodes` property: an object literal must not declare the same
  // property name twice, so no empty placeholder list is kept alongside this one.
  taskNodes: [
    {
      taskNode: DataProcessTaskNodeMeta,
      name: 'dataProcess'
    },
    {
      taskNode: ExtractInsightTaskNodeMeta,
      name: 'extractInsight'
    },
    {
      taskNode: GenerateInsightTextSkylarkMeta,
      name: 'generateInsightText'
    }
  ]
};

const intelligentInsightMetaByModel = {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
import type { GetQuerySQLContext, GetQuerySQLOutput } from '../../../../../applications/dataAggregation/types';
import type { LLMBasedTaskNodeMeta } from '../../../../../base/metaTypes';
import { TaskNodeType } from '../../../../../base/taskNode/types';
import { ModelType } from '../../../../../common/typings';
import { dataQueryRequestLLM, parseSkylarkResponseAsJSON } from './utils';
import { SkylarkDataAggregationPrompt } from './prompt';
import type { InsightContext } from '../../../../types';
import type { GenerateTextOutput } from '../../../types';
import { SkylarkInsightTextPrompt } from './prompt';
import { parseInsightTextResponse, patchInsightText, requestInsightLLM } from './utils';

const GetSQLTaskNodeSkylarkMeta: LLMBasedTaskNodeMeta<GetQuerySQLContext, GetQuerySQLOutput> = {
const GenerateInsightTextSkylarkMeta: LLMBasedTaskNodeMeta<InsightContext, GenerateTextOutput> = {
type: TaskNodeType.LLM_BASED,
modelType: ModelType.SKYLARK,
parser: parseSkylarkResponseAsJSON,
patcher: [(input: GetQuerySQLContext) => input as unknown as GetQuerySQLOutput],
requester: dataQueryRequestLLM,
prompt: new SkylarkDataAggregationPrompt()
parser: parseInsightTextResponse,
patcher: [patchInsightText],
requester: requestInsightLLM,
prompt: new SkylarkInsightTextPrompt()
};

export default GetSQLTaskNodeSkylarkMeta;
export default GenerateInsightTextSkylarkMeta;
Original file line number Diff line number Diff line change
@@ -1,26 +1,18 @@
import { Prompt } from '../../../../../../base/tools/prompt';
import { getInsightTextPrompt } from './template';
import type { GetQuerySQLContext } from '../../../../../../applications/dataAggregation/types';
import { getQueryDatasetPrompt } from './template';
import type { DataAggregationContext } from '../../../../../../applications/types';

/**
 * Appends a fixed reminder sentence to the user's command before it is sent to the model.
 * The reminder asks for backtick-wrapped column names and aggregation of all measure columns.
 *
 * @param userInput - The raw command text supplied by the user.
 * @returns The command followed by the reminder sentence.
 */
const patchDataQueryInput = (userInput: string) => {
  const sqlReminder = ' 使用` `包裹sql中的所有列名。使用支持的聚合函数将所有的度量列聚合。';
  return userInput + sqlReminder;
};

export class SkylarkDataAggregationPrompt extends Prompt<GetQuerySQLContext> {
export class SkylarkInsightTextPrompt extends Prompt<GetQuerySQLContext> {
constructor() {
super('');
}
getSystemPrompt(context: GetQuerySQLContext) {
const { llmOptions } = context;
const QueryDatasetPrompt = getQueryDatasetPrompt(llmOptions.showThoughts ?? true);
return QueryDatasetPrompt;
const InsightTextPrompt = getInsightTextPrompt(llmOptions.insightTextContext);
return InsightTextPrompt;
}

getUserPrompt(context: DataAggregationContext): string {
const { userInput, fieldInfo } = context;
const patchedInput = patchDataQueryInput(userInput);

const queryDatasetMessage = `User's Command: ${patchedInput}\nColumn Information: ${JSON.stringify(fieldInfo)}`;
return queryDatasetMessage;
return '';
}
}
Original file line number Diff line number Diff line change
@@ -1,77 +1,15 @@
/* eslint-disable max-len */
/**
 * Name used for the dataset inside the query prompt: it is interpolated into the
 * instructions and into the `FROM` clause of the example SQL below.
 */
export const VMIND_DATA_SOURCE = 'VMind_data_source';

/**
 * Builds the prompt text that instructs the model to answer a user command with a
 * single JSON object containing `sql` and `fieldInfo`.
 *
 * The template lists the SQL writing rules, the steps to follow, the reply format,
 * one worked example and a set of constraints.
 *
 * @param showThoughts - When true, a `thoughts` field line is included in both the
 *   reply format and the example; when false those lines are left empty.
 * @returns The complete prompt string.
 */
export const getQueryDatasetPrompt = (
showThoughts: boolean
) => `您是一位数据分析的专家。这是一个名为${VMIND_DATA_SOURCE}的原始数据集。用户会告诉您他的命令和${VMIND_DATA_SOURCE}的列信息。您的任务是根据指令生成一个sql和fieldInfo。只返回一个JSON对象。
# SQL语句编写要求
- 您需要编写一个标准的sql语句。
- 所有的度量列必须被聚合,即使用户没有要求你这样做。支持的聚合函数:["MAX()", "MIN()", "SUM()", "COUNT()", "AVG()"]
- 支持的sql关键字:["SELECT", "FROM", "WHERE", "GROUP BY", "HAVING", "ORDER BY", "LIMIT", "DISTINCT"].
- 不要使用不支持的关键词,如:WITHIN, FIELD。不要使用不支持的聚合函数,如:PERCENTILE_CONT, PERCENTILE。不要使用不支持的操作符。我们将使用alasql执行您的sql。不支持的关键词、函数和操作符会导致系统崩溃。
- 使用\` \`包裹sql中的所有列名
- 让你的sql尽可能简单。
您需要按照以下步骤编写sql语句。
# 步骤
1. 从用户的指令中提取与数据相关的部分。忽略其他与数据无关的部分。
2. 根据列的名称和类型,推断${VMIND_DATA_SOURCE}中与用户指令有关的列,并将其添加到SELECT中。尽可能多地选择相关列,不要遗漏任何可能有关的列。请仔细考虑与时间、日期有关的列,避免遗漏。你只能使用Column Information中提到的列,不要假设不存在的列。如果现有的列不能满足用户的命令,选择Column Information中最相关的列。
3. 不论用户指定了哪种图表类型,将所选择的度量列使用聚合函数聚合,即使你推断它们不适合被聚合,即使用户没有要求你这样做。如果你不确定使用哪个聚合函数,使用SUM()。不要使用不支持的聚合函数。
4. 使用维度列对数据进行分组。
5. 在您的sql中,如有必要,您也可以使用WHERE, HAVING, ORDER BY, LIMIT。使用支持的操作符完成WHERE和HAVING。只能使用如columnA = value1,sum_b > 0的二元表达式。在您的表达式中,只能使用在维度列的domain中出现的维度值。
让我们一步一步思考。不要忘了将所有度量列聚合。
用户将会直接使用JSON.parse()解析您返回的内容,只返回一个不带任何额外内容的JSON对象。您的JSON对象必须包含sql和fieldInfo。
请按以下格式回复:
\`\`\`
{
${showThoughts ? 'thoughts: string //你的想法' : ''}
sql: string; //你的sql。注意,这是一个JSON对象中的字符串,所以必须是一行,不含任何\\n。
fieldInfo: {
fieldName: string; //字段名。
type: string; //字段类型,string,int,date或float。
}[]; //您的sql中字段信息的数组。描述其名称和类型。
}
\`\`\`
#Examples:
User's Command: Show me the change of the GDP rankings of each country.
Column Information: [{"fieldName":"country","type":"string","role":"dimension","domain":["USA", "China", "England"]},{"fieldName":"continent","type":"string","role":"dimension","domain":["North America","Asia","Europe"]},{"fieldName":"GDP","type":"float","role":"measure","domain":[2780,617030]},{"fieldName":"year","type":"int","role":"measure","domain":[1973,2018]}]
Response:
\`\`\`
{
${showThoughts ? '"thoughts": string //your thoughts' : ''}
"sql": "SELECT \`country\`, \`year\`, SUM(\`GDP\`) AS \`total_GDP\` FROM ${VMIND_DATA_SOURCE} GROUP BY \`country\`, \`year\` ORDER BY \`year\`, \`total_GDP\` DESC",
"fieldInfo": [
{
"fieldName": "country",
"type": "string"
},
{
"fieldName": "year",
"type": "date"
},
{
"fieldName": "total_GDP",
"type": "int"
}
]
}
\`\`\`
在上面这个例子中,用户想要展示不同国家GDP排名的变化,相关列有country和GDP。用户需要一个年份列才能展示“变化”,因此我们还需要选择year。GDP是一个指标列,因此我们要将它聚合。从用户输入中无法推断聚合方式,因此使用SUM()。您只需要将生成的JSON返回给用户。
一步完成您的任务。
# 约束:
- 在一行内写出您的sql语句,不要有任何\\n。您的sql必须能够由alasql执行。
- 请不要在您的sql语句中改变或翻译列名,请保持原有的列名不变,即使他们含有空格或-。
- 在你的sql中不要遗漏GROUP BY。
- 直接返回JSON对象,不要有任何其他内容。确保它能够被JavaScript中的JSON.parse()直接解析。
`;
/**
 * Builds the system prompt asking the model to turn a JSON-described data insight
 * into a short, readable text for a chart annotation.
 *
 * @param context - Optional background text. When it is a non-empty string it is
 *   inserted as a background section between the field explanation and the
 *   requirements; otherwise that section collapses to an empty line.
 * @returns The complete prompt string.
 */
export const getInsightTextPrompt = (context?: string) => {
  // Decide up front what goes between the field explanation and the requirements.
  let backgroundSection = '\n';
  if (context && context.length > 0) {
    backgroundSection = '#背景\n' + context + '\n';
  }

  return `# 任务
用户使用一些洞察提取算法,从数据中发现了一些数据洞察。用户想在图表中使用标注的形式将这些洞察展现出来。请你根据用户输入的json格式的洞察信息,生成能够展示在图表标注中的文本。
# 说明
type: 洞察类型
data: 出现洞察的数据项
value: 洞察的具体值
seriesName: 出现洞察的类别名称
${backgroundSection}
# 要求
1. 生成的文本要尽可能简短,但不能遗漏数据中关键的维度和指标信息,用户需要了解洞察的完整内容
2. 生成的文本要有较高的可读性`;
};
Original file line number Diff line number Diff line change
@@ -1,66 +1,32 @@
import type { LLMResponse } from '../../../../../common/typings';
import { matchJSONStr, replaceAll } from '../../../../../common/utils/utils';
import type { GetQuerySQLContext } from '../../../../../applications/dataAggregation/types';
import { omit } from '@visactor/chart-advisor';
import type { Requester } from '../../../../../base/tools/requester';
import JSON5 from 'json5';
import { requestSkyLark } from '../../../../../common/utils/skylark';
import { replaceAll } from '../../../../../common/utils/utils';
import type { VMindInsight } from '../../../types';

export const parseJson = (JsonStr: string, prefix?: string) => {
const parseNoPrefixStr = (str: string) => {
//尝试不带前缀的解析
try {
return JSON5.parse(str);
} catch (err) {
return {
error: true
};
}
};
//解析GPT返回的JSON格式
if (prefix) {
//被某些字符包裹
const splitArr = JsonStr.split(prefix);
const splittedStr = splitArr[splitArr.length - 2];
const res = parseNoPrefixStr(splittedStr);
if (!res.error) {
return res;
}
}
//没有被前缀包裹,或者解析被前缀包裹的json失败,尝试直接解析返回结果
const res2 = parseNoPrefixStr(JsonStr);
return res2;
/**
 * Waits for every pending insight-text request and extracts one text per response.
 *
 * For each resolved response the content of the first choice's message is taken and
 * its newline characters are replaced with spaces via `replaceAll`.
 *
 * @param promises - The collection of pending request results to wait for.
 * @returns An object whose `insightTextList` holds the extracted texts, in the same
 *   order as the input.
 */
export const parseInsightTextResponse: any = async (promises: any) => {
  const resolvedResponses: any[] = await Promise.all(promises);
  const insightTextList = resolvedResponses.map((llmResponse: any) => {
    const firstChoice = llmResponse.choices[0];
    // Flatten the text onto a single line.
    return replaceAll(firstChoice.message.content, '\n', ' ');
  });
  return { insightTextList };
};

export const parseSkylarkResponseAsJSON = (skylarkRes: LLMResponse) => {
try {
if (skylarkRes.error) {
return {
error: true,
...skylarkRes.error
};
}
const choices = skylarkRes.choices;
const content = replaceAll(choices[0].message.content, '\n', ' ');
const jsonStr = matchJSONStr(content);
const resJson = parseJson(jsonStr, '```');
const { sql, fieldInfo: responseFiledInfo } = resJson;
return { sql, llmFieldInfo: responseFiledInfo, usage: skylarkRes.usage };
} catch (err: any) {
return {
error: true,
message: err.message
};
}
/**
 * Pairs each insight with the generated text at the same index.
 *
 * @param context - Object providing `insights` and `insightTextList`.
 * @returns A new object whose `insights` are shallow copies of the input insights,
 *   each with a `text` property set to `insightTextList[index]`.
 */
export const patchInsightText = (context: any) => {
  const { insights: sourceInsights, insightTextList: texts } = context;
  // Copy rather than mutate: the incoming insight objects are left untouched.
  const attachText = (insight: any, index: number) => {
    return { ...insight, text: texts[index] };
  };
  return { insights: sourceInsights.map(attachText) };
};

export const dataQueryRequestLLM: Requester<GetQuerySQLContext> = async (
prompt: string,
queryDatasetMessage: string,
context: GetQuerySQLContext
) => {
const { llmOptions } = context;
const requestFunc = llmOptions.customRequestFunc?.dataQuery ?? requestSkyLark;
const QueryDatasetPrompt = prompt;
const dataProcessRes = await requestFunc(QueryDatasetPrompt, queryDatasetMessage, llmOptions);
return dataProcessRes;
/**
 * Issues one model request per insight and returns the un-awaited results.
 *
 * The request function is `llmOptions.customRequestFunc.IntelligentInsight` when
 * provided, otherwise `requestSkyLark`. Each request's user message is the insight
 * serialized as JSON (4-space indent) with its `significant` field omitted.
 *
 * @param prompt - System prompt passed unchanged to every request.
 * @param message - Not used by this requester.
 * @param context - Object providing `llmOptions` and `insights`.
 * @returns The list of values returned by the request function, one per insight.
 */
export const requestInsightLLM: Requester<any> = async (prompt: string, message: string, context: any) => {
  const { llmOptions, insights } = context;
  const sendRequest = llmOptions.customRequestFunc?.IntelligentInsight ?? requestSkyLark;

  const toUserMessage = (insight: VMindInsight) => {
    const payload = omit(insight, ['significant']);
    return JSON.stringify(payload, null, 4);
  };

  return insights.map((insight: VMindInsight) => sendRequest(prompt, toUserMessage(insight), llmOptions));
};

0 comments on commit 90f7833

Please sign in to comment.