Skip to content

Commit

Permalink
feat(ai-model): merge ai planning and insight call to accelerate the aiAction (#97)
Browse files Browse the repository at this point in the history



---------

Co-authored-by: zhouxiao.shaw <[email protected]>

* feat(ai-model): optimize AI model for element inspection

* feat(ai-model): optimize AI model and add quick answer functionality

---------

Co-authored-by: yuyutaotao <[email protected]>

* feat(ai-model): implement quick answer functionality for element inspection

---------

Co-authored-by: zhouxiao.shaw <[email protected]>
  • Loading branch information
yuyutaotao and zhoushaw authored Oct 12, 2024
1 parent 18b5e92 commit f9dc0f6
Show file tree
Hide file tree
Showing 115 changed files with 7,407 additions and 9,311 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ai.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ jobs:
fi
- name: Build project
run: pnpm run build:pkg
run: pnpm run build

- name: Run e2e tests
run: pnpm run e2e
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ jobs:
run: pnpm install --frozen-lockfile

- name: Build project
run: pnpm run build:pkg
run: pnpm run build

- name: Run tests
run: pnpm run test
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ npx nx build @midscene/web
Build all packages:

```sh
pnpm run build:pkg
pnpm run build
```

---
Expand Down
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
"private": true,
"version": "0.5.1",
"scripts": {
"build:pkg": "nx run-many --target=build --projects=@midscene/core,@midscene/shared,@midscene/visualizer,@midscene/web,@midscene/cli --verbose",
"build": "nx run-many --target=build --projects=@midscene/core,@midscene/shared,@midscene/visualizer,@midscene/web,@midscene/cli --verbose",
"test": "nx run-many --target=test --projects=@midscene/core,--projects=@midscene/shared,@midscene/visualizer,@midscene/web,@midscene/cli --verbose",
"test:ai": "nx run-many --target=test:ai --projects=@midscene/core,@midscene/web --verbose",
"e2e": "nx run @midscene/web:e2e --verbose",
"e2e:cache": "nx run @midscene/web:e2e:cache --verbose",
"e2e:report": "nx run @midscene/web:e2e:report --verbose",
"test:ai:all": "npm run e2e && npm run e2e:cache && npm run e2e:report && npm run test:ai",
"prepare": "pnpm run build:pkg && simple-git-hooks",
"prepare": "pnpm run build && simple-git-hooks",
"check-dependency-version": "check-dependency-version-consistency .",
"lint": "npx biome check . --diagnostic-level=warn --no-errors-on-unmatched --fix",
"format:ci": "pretty-quick --since HEAD~1",
Expand Down
2 changes: 1 addition & 1 deletion packages/cli/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@midscene/cli",
"description": "Cli for Midscene.js",
"description": "An AI-powered automation SDK can control the page, perform assertions, and extract data in JSON format using natural language. See https://midscenejs.com/ for details.",
"version": "0.5.1",
"jsnext:source": "./src/index.ts",
"main": "./dist/lib/index.js",
Expand Down
41 changes: 9 additions & 32 deletions packages/midscene/package.json
Original file line number Diff line number Diff line change
@@ -1,43 +1,18 @@
{
"name": "@midscene/core",
"description": "Hello, It's Midscene",
"description": "An AI-powered automation SDK can control the page, perform assertions, and extract data in JSON format using natural language. See https://midscenejs.com/ for details.",
"version": "0.5.1",
"jsnext:source": "./src/index.ts",
"type": "commonjs",
"main": "./dist/lib/index.js",
"module": "./dist/es/index.js",
"types": "./dist/types/index.d.ts",
"files": ["dist", "report", "README.md"],
"exports": {
".": {
"types": "./dist/types/index.d.ts",
"import": "./dist/es/index.js",
"require": "./dist/lib/index.js"
},
"./query": {
"types": "./dist/types/query/index.d.ts",
"import": "./dist/es/query/index.js",
"require": "./dist/lib/query/index.js"
},
"./demo_data": {
"types": "./demo_data/index.d.ts",
"import": "./demo_data/index.js",
"require": "./demo_data/index.js"
},
"./utils": {
"types": "./dist/types/utils.d.ts",
"import": "./dist/es/utils.js",
"require": "./dist/lib/utils.js"
},
"./ai-model": {
"types": "./dist/types/ai-model.d.ts",
"import": "./dist/es/ai-model.js",
"require": "./dist/lib/ai-model.js"
},
"./image": {
"types": "./dist/types/image.d.ts",
"import": "./dist/es/image.js",
"require": "./dist/lib/image.js"
}
".": "./dist/lib/index.js",
"./query": "./dist/types/query/index.d.ts",
"./utils": "./dist/lib/utils.js",
"./ai-model": "./dist/lib/ai-model.js",
"./image": "./dist/lib/image.js"
},
"typesVersions": {
"*": {
Expand All @@ -55,6 +30,8 @@
"upgrade": "modern upgrade",
"test": "vitest --run",
"test:ai": "AITEST=true npm run test",
"evaluate": "npm run test:ai -- tests/ai/evaluate/inspect.test.ts",
"evaluate:plan": "PLAN_INSPECT=true npm run test:ai -- tests/ai/evaluate/inspect.test.ts",
"prepublishOnly": "npm run build"
},
"dependencies": {
Expand Down
31 changes: 10 additions & 21 deletions packages/midscene/src/ai-model/automation/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ import {
callAiFn,
transformUserMessages,
} from '../common';
import { systemPromptToTaskPlanning } from '../prompt/planning';
import { describeUserPage } from '../prompt/util';
import { systemPromptToTaskPlanning } from './planning';

export async function plan(
userPrompt: string,
Expand All @@ -16,7 +16,9 @@ export async function plan(
callAI?: typeof callAiFn<PlanningAIResponse>;
},
useModel?: 'coze' | 'openAI',
): Promise<{ plans: PlanningAction[] }> {
): Promise<{
plans: PlanningAction[];
}> {
const { callAI, context } = opts || {};
const { screenshotBase64 } = context;
const { description: pageDescription } = await describeUserPage(context);
Expand Down Expand Up @@ -51,19 +53,12 @@ export async function plan(
},
];

if (callAI) {
planFromAI = await callAI({
msgs,
AIActionType: AIActionType.PLAN,
useModel,
});
} else {
planFromAI = await callAiFn({
msgs,
AIActionType: AIActionType.PLAN,
useModel,
});
}
const call = callAI || callAiFn;
planFromAI = await call({
msgs,
AIActionType: AIActionType.PLAN,
useModel,
});

const actions = planFromAI?.actions || [];

Expand All @@ -74,11 +69,5 @@ export async function plan(
throw new Error(planFromAI.error);
}

// actions.forEach((task) => {
// if (task.type === 'Error') {
// throw new Error(task.thought);
// }
// });

return { plans: actions };
}
103 changes: 0 additions & 103 deletions packages/midscene/src/ai-model/automation/planning.ts

This file was deleted.

22 changes: 17 additions & 5 deletions packages/midscene/src/ai-model/inspect.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import type {
AIAssertionResponse,
AIElementParseResponse,
AISectionParseResponse,
AISingleElementResponse,
BaseElement,
UIContext,
} from '@/types';
Expand Down Expand Up @@ -31,16 +32,27 @@ export async function AiInspectElement<
>(options: {
context: UIContext<ElementType>;
multi: boolean;
findElementDescription: string;
targetElementDescription: string;
callAI?: typeof callAiFn<AIElementParseResponse>;
useModel?: 'coze' | 'openAI';
quickAnswer?: AISingleElementResponse;
}) {
const { context, multi, findElementDescription, callAI, useModel } = options;
const { context, multi, targetElementDescription, callAI, useModel } =
options;
const { screenshotBase64 } = context;
const { description, elementById } = await describeUserPage(context);

const systemPrompt = systemPromptToFindElement();
// meet quick answer
if (options.quickAnswer?.id && elementById(options.quickAnswer.id)) {
return {
parseResult: {
elements: [options.quickAnswer],
},
elementById,
};
}

const systemPrompt = systemPromptToFindElement();
const msgs: AIArgs = [
{ role: 'system', content: systemPrompt },
{
Expand All @@ -58,10 +70,10 @@ export async function AiInspectElement<
pageDescription: \n
${description}
Here is the description of the findElement. Just go ahead:
Here is the item user want to find. Just go ahead:
=====================================
${JSON.stringify({
description: findElementDescription,
description: targetElementDescription,
multi: multiDescription(multi),
})}
=====================================
Expand Down
6 changes: 3 additions & 3 deletions packages/midscene/src/ai-model/openai/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ import { AIResponseFormat } from '@/types';
import { wrapOpenAI } from 'langsmith/wrappers';
import OpenAI, { type ClientOptions, AzureOpenAI } from 'openai';
import type { ChatCompletionMessageParam } from 'openai/resources';
import { planSchema } from '../automation/planning';
import { AIActionType } from '../common';
import { findElementSchema } from '../prompt/element_inspector';
import { planSchema } from '../prompt/planning';
import { assertSchema } from '../prompt/util';

export const MIDSCENE_OPENAI_INIT_CONFIG_JSON =
Expand Down Expand Up @@ -51,7 +51,7 @@ async function createOpenAI() {

if (process.env[MIDSCENE_LANGSMITH_DEBUG]) {
console.log('DEBUGGING MODE: langsmith wrapper enabled');
const openai = wrapOpenAI(new OpenAI());
const openai = wrapOpenAI(new OpenAI(extraConfig));
return openai;
}

Expand All @@ -73,7 +73,7 @@ export async function call(
model,
messages,
response_format: responseFormat,
temperature: 0.2,
temperature: 0.1,
stream: false,
});
shouldPrintTiming && console.timeEnd('Midscene - AI call');
Expand Down
11 changes: 7 additions & 4 deletions packages/midscene/src/ai-model/prompt/element_inspector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ Input Example:
},
"elementInfos": [
{
"id": "3", // ID of the element
"id": "we23xsfwe", // ID of the element
"indexId": "0", // Index of the element,The image is labeled to the left of the element
"attributes": { // Attributes of the element
"nodeType": "IMG Node", // Type of element, types include: TEXT Node, IMG Node, BUTTON Node, INPUT Node
"src": "https://ap-southeast-3.m",
Expand All @@ -77,7 +78,8 @@ Input Example:
}
},
{
"id": "4", // ID of the element
"id": "wefew2222few2", // ID of the element
"indexId": "1", // Index of the element,The image is labeled to the left of the element
"attributes": { // Attributes of the element
"nodeType": "IMG Node", // Type of element, types include: TEXT Node, IMG Node, BUTTON Node, INPUT Node
"src": "data:image/png;base64,iVBORw0KGgoAAAANSU...",
Expand All @@ -93,7 +95,8 @@ Input Example:
},
...
{
"id": "27",
"id": "kwekfj2323",
"indexId": "2", // Index of the element,The image is labeled to the left of the element
"attributes": {
"nodeType": "TEXT Node",
"class": ".product-name"
Expand Down Expand Up @@ -125,7 +128,7 @@ Output Example:
"reason": "Reason for finding element 4: It is located in the upper right corner, is an image type, and according to the screenshot, it is a shopping cart icon button",
"text": "",
// ID of this element, replace with actual value in practice
"id": "4"
"id": "wefew2222few2"
}
],
"errors": []
Expand Down
Loading

0 comments on commit f9dc0f6

Please sign in to comment.