diff --git a/packages/midscene/src/ai-model/inspect.ts b/packages/midscene/src/ai-model/inspect.ts index 16de541f..39909ce9 100644 --- a/packages/midscene/src/ai-model/inspect.ts +++ b/packages/midscene/src/ai-model/inspect.ts @@ -39,7 +39,7 @@ export async function AiInspectElement< }) { const { context, multi, targetElementDescription, callAI, useModel } = options; - const { screenshotBase64 } = context; + const { screenshotBase64, screenshotBase64WithElementMarker } = context; const { description, elementById } = await describeUserPage(context); // meet quick answer @@ -61,7 +61,7 @@ export async function AiInspectElement< { type: 'image_url', image_url: { - url: screenshotBase64, + url: screenshotBase64WithElementMarker || screenshotBase64, }, }, { diff --git a/packages/midscene/src/types.ts b/packages/midscene/src/types.ts index d5e12e34..fef2c9c7 100644 --- a/packages/midscene/src/types.ts +++ b/packages/midscene/src/types.ts @@ -81,6 +81,8 @@ export interface AIAssertionResponse { export abstract class UIContext { abstract screenshotBase64: string; + abstract screenshotBase64WithElementMarker?: string; + abstract content: ElementType[]; abstract size: Size; diff --git a/packages/visualizer/src/component/blackboard.tsx b/packages/visualizer/src/component/blackboard.tsx index 192cb466..70aebec6 100644 --- a/packages/visualizer/src/component/blackboard.tsx +++ b/packages/visualizer/src/component/blackboard.tsx @@ -11,8 +11,6 @@ import { useBlackboardPreference, useInsightDump } from './store'; const itemFillAlpha = 0.4; const highlightAlpha = 0.4; -const bgOnAlpha = 1; -const bgOffAlpha = 0.3; const noop = () => { // noop }; @@ -70,7 +68,7 @@ const BlackBoard = (): JSX.Element => { const highlightIds = highlightElements.map((e) => e.id); const { context } = dump!; - const { size, screenshotBase64 } = context; + const { size, screenshotBase64, screenshotBase64WithElementMarker } = context; const screenWidth = size.width; const screenHeight = size.height; @@ -84,9 +82,11 @@ const BlackBoard = (): JSX.Element => { // key overlays const pixiBgRef = useRef(); - const { bgVisible, setBgVisible, elementsVisible, setTextsVisible } = + const { markerVisible, setMarkerVisible, elementsVisible, setTextsVisible } = useBlackboardPreference(); + const ifMarkerAvailable = !!screenshotBase64WithElementMarker; + useEffect(() => { Promise.resolve( (async () => { @@ -139,14 +139,28 @@ const BlackBoard = (): JSX.Element => { img.onload = () => { if (!app.stage) return; const screenshotTexture = PIXI.Texture.from(img); - const screenshotSprite = new PIXI.Sprite(screenshotTexture); - screenshotSprite.x = 0; - screenshotSprite.y = 0; - screenshotSprite.width = screenWidth; - screenshotSprite.height = screenHeight; - app.stage.addChildAt(screenshotSprite, 0); - pixiBgRef.current = screenshotSprite; - screenshotSprite.alpha = bgVisible ? bgOnAlpha : bgOffAlpha; + const backgroundSprite = new PIXI.Sprite(screenshotTexture); + backgroundSprite.x = 0; + backgroundSprite.y = 0; + backgroundSprite.width = screenWidth; + backgroundSprite.height = screenHeight; + app.stage.addChildAt(backgroundSprite, 0); + + if (ifMarkerAvailable) { + const markerImg = new Image(); + markerImg.src = screenshotBase64WithElementMarker; + markerImg.onload = () => { + const markerTexture = PIXI.Texture.from(markerImg); + const markerSprite = new PIXI.Sprite(markerTexture); + markerSprite.x = 0; + markerSprite.y = 0; + markerSprite.width = screenWidth; + markerSprite.height = screenHeight; + app.stage.addChildAt(markerSprite, 1); + pixiBgRef.current = markerSprite; + markerSprite.visible = markerVisible; + }; + } }; }, [app.stage, appInitialed]); @@ -156,7 +170,7 @@ const BlackBoard = (): JSX.Element => { highlightContainer.removeChildren(); elementMarkContainer.removeChildren(); - // element mark + // element rects context.content.forEach((element) => { const { rect, content, id } = element; const ifHighlight = highlightIds.includes(id); @@ -198,10 +212,10 @@ const BlackBoard = (): JSX.Element => { // elementsVisible, ]); - const onSetBg: CheckboxProps['onChange'] = (e) => { - setBgVisible(e.target.checked); + const onSetMarkerVisible: CheckboxProps['onChange'] = (e) => { + setMarkerVisible(e.target.checked); if (pixiBgRef.current) { - pixiBgRef.current.alpha = e.target.checked ? bgOnAlpha : bgOffAlpha; + pixiBgRef.current.visible = e.target.checked; } }; @@ -238,8 +252,12 @@ const BlackBoard = (): JSX.Element => { />
- - Screenshot + + Marker Elements diff --git a/packages/visualizer/src/component/replay-scripts.tsx b/packages/visualizer/src/component/replay-scripts.tsx index 44ac10dd..e46f8be4 100644 --- a/packages/visualizer/src/component/replay-scripts.tsx +++ b/packages/visualizer/src/component/replay-scripts.tsx @@ -40,6 +40,7 @@ export interface AnimationScript { } const stillDuration = 1200; +const stillAfterInsightDuration = 300; const locateDuration = 800; const actionDuration = 1000; const clearInsightDuration = 200; @@ -185,9 +186,23 @@ export const generateAnimationScripts = ( throw new Error('insight dump is required'); } const insightContentLength = insightDump.context.content.length; + + if (insightDump.context.screenshotBase64WithElementMarker) { + // show the original screenshot first + scripts.push({ + type: 'img', + img: insightDump.context.screenshotBase64, + duration: stillAfterInsightDuration, + title, + subTitle, + }); + } + scripts.push({ type: 'insight', - img: insightDump.context.screenshotBase64, + img: + insightDump.context.screenshotBase64WithElementMarker || + insightDump.context.screenshotBase64, insightDump: insightDump, camera: currentCameraState === fullPageCameraState || !insightCameraState @@ -202,7 +217,7 @@ export const generateAnimationScripts = ( scripts.push({ type: 'sleep', - duration: 800, + duration: stillAfterInsightDuration, title, subTitle, }); diff --git a/packages/visualizer/src/component/store.tsx b/packages/visualizer/src/component/store.tsx index b7e5d545..965889e6 100644 --- a/packages/visualizer/src/component/store.tsx +++ b/packages/visualizer/src/component/store.tsx @@ -13,15 +13,15 @@ import { generateAnimationScripts } from './replay-scripts'; const { create } = Z; export const useBlackboardPreference = create<{ - bgVisible: boolean; + markerVisible: boolean; elementsVisible: boolean; - setBgVisible: (visible: boolean) => void; + setMarkerVisible: (visible: boolean) => void; setTextsVisible: (visible: boolean) => void; }>((set) => ({ - bgVisible: true, + markerVisible: true, elementsVisible: true, - setBgVisible: (visible: boolean) => { - set({ bgVisible: visible }); + setMarkerVisible: (visible: boolean) => { + set({ markerVisible: visible }); }, setTextsVisible: (visible: boolean) => { set({ elementsVisible: visible }); diff --git a/packages/web-integration/src/common/utils.ts b/packages/web-integration/src/common/utils.ts index dd1b61fa..7ab6c7f8 100644 --- a/packages/web-integration/src/common/utils.ts +++ b/packages/web-integration/src/common/utils.ts @@ -44,7 +44,7 @@ export async function parseContextFromWebPage( const size = await imageInfoOfBase64(screenshotBase64); // composite element infos to screenshot - const screenshotBase64WithElementInfos = await compositeElementInfoImg({ + const screenshotBase64WithElementMarker = await compositeElementInfoImg({ inputImgBase64: screenshotBase64.split(';base64,').pop() as string, elementsPositionInfo: elementsPositionInfoWithoutText, }); @@ -52,7 +52,8 @@ export async function parseContextFromWebPage( return { content: elementsInfo, size, - screenshotBase64: `data:image/png;base64,${screenshotBase64WithElementInfos}`, + screenshotBase64, + screenshotBase64WithElementMarker: `data:image/png;base64,${screenshotBase64WithElementMarker}`, url, }; } diff --git a/packages/web-integration/tests/ai/web/puppeteer/showcase.test.ts b/packages/web-integration/tests/ai/web/puppeteer/showcase.test.ts index 0128f66d..25b719fb 100644 --- a/packages/web-integration/tests/ai/web/puppeteer/showcase.test.ts +++ b/packages/web-integration/tests/ai/web/puppeteer/showcase.test.ts @@ -58,9 +58,11 @@ describe( ); const mid = new PuppeteerAgent(originPage); + await mid.aiAction('Click the password input on page'); + await mid.aiAction('scroll down two screen'); - const widgets = await mid.aiQuery( + await mid.aiQuery( 'find all inputs in the page, return the field name in string[]', );