Skip to content

Commit

Permalink
fix: add marker image into UIContext
Browse files (browse the repository at this point in the history)
  • Loading branch information
yuyutaotao committed Oct 17, 2024
1 parent 101b0a2 commit 0c87cda
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 30 deletions.
4 changes: 2 additions & 2 deletions packages/midscene/src/ai-model/inspect.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ export async function AiInspectElement<
}) {
const { context, multi, targetElementDescription, callAI, useModel } =
options;
const { screenshotBase64 } = context;
const { screenshotBase64, screenshotBase64WithElementMarker } = context;
const { description, elementById } = await describeUserPage(context);

// meet quick answer
Expand All @@ -61,7 +61,7 @@ export async function AiInspectElement<
{
type: 'image_url',
image_url: {
url: screenshotBase64,
url: screenshotBase64WithElementMarker || screenshotBase64,
},
},
{
Expand Down
2 changes: 2 additions & 0 deletions packages/midscene/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ export interface AIAssertionResponse {
export abstract class UIContext<ElementType extends BaseElement = BaseElement> {
abstract screenshotBase64: string;

abstract screenshotBase64WithElementMarker?: string;

abstract content: ElementType[];

abstract size: Size;
Expand Down
54 changes: 36 additions & 18 deletions packages/visualizer/src/component/blackboard.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ import { useBlackboardPreference, useInsightDump } from './store';

const itemFillAlpha = 0.4;
const highlightAlpha = 0.4;
const bgOnAlpha = 1;
const bgOffAlpha = 0.3;
const noop = () => {
// noop
};
Expand Down Expand Up @@ -70,7 +68,7 @@ const BlackBoard = (): JSX.Element => {
const highlightIds = highlightElements.map((e) => e.id);

const { context } = dump!;
const { size, screenshotBase64 } = context;
const { size, screenshotBase64, screenshotBase64WithElementMarker } = context;

const screenWidth = size.width;
const screenHeight = size.height;
Expand All @@ -84,9 +82,11 @@ const BlackBoard = (): JSX.Element => {

// key overlays
const pixiBgRef = useRef<PIXI.Sprite>();
const { bgVisible, setBgVisible, elementsVisible, setTextsVisible } =
const { markerVisible, setMarkerVisible, elementsVisible, setTextsVisible } =
useBlackboardPreference();

const ifMarkerAvailable = !!screenshotBase64WithElementMarker;

useEffect(() => {
Promise.resolve(
(async () => {
Expand Down Expand Up @@ -139,14 +139,28 @@ const BlackBoard = (): JSX.Element => {
img.onload = () => {
if (!app.stage) return;
const screenshotTexture = PIXI.Texture.from(img);
const screenshotSprite = new PIXI.Sprite(screenshotTexture);
screenshotSprite.x = 0;
screenshotSprite.y = 0;
screenshotSprite.width = screenWidth;
screenshotSprite.height = screenHeight;
app.stage.addChildAt(screenshotSprite, 0);
pixiBgRef.current = screenshotSprite;
screenshotSprite.alpha = bgVisible ? bgOnAlpha : bgOffAlpha;
const backgroundSprite = new PIXI.Sprite(screenshotTexture);
backgroundSprite.x = 0;
backgroundSprite.y = 0;
backgroundSprite.width = screenWidth;
backgroundSprite.height = screenHeight;
app.stage.addChildAt(backgroundSprite, 0);

if (ifMarkerAvailable) {
const markerImg = new Image();
markerImg.src = screenshotBase64WithElementMarker;
markerImg.onload = () => {
const markerTexture = PIXI.Texture.from(markerImg);
const markerSprite = new PIXI.Sprite(markerTexture);
markerSprite.x = 0;
markerSprite.y = 0;
markerSprite.width = screenWidth;
markerSprite.height = screenHeight;
app.stage.addChildAt(markerSprite, 1);
pixiBgRef.current = markerSprite;
markerSprite.visible = markerVisible;
};
}
};
}, [app.stage, appInitialed]);

Expand All @@ -156,7 +170,7 @@ const BlackBoard = (): JSX.Element => {
highlightContainer.removeChildren();
elementMarkContainer.removeChildren();

// element mark
// element rects
context.content.forEach((element) => {
const { rect, content, id } = element;
const ifHighlight = highlightIds.includes(id);
Expand Down Expand Up @@ -198,10 +212,10 @@ const BlackBoard = (): JSX.Element => {
// elementsVisible,
]);

const onSetBg: CheckboxProps['onChange'] = (e) => {
setBgVisible(e.target.checked);
const onSetMarkerVisible: CheckboxProps['onChange'] = (e) => {
setMarkerVisible(e.target.checked);
if (pixiBgRef.current) {
pixiBgRef.current.alpha = e.target.checked ? bgOnAlpha : bgOffAlpha;
pixiBgRef.current.visible = e.target.checked;
}
};

Expand Down Expand Up @@ -238,8 +252,12 @@ const BlackBoard = (): JSX.Element => {
/>
<div className="blackboard-filter">
<div className="overlay-control">
<Checkbox checked={bgVisible} onChange={onSetBg}>
Screenshot
<Checkbox
checked={markerVisible}
onChange={onSetMarkerVisible}
disabled={!ifMarkerAvailable}
>
Marker
</Checkbox>
<Checkbox checked={elementsVisible} onChange={onSetElementsVisible}>
Elements
Expand Down
19 changes: 17 additions & 2 deletions packages/visualizer/src/component/replay-scripts.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ export interface AnimationScript {
}

const stillDuration = 1200;
const stillAfterInsightDuration = 300;
const locateDuration = 800;
const actionDuration = 1000;
const clearInsightDuration = 200;
Expand Down Expand Up @@ -185,9 +186,23 @@ export const generateAnimationScripts = (
throw new Error('insight dump is required');
}
const insightContentLength = insightDump.context.content.length;

if (insightDump.context.screenshotBase64WithElementMarker) {
// show the original screenshot first
scripts.push({
type: 'img',
img: insightDump.context.screenshotBase64,
duration: stillAfterInsightDuration,
title,
subTitle,
});
}

scripts.push({
type: 'insight',
img: insightDump.context.screenshotBase64,
img:
insightDump.context.screenshotBase64WithElementMarker ||
insightDump.context.screenshotBase64,
insightDump: insightDump,
camera:
currentCameraState === fullPageCameraState || !insightCameraState
Expand All @@ -202,7 +217,7 @@ export const generateAnimationScripts = (

scripts.push({
type: 'sleep',
duration: 800,
duration: stillAfterInsightDuration,
title,
subTitle,
});
Expand Down
10 changes: 5 additions & 5 deletions packages/visualizer/src/component/store.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@ import { generateAnimationScripts } from './replay-scripts';

const { create } = Z;
export const useBlackboardPreference = create<{
bgVisible: boolean;
markerVisible: boolean;
elementsVisible: boolean;
setBgVisible: (visible: boolean) => void;
setMarkerVisible: (visible: boolean) => void;
setTextsVisible: (visible: boolean) => void;
}>((set) => ({
bgVisible: true,
markerVisible: true,
elementsVisible: true,
setBgVisible: (visible: boolean) => {
set({ bgVisible: visible });
setMarkerVisible: (visible: boolean) => {
set({ markerVisible: visible });
},
setTextsVisible: (visible: boolean) => {
set({ elementsVisible: visible });
Expand Down
5 changes: 3 additions & 2 deletions packages/web-integration/src/common/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,16 @@ export async function parseContextFromWebPage(
const size = await imageInfoOfBase64(screenshotBase64);

// composite element infos to screenshot
const screenshotBase64WithElementInfos = await compositeElementInfoImg({
const screenshotBase64WithElementMarker = await compositeElementInfoImg({
inputImgBase64: screenshotBase64.split(';base64,').pop() as string,
elementsPositionInfo: elementsPositionInfoWithoutText,
});

return {
content: elementsInfo,
size,
screenshotBase64: `data:image/png;base64,${screenshotBase64WithElementInfos}`,
screenshotBase64,
screenshotBase64WithElementMarker: `data:image/png;base64,${screenshotBase64WithElementMarker}`,
url,
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,11 @@ describe(
);
const mid = new PuppeteerAgent(originPage);

await mid.aiAction('Click the password input on page');

await mid.aiAction('scroll down two screen');

const widgets = await mid.aiQuery(
await mid.aiQuery(
'find all inputs in the page, return the field name in string[]',
);

Expand Down

0 comments on commit 0c87cda

Please sign in to comment.