Skip to content

Commit

Permalink
fix(web-extract): fix the extractor failing when there is no <body/> (#76)
Browse files: browse the repository at this point in the history
  • Loading branch information
yuyutaotao authored Aug 28, 2024
1 parent e071adf commit f8fdf60
Show file tree
Hide file tree
Showing 12 changed files with 296 additions and 164 deletions.
14 changes: 10 additions & 4 deletions packages/web-integration/src/common/tasks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ export class PageTaskExecutor {
type: 'Insight',
subType: 'Locate',
param: plan.param,
executor: async (param) => {
executor: async (param, taskContext) => {
const { task } = taskContext;
let insightDump: InsightDump | undefined;
const dumpCollector: DumpSubscriber = (dump) => {
insightDump = dump;
Expand All @@ -124,7 +125,6 @@ export class PageTaskExecutor {
},
});

assert(element, `Element not found: ${param.prompt}`);
if (locateResult) {
this.taskCache.saveCache({
type: 'locate',
Expand All @@ -136,6 +136,13 @@ export class PageTaskExecutor {
response: locateResult,
});
}
if (!element) {
task.log = {
dump: insightDump,
};
throw new Error(`Element not found: ${param.prompt}`);
}

return {
output: {
element,
Expand Down Expand Up @@ -296,8 +303,7 @@ export class PageTaskExecutor {
subType: 'Sleep',
param: plan.param,
executor: async (taskParam) => {
assert(taskParam.timeMs, 'No time to sleep');
await sleep(taskParam.timeMs);
await sleep(taskParam.timeMs || 3000);
},
};
return taskActionSleep;
Expand Down
1 change: 1 addition & 0 deletions packages/web-integration/src/extractor/debug.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ import { extractTextWithPosition } from '.';

// Debug entry point: run the extractor against the document body and print the
// results. The second argument is presumably the debug-mode flag — confirm
// against the signature of extractTextWithPosition.
console.log(extractTextWithPosition(document.body, true));
// Same extraction with the flag off, serialized to JSON before printing.
console.log(JSON.stringify(extractTextWithPosition(document.body, false)));
// Attach the extractor to `window` (presumably so it can be invoked manually
// from the browser console while debugging).
(window as any).extractTextWithPosition = extractTextWithPosition;
9 changes: 9 additions & 0 deletions packages/web-integration/src/extractor/dom-util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,12 @@ export function isImgElement(node: Node): node is HTMLImageElement {
/**
 * Tells whether `node` is a DOM text node, judged by its `nodeName` being
 * `#text` (compared case-insensitively).
 *
 * NOTE(review): the predicate narrows to `HTMLTextAreaElement`, although a
 * node named `#text` is normally a `Text` node — confirm against callers
 * before tightening the predicate type.
 *
 * @param node - The node to inspect.
 * @returns `true` when the lower-cased node name equals `#text`.
 */
export function isTextElement(node: Node): node is HTMLTextAreaElement {
  const normalizedName = node.nodeName.toLowerCase();
  return normalizedName === '#text';
}

/**
 * Tells whether `node` is an `HTMLElement` carrying at least one of the ARIA
 * attributes `aria-label`, `aria-controls` or `aria-labelledby`.
 *
 * Only the presence of the attribute is checked, not its value.
 *
 * @param node - The node to inspect.
 * @returns `true` when `node` is an `HTMLElement` with one of those attributes.
 */
export function isWidgetElement(node: Node): node is HTMLElement {
  // Anything that is not an HTMLElement cannot qualify.
  if (!(node instanceof HTMLElement)) {
    return false;
  }
  const element = node;
  const widgetAttributes = ['aria-label', 'aria-controls', 'aria-labelledby'];
  // `some` stops at the first attribute that is present, like the `||` chain.
  return widgetAttributes.some((attributeName) =>
    element.hasAttribute(attributeName),
  );
}
40 changes: 33 additions & 7 deletions packages/web-integration/src/extractor/extractor.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
import { NodeType, TEXT_SIZE_THRESHOLD } from '@midscene/shared/constants';
import { NodeType } from '@midscene/shared/constants';
import {
isButtonElement,
isFormElement,
isImgElement,
isTextElement,
isWidgetElement,
} from './dom-util';
import {
generateHash,
getNodeAttributes,
getPseudoElementContent,
logger,
midsceneGenerateHash,
setDataForNode,
setDebugMode,
visibleRect,
Expand All @@ -36,7 +37,7 @@ export interface ElementInfo {
center: [number, number];
}

const container: HTMLElement = document.body;
const container: HTMLElement = document.body || document;

function generateId(numberId: number) {
// const letters = 'ABCDEFGHIJKLMNPRSTUVXYZ';
Expand Down Expand Up @@ -89,7 +90,7 @@ export function extractTextWithPosition(

if (isFormElement(node)) {
const attributes = getNodeAttributes(node);
const nodeHashId = generateHash(attributes.placeholder, rect);
const nodeHashId = midsceneGenerateHash(attributes.placeholder, rect);
const selector = setDataForNode(node, nodeHashId);
let valueContent =
attributes.value || attributes.placeholder || node.textContent || '';
Expand Down Expand Up @@ -130,7 +131,7 @@ export function extractTextWithPosition(
const attributes = getNodeAttributes(node);
const pseudo = getPseudoElementContent(node);
const content = node.innerText || pseudo.before || pseudo.after || '';
const nodeHashId = generateHash(content, rect);
const nodeHashId = midsceneGenerateHash(content, rect);
const selector = setDataForNode(node, nodeHashId);
elementInfoArray.push({
id: nodeHashId,
Expand All @@ -155,7 +156,7 @@ export function extractTextWithPosition(

if (isImgElement(node)) {
const attributes = getNodeAttributes(node);
const nodeHashId = generateHash('', rect);
const nodeHashId = midsceneGenerateHash('', rect);
const selector = setDataForNode(node, nodeHashId);
elementInfoArray.push({
id: nodeHashId,
Expand Down Expand Up @@ -188,7 +189,7 @@ export function extractTextWithPosition(
if (!text.trim() && attributeKeys.length === 0) {
return;
}
const nodeHashId = generateHash(text, rect);
const nodeHashId = midsceneGenerateHash(text, rect);
const selector = setDataForNode(node, nodeHashId);
elementInfoArray.push({
id: nodeHashId,
Expand All @@ -212,6 +213,31 @@ export function extractTextWithPosition(
return;
}

if (isWidgetElement(node)) {
const attributes = getNodeAttributes(node);
const nodeHashId = midsceneGenerateHash('', rect);
const selector = setDataForNode(node, nodeHashId);
elementInfoArray.push({
id: nodeHashId,
indexId: generateId(nodeIndex++),
nodeHashId,
nodeType: NodeType.FORM_ITEM,
locator: selector,
attributes: {
...attributes,
nodeType: NodeType.FORM_ITEM,
},
content: '',
rect,
center: [
Math.round(rect.left + rect.width / 2),
Math.round(rect.top + rect.height / 2),
],
htmlNode: debugMode ? node : null,
});
return true;
}

return true;
}

Expand Down
23 changes: 14 additions & 9 deletions packages/web-integration/src/extractor/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -154,22 +154,26 @@ export function visibleRect(
if (parentStyle.overflow === 'hidden') {
const parentRect = parent.getBoundingClientRect();
const tolerance = 10;

if (
rect.top < parentRect.top - tolerance &&
rect.left < parentRect.left - tolerance &&
rect.bottom > parentRect.bottom + tolerance &&
rect.right > parentRect.right + tolerance
rect.right < parentRect.left - tolerance ||
rect.left > parentRect.right + tolerance ||
rect.bottom < parentRect.top - tolerance ||
rect.top > parentRect.bottom + tolerance
) {
logger('Element is clipped by an ancestor', parent, rect, parentRect);
logger(el, 'element is partially or totally hidden by an ancestor', {
rect,
parentRect,
});
return false;
}
}
parent = parent.parentElement;
}

return {
left: Math.round(rect.left - scrollLeft),
top: Math.round(rect.top - scrollTop),
left: rect.left,
top: rect.top,
width: Math.round(rect.width),
height: Math.round(rect.height),
};
Expand Down Expand Up @@ -232,7 +236,7 @@ export function getNodeAttributes(
return Object.fromEntries(attributesList);
}

export function generateHash(content: string, rect: any): string {
export function midsceneGenerateHash(content: string, rect: any): string {
// Combine the input into a string
const combined = JSON.stringify({ content, rect });
// Generate the SHA-256 hash value
Expand All @@ -242,4 +246,5 @@ export function generateHash(content: string, rect: any): string {
return hashHex.slice(0, 10);
}

(window as any).generateHash = generateHash;
(window as any).midsceneGenerateHash = midsceneGenerateHash;
(window as any).midsceneVisibleRect = visibleRect;
Original file line number Diff line number Diff line change
Expand Up @@ -372,5 +372,42 @@ exports[`extractor > basic 1`] = `
},
"content": "",
},
{
"attributes": {
"nodeType": "TEXT Node",
},
"content": "content AAA",
},
{
"attributes": {
"aria-label": "Click me",
"class": ".widget",
"nodeType": "FORM_ITEM Node",
"role": "button",
},
"content": "",
},
{
"attributes": {
"nodeType": "TEXT Node",
},
"content": "Click me",
},
{
"attributes": {
"aria-controls": "semi-select-5yxiyng",
"class": ".widget",
"nodeType": "FORM_ITEM Node",
},
"content": "",
},
{
"attributes": {
"aria-labelledby": "eval_object.object_type-label",
"class": ".widget",
"nodeType": "FORM_ITEM Node",
},
"content": "",
},
]
`;
24 changes: 24 additions & 0 deletions packages/web-integration/tests/unit-test/extractor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,30 @@ describe(
await reset();
});

// Scroll case: opens the fixture page in a short (1080x200) viewport, scrolls
// down by 400px, and generates the extract data into fixtures/extractor/scroll.
it('scroll', async () => {
  const { page, reset } = await launchPage(`file://${pagePath}`, {
    viewport: {
      width: 1080,
      height: 200,
    },
  });
  try {
    await page.evaluate(() => {
      window.scrollTo(0, 400);
    });
    // Wait one second after scrolling before extracting.
    await new Promise((resolve) => setTimeout(resolve, 1000));
    await generateExtractData(
      page,
      path.join(__dirname, 'fixtures/extractor/scroll'),
      {
        disableInputImage: false,
        disableOutputImage: false,
        disableOutputWithoutTextImg: true,
        disableResizeOutputImg: true,
        disableSnapshot: true,
      },
    );
  } finally {
    // `reset` was destructured but never invoked; call it like the sibling
    // test does, and do so even when the extraction above throws.
    await reset();
  }
});

it('profile ', async () => {
const { page, reset } = await launchPage('https://webinfra.org/about');
await new Promise((resolve) => setTimeout(resolve, 1000));
Expand Down
Loading

0 comments on commit f8fdf60

Please sign in to comment.