feat: implement clip for BrowsingContext.captureScreenshot (#1212)

This PR also temporarily removes support for non-top-level contexts, as this is currently unsupported.
GoogleChromeLabs · Aug 28, 2023 · b17379f · b17379f
1 parent af698bb
commit b17379f
Show file tree

Hide file tree

Showing 14 changed files with 242 additions and 80 deletions.
diff --git a/.gitignore b/.gitignore
@@ -15,6 +15,5 @@ wptreport*.json
 .nyc_output*
 .wireit/
 MANIFEST.json
-*.png
 coverage/
 *.cddl
diff --git a/src/bidiMapper/domains/context/BrowsingContextImpl.ts b/src/bidiMapper/domains/context/BrowsingContextImpl.ts
@@ -24,6 +24,8 @@ import {
   UnsupportedOperationException,
   type EmptyResult,
   InvalidArgumentException,
+  NoSuchElementException,
+  UnableToCaptureScreenException,
 } from '../../../protocol/protocol.js';
 import {Deferred} from '../../../utils/deferred.js';
 import {LogType, type LoggerFn} from '../../../utils/log.js';
@@ -33,6 +35,7 @@ import {Realm} from '../script/Realm.js';
 import type {RealmStorage} from '../script/RealmStorage.js';
 import type {Result} from '../../../utils/result.js';
 import {assert} from '../../../utils/assert.js';
+import {Script} from '../../../protocol/protocol.js';
 
 import type {BrowsingContextStorage} from './BrowsingContextStorage.js';
 import type {CdpTarget} from './CdpTarget.js';
@@ -704,56 +707,37 @@ export class BrowsingContextImpl {
     await this.#cdpTarget.cdpClient.sendCommand('Page.bringToFront');
   }
 
-  async captureScreenshot(): Promise<BrowsingContext.CaptureScreenshotResult> {
+  async captureScreenshot(
+    params: BrowsingContext.CaptureScreenshotParameters
+  ): Promise<BrowsingContext.CaptureScreenshotResult> {
+    if (!this.isTopLevelContext()) {
+      throw new UnsupportedOperationException(
+        `Non-top-level 'context' (${params.context}) is currently not supported`
+      );
+    }
+
     // XXX: Focus the original tab after the screenshot is taken.
     // This is needed because the screenshot gets blocked until the active tab gets focus.
     await this.#cdpTarget.cdpClient.sendCommand('Page.bringToFront');
 
-    let clip: Protocol.DOM.Rect;
+    let rect = await this.#parseRect(params.clip);
 
-    if (this.isTopLevelContext()) {
-      const {cssContentSize, cssLayoutViewport} =
-        await this.#cdpTarget.cdpClient.sendCommand('Page.getLayoutMetrics');
-      clip = {
-        x: cssContentSize.x,
-        y: cssContentSize.y,
-        width: cssLayoutViewport.clientWidth,
-        height: cssLayoutViewport.clientHeight,
-      };
-    } else {
-      const {
-        result: {value: iframeDocRect},
-      } = await this.#cdpTarget.cdpClient.sendCommand(
-        'Runtime.callFunctionOn',
-        {
-          functionDeclaration: String(() => {
-            const docRect =
-              globalThis.document.documentElement.getBoundingClientRect();
-            return JSON.stringify({
-              x: docRect.x,
-              y: docRect.y,
-              width: docRect.width,
-              height: docRect.height,
-            });
-          }),
-          executionContextId: this.#defaultRealm.executionContextId,
-        }
-      );
-      clip = JSON.parse(iframeDocRect);
-    }
+    const {cssContentSize, cssLayoutViewport} =
+      await this.#cdpTarget.cdpClient.sendCommand('Page.getLayoutMetrics');
+    const viewport = {
+      x: cssContentSize.x,
+      y: cssContentSize.y,
+      width: cssLayoutViewport.clientWidth,
+      height: cssLayoutViewport.clientHeight,
+    };
+
+    rect = rect ? getIntersectionRect(rect, viewport) : viewport;
 
     const result = await this.#cdpTarget.cdpClient.sendCommand(
       'Page.captureScreenshot',
-      {
-        clip: {
-          ...clip,
-          scale: 1.0,
-        },
-      }
+      {clip: {...rect, scale: 1.0}}
     );
-    return {
-      data: result.data,
-    };
+    return {data: result.data};
   }
 
   async print(
@@ -848,11 +832,162 @@ export class BrowsingContextImpl {
     }
   }
 
+  /**
+   * Resolves the optional `clip` parameter of a screenshot command into a
+   * plain `{x, y, width, height}` rectangle.
+   *
+   * - No clip: resolves to `undefined`.
+   * - `viewport` clip: the given coordinates are returned unchanged.
+   * - `element` clip: the element is checked and then measured with
+   *   `getBoundingClientRect()` through two sandbox calls.
+   *
+   * @throws UnsupportedOperationException if `scrollIntoView` is set.
+   * @throws NoSuchElementException if the element check raises an exception
+   * in the sandbox or the referenced node is not an `Element`.
+   * @throws UnableToCaptureScreenException if the bounding box cannot be
+   * deserialized.
+   *
+   * See
+   * https://w3c.github.io/webdriver-bidi/#:~:text=If%20command%20parameters%20contains%20%22clip%22%3A
+   */
+  async #parseRect(clip?: BrowsingContext.ClipRectangle) {
+    if (!clip) {
+      return undefined;
+    }
+    if (clip.type === 'viewport') {
+      const {x, y, width, height} = clip;
+      return {x, y, width, height};
+    }
+    if (clip.type !== 'element') {
+      // Any other clip type yields no rectangle.
+      return undefined;
+    }
+
+    if (clip.scrollIntoView) {
+      throw new UnsupportedOperationException(
+        `'scrollIntoView' is currently not supported`
+      );
+    }
+    const elementRef = clip.element;
+    // TODO: #1213: Use custom sandbox specifically for Chromium BiDi
+    const sandbox = await this.getOrCreateSandbox(undefined);
+
+    // First round trip: verify that the reference resolves to an Element.
+    const elementCheck = await sandbox.callFunction(
+      String((element: unknown) => {
+        return element instanceof Element;
+      }),
+      {type: 'undefined'},
+      [elementRef],
+      false,
+      Script.ResultOwnership.None,
+      {}
+    );
+    if (elementCheck.type === 'exception') {
+      throw new NoSuchElementException(
+        `Element '${elementRef.sharedId}' was not found`
+      );
+    }
+    assert(elementCheck.result.type === 'boolean');
+    if (!elementCheck.result.value) {
+      throw new NoSuchElementException(
+        `Node '${elementRef.sharedId}' is not an Element`
+      );
+    }
+
+    // Second round trip: read the element's bounding box.
+    const boundsResult = await sandbox.callFunction(
+      String((element: Element) => {
+        const rect = element.getBoundingClientRect();
+        return {
+          x: rect.x,
+          y: rect.y,
+          height: rect.height,
+          width: rect.width,
+        };
+      }),
+      {type: 'undefined'},
+      [elementRef],
+      false,
+      Script.ResultOwnership.None,
+      {}
+    );
+    assert(boundsResult.type === 'success');
+    const bounds = deserializeDOMRect(boundsResult.result);
+    if (!bounds) {
+      throw new UnableToCaptureScreenException(
+        `Could not get bounding box for Element '${elementRef.sharedId}'`
+      );
+    }
+    return bounds;
+  }
+
   async close(): Promise<void> {
     await this.#cdpTarget.cdpClient.sendCommand('Page.close');
   }
 }
 
+/**
+ * Converts a serialized object holding `x`, `y`, `width` and `height`
+ * entries into a CDP rectangle.
+ *
+ * @param result Serialized value to read the four entries from.
+ * @returns The rectangle, or `undefined` when `result` is not a serialized
+ * object with entries, or when any of the four entries is missing, is not of
+ * type `number`, or does not carry a real JavaScript number.
+ */
+function deserializeDOMRect(
+  result: Script.RemoteValue
+): Protocol.DOM.Rect | undefined {
+  if (result.type !== 'object' || result.value === undefined) {
+    return;
+  }
+  const entries = result.value;
+
+  // Looks up one numeric entry by key; `undefined` means "not usable".
+  const readNumber = (name: string): number | undefined => {
+    const entry = entries.find(([key]) => {
+      return key === name;
+    })?.[1];
+    if (entry?.type !== 'number') {
+      return undefined;
+    }
+    // Per the WebDriver BiDi `script.NumberValue` definition, -0, NaN and
+    // +/-Infinity are serialized as strings. Only real numbers may reach the
+    // screenshot clip, so '-0' is folded into 0 and the rest are rejected.
+    const raw: unknown = entry.value;
+    if (raw === '-0') {
+      return 0;
+    }
+    return typeof raw === 'number' ? raw : undefined;
+  };
+
+  const x = readNumber('x');
+  const y = readNumber('y');
+  const height = readNumber('height');
+  const width = readNumber('width');
+  if (
+    x === undefined ||
+    y === undefined ||
+    height === undefined ||
+    width === undefined
+  ) {
+    return;
+  }
+  return {x, y, width, height};
+}
+
+/**
+ * Returns a copy of `box` whose width and height are non-negative. A negative
+ * extent is flipped and the matching origin coordinate is shifted so the same
+ * area is covered.
+ *
+ * @see https://w3c.github.io/webdriver-bidi/#normalize-rect
+ */
+function normalizeRect(box: Readonly<Protocol.DOM.Rect>): Protocol.DOM.Rect {
+  let {x, y, width, height} = box;
+  if (width < 0) {
+    x += width;
+    width = -width;
+  }
+  if (height < 0) {
+    y += height;
+    height = -height;
+  }
+  return {x, width, y, height};
+}
+
+/**
+ * Computes the overlap of two rectangles after normalizing both. When the
+ * rectangles do not overlap along an axis, the extent on that axis is 0.
+ *
+ * @see https://w3c.github.io/webdriver-bidi/#rectangle-intersection
+ */
+function getIntersectionRect(
+  first: Readonly<Protocol.DOM.Rect>,
+  second: Readonly<Protocol.DOM.Rect>
+): Protocol.DOM.Rect {
+  const a = normalizeRect(first);
+  const b = normalizeRect(second);
+
+  const left = Math.max(a.x, b.x);
+  const top = Math.max(a.y, b.y);
+  const right = Math.min(a.x + a.width, b.x + b.width);
+  const bottom = Math.min(a.y + a.height, b.y + b.height);
+
+  return {
+    x: left,
+    y: top,
+    width: Math.max(right - left, 0),
+    height: Math.max(bottom - top, 0),
+  };
+}
+
 function parseInteger(value: string) {
   value = value.trim();
   if (!/^[0-9]+$/.test(value)) {

diff --git a/src/bidiMapper/domains/context/BrowsingContextProcessor.ts b/src/bidiMapper/domains/context/BrowsingContextProcessor.ts
@@ -165,7 +165,7 @@ export class BrowsingContextProcessor {
     params: BrowsingContext.CaptureScreenshotParameters
   ): Promise<BrowsingContext.CaptureScreenshotResult> {
     const context = this.#browsingContextStorage.getContext(params.context);
-    return context.captureScreenshot();
+    return context.captureScreenshot(params);
   }
 
   async print(

diff --git a/tests/browsing_context/capture_screenshot/element.png b/tests/browsing_context/capture_screenshot/element.png
diff --git a/tests/browsing_context/capture_screenshot/test_capture_screenshot.py b/tests/browsing_context/capture_screenshot/test_capture_screenshot.py
@@ -17,7 +17,7 @@
 
 import pytest
 from anys import ANY_STR
-from test_helpers import (assert_images_equal, execute_command, get_tree,
+from test_helpers import (assert_images_similar, execute_command, get_tree,
                           goto_url, read_JSON_message, send_JSON_command)
 
 
@@ -65,7 +65,51 @@ async def test_screenshot(websocket, context_id, png_filename,
         resp = await read_JSON_message(websocket)
         assert resp["result"] == {'data': ANY_STR}
 
-        assert_images_equal(resp["result"]["data"], png_base64)
+        assert_images_similar(resp["result"]["data"], png_base64)
+
+
+@pytest.mark.asyncio
+async def test_screenshot_element(websocket, context_id, query_selector,
+                                  get_cdp_session_id, html):
+    """Capture a screenshot clipped to a `div` and compare it with the
+    stored `element.png` reference image."""
+    await goto_url(websocket, context_id, html('<div>hello</div>'))
+    session_id = await get_cdp_session_id(context_id)
+
+    # Set a fixed viewport to make the test deterministic.
+    device_metrics = {
+        "width": 200,
+        "height": 200,
+        "deviceScaleFactor": 1.0,
+        "mobile": False,
+    }
+    await execute_command(
+        websocket, {
+            "method": "cdp.sendCommand",
+            "params": {
+                "method": "Emulation.setDeviceMetricsOverride",
+                "params": device_metrics,
+                "session": session_id
+            }
+        })
+
+    element_clip = {
+        "type": "element",
+        "element": await query_selector("div")
+    }
+    await send_JSON_command(
+        websocket, {
+            "method": "browsingContext.captureScreenshot",
+            "params": {
+                "context": context_id,
+                "clip": element_clip
+            }
+        })
+
+    resp = await read_JSON_message(websocket)
+    assert resp["result"] == {'data': ANY_STR}
+
+    reference_path = Path(__file__).parent.resolve() / 'element.png'
+    with open(reference_path, 'rb') as image_file:
+        expected_png = base64.b64encode(image_file.read()).decode('utf-8')
+        assert_images_similar(resp["result"]["data"], expected_png)
 
 
 @pytest.mark.asyncio
@@ -115,4 +159,4 @@ async def test_screenshot_oopif(websocket, context_id, html, iframe,
               'rb') as image_file:
         png_base64 = base64.b64encode(image_file.read()).decode('utf-8')
 
-        assert_images_equal(resp["result"]["data"], png_base64)
+        assert_images_similar(resp["result"]["data"], png_base64)
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
@@ -193,8 +193,10 @@ def AnyExtending(expected: list | dict):
     return expected
 
 
-def assert_images_equal(img1: Image.Image | str, img2: Image.Image | str):
-    """Assert that the given images are equal."""
+def assert_images_similar(img1: Image.Image | str,
+                          img2: Image.Image | str,
+                          percent=0.90):
+    """Assert that the given images are similar based on the given percent."""
     if isinstance(img1, str):
         img1 = Image.open(io.BytesIO(base64.b64decode(img1)))
     if isinstance(img2, str):
@@ -209,8 +211,14 @@ def assert_images_equal(img1: Image.Image | str, img2: Image.Image | str):
     else:
         equal_alphas = True
 
-    equal_content = not ImageChops.difference(img1.convert("RGB"),
-                                              img2.convert("RGB")).getbbox()
+    difference = ImageChops.difference(img1.convert("RGB"),
+                                       img2.convert("RGB")).getdata()
+    pixel_count = 0
+    for pixel in difference:
+        if pixel == (0, 0, 0):
+            pixel_count += 1
+
+    equal_content = pixel_count / len(difference) > percent
 
     assert equal_alphas
     assert equal_size

diff --git a/...hromedriver/headless/webdriver/tests/bidi/browsing_context/capture_screenshot/clip.py.ini b/...hromedriver/headless/webdriver/tests/bidi/browsing_context/capture_screenshot/clip.py.ini
@@ -1,12 +1,6 @@
 [clip.py]
-  [test_clip_element]
-    expected: FAIL
-
   [test_clip_element_with_scroll_into_view]
     expected: FAIL
 
-  [test_clip_viewport]
-    expected: FAIL
-
   [test_clip_viewport_scroll_to]
     expected: FAIL
diff --git a/...romedriver/headless/webdriver/tests/bidi/browsing_context/capture_screenshot/frame.py.ini b/...romedriver/headless/webdriver/tests/bidi/browsing_context/capture_screenshot/frame.py.ini
@@ -0,0 +1,3 @@
+[frame.py]
+  [test_iframe]
+    expected: FAIL
diff --git a/...medriver/headless/webdriver/tests/bidi/browsing_context/capture_screenshot/invalid.py.ini b/...medriver/headless/webdriver/tests/bidi/browsing_context/capture_screenshot/invalid.py.ini
@@ -1,7 +1,4 @@
 [invalid.py]
-  [test_params_clip_element_sharedId_invalid_value]
-    expected: FAIL
-
   [test_params_clip_viewport_dimensions_invalid_value]
     expected: FAIL