Skip to content

Commit

Permalink
feat: 1494 Adds accessible semantics
Browse files Browse the repository at this point in the history
  • Loading branch information
MattL75 committed May 12, 2023
1 parent 2b04432 commit 19a52b7
Show file tree
Hide file tree
Showing 10 changed files with 210 additions and 42 deletions.
1 change: 1 addition & 0 deletions __mocks__/_failing_page.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export default {
getAnnotations: () => new Promise((resolve, reject) => reject(new Error())),
getOperatorList: () => new Promise((resolve, reject) => reject(new Error())),
getTextContent: () => new Promise((resolve, reject) => reject(new Error())),
getStructTree: () => new Promise<void>((resolve) => resolve()),
getViewport: () => ({
width: 600,
height: 800,
Expand Down
12 changes: 11 additions & 1 deletion src/Page/PageCanvas.spec.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { beforeAll, describe, expect, it, vi } from 'vitest';
import React from 'react';
import { render } from '@testing-library/react';
import { render, waitFor } from '@testing-library/react';

import { pdfjs } from '../index.test';

Expand Down Expand Up @@ -103,5 +103,15 @@ describe('PageCanvas', () => {
expect(canvasRef).toHaveBeenCalled();
expect(canvasRef).toHaveBeenCalledWith(expect.any(HTMLElement));
});

// Checks that PageCanvas populates the <canvas> element with child elements
// (the rendered struct tree) some time after the initial render.
it('generates a struct tree inside the canvas', async () => {
  renderWithContext(<PageCanvas />, {
    page,
    scale: 1,
  });

  const canvas = document.querySelector('canvas');

  // Fail loudly if no canvas was rendered at all. Without this guard,
  // `canvas?.children.length` would be `undefined`, and a `.not.toBe(0)`
  // assertion on `undefined` passes vacuously.
  expect(canvas).not.toBeNull();

  // The struct tree is set asynchronously, so poll until children appear.
  // A numeric matcher is used so that a missing value can never satisfy it.
  await waitFor(() => expect(canvas?.children.length).toBeGreaterThan(0));
});
});
});
21 changes: 18 additions & 3 deletions src/Page/PageCanvas.tsx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import React, { useCallback, useContext, useEffect, useMemo, useRef } from 'react';
import React, { useCallback, useContext, useEffect, useMemo, useRef, useState } from 'react';
import mergeRefs from 'merge-refs';
import invariant from 'tiny-invariant';
import warning from 'tiny-warning';
Expand All @@ -15,7 +15,8 @@ import {

import { isRef } from '../shared/propTypes';

import type { RenderParameters } from 'pdfjs-dist/types/src/display/api';
import type { RenderParameters, StructTreeNode } from 'pdfjs-dist/types/src/display/api';
import StructTree from '../StructTree';

const ANNOTATION_MODE = pdfjs.AnnotationMode;

Expand All @@ -34,6 +35,7 @@ export default function PageCanvas(props: PageCanvasProps) {
devicePixelRatio: devicePixelRatioProps,
onRenderError: onRenderErrorProps,
onRenderSuccess: onRenderSuccessProps,
customTextRenderer,
page,
renderForms,
rotate,
Expand All @@ -45,6 +47,17 @@ export default function PageCanvas(props: PageCanvasProps) {

invariant(page, 'Attempted to render page canvas, but no page was specified.');

const [structTree, setStructTree] = useState<StructTreeNode | null>(null);

useEffect(() => {
if (!customTextRenderer) {
page.getStructTree().then((tree) => {
setStructTree(tree);
});
}
// eslint-disable-next-line react-hooks/exhaustive-deps
}, []);

const devicePixelRatio = devicePixelRatioProps || getDevicePixelRatio();

/**
Expand Down Expand Up @@ -169,7 +182,9 @@ export default function PageCanvas(props: PageCanvasProps) {
display: 'block',
userSelect: 'none',
}}
/>
>
{!!structTree && <StructTree node={structTree} />}
</canvas>
);
}

Expand Down
57 changes: 20 additions & 37 deletions src/Page/TextLayer.spec.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,18 @@ function renderWithContext(children: React.ReactNode, context: Partial<PageConte
};
}

/**
 * Renders the text layer of `page` into a detached `<div>` using
 * `pdfjs.renderTextLayer` at scale 1 and resolves with that `<div>` once the
 * render task's promise has settled.
 *
 * @param page - Page whose text content is streamed into the layer.
 * @param includeMarkedContent - Forwarded to `page.streamTextContent`; defaults to `true`.
 * @returns The `<div>` that was passed to pdf.js as the render container.
 */
async function getRenderedTextLayer(page: PDFPageProxy, includeMarkedContent = true) {
  const container = document.createElement('div');
  const textContentSource = page.streamTextContent({ includeMarkedContent });
  const viewport = page.getViewport({ scale: 1 });

  const renderTask = pdfjs.renderTextLayer({ container, textContentSource, viewport });
  await renderTask.promise;

  return container;
}

describe('TextLayer', () => {
// Loaded page
let page: PDFPageProxy;
Expand All @@ -43,6 +55,9 @@ describe('TextLayer', () => {
let desiredTextItems: TextContent['items'];
let desiredTextItems2: TextContent['items'];

let markedRenderedTextLayer: HTMLDivElement;
let unmarkedRenderedTextLayer: HTMLDivElement;

beforeAll(async () => {
const pdf = await pdfjs.getDocument({ data: pdfFile.arrayBuffer }).promise;

Expand All @@ -53,6 +68,9 @@ describe('TextLayer', () => {
page2 = await pdf.getPage(2);
const textContent2 = await page2.getTextContent();
desiredTextItems2 = textContent2.items;

markedRenderedTextLayer = await getRenderedTextLayer(page, true);
unmarkedRenderedTextLayer = await getRenderedTextLayer(page, false);
});

describe('loading', () => {
Expand Down Expand Up @@ -139,7 +157,7 @@ describe('TextLayer', () => {
const wrapper = container.firstElementChild as HTMLDivElement;
const textItems = wrapper.children;

expect(textItems).toHaveLength(desiredTextItems.length + 1);
expect(textItems).toHaveLength(markedRenderedTextLayer.children.length + 1);
});

it('renders text content properly given customTextRenderer', async () => {
Expand All @@ -161,42 +179,7 @@ describe('TextLayer', () => {
const wrapper = container.firstElementChild as HTMLDivElement;
const textItems = wrapper.children;

expect(textItems).toHaveLength(desiredTextItems.length + 1);
});

it('maps textContent items to actual TextLayer children properly', async () => {
const { func: onRenderTextLayerSuccess, promise: onRenderTextLayerSuccessPromise } =
makeAsyncCallback();

const { container, rerender } = renderWithContext(<TextLayer />, {
onRenderTextLayerSuccess,
page,
});

expect.assertions(1);

await onRenderTextLayerSuccessPromise;

const wrapper = container.firstElementChild as HTMLDivElement;
const innerHTML = wrapper.innerHTML;

const { func: onRenderTextLayerSuccess2, promise: onRenderTextLayerSuccessPromise2 } =
makeAsyncCallback();

const customTextRenderer = (item: { str: string }) => item.str;

rerender(<TextLayer />, {
customTextRenderer,
onRenderTextLayerSuccess: onRenderTextLayerSuccess2,
page,
});

await onRenderTextLayerSuccessPromise2;

const wrapper2 = container.firstElementChild as HTMLDivElement;
const innerHTML2 = wrapper2.innerHTML;

expect(innerHTML).toEqual(innerHTML2);
expect(textItems).toHaveLength(unmarkedRenderedTextLayer.children.length + 1);
});

it('calls customTextRenderer with necessary arguments', async () => {
Expand Down
2 changes: 1 addition & 1 deletion src/Page/TextLayer.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ export default function TextLayer() {

layer.innerHTML = '';

const textContentSource = page.streamTextContent();
const textContentSource = page.streamTextContent({ includeMarkedContent: !customTextRenderer });

const parameters = {
container: layer,
Expand Down
35 changes: 35 additions & 0 deletions src/StructTree/StructTree.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import React, { useMemo } from 'react';
import PropTypes from 'prop-types';
import { getAttributes } from './utils';
import type { StructTreeProps } from './types';
import type { StructTreeNode } from 'pdfjs-dist/types/src/display/api';

/**
 * Renders a single structure-tree node as a `<span>` whose attributes come
 * from `getAttributes(node)`, and recursively renders the node's children
 * inside it.
 *
 * No child elements are rendered when the node has no `children`, or when it
 * has exactly one child and that child carries an `id` key.
 */
export default function StructTree({ node }: StructTreeProps) {
  const attributes = useMemo(() => getAttributes(node), [node]);

  const childNodes = useMemo(() => {
    const { children } = node;
    if (!children) {
      return null;
    }

    // A lone child that has an `id` key is not rendered as its own element.
    const hasLoneIdChild = children.length === 1 && children[0] && 'id' in children[0];
    if (hasLoneIdChild) {
      return null;
    }

    return children.map((child, index) => (
      // Safe to use index for key as the array is bound to the pdf structure
      // eslint-disable-next-line react/no-array-index-key
      <StructTree key={index} node={child as StructTreeNode} />
    ));
  }, [node]);

  return <span {...attributes}>{childNodes}</span>;
}

// Runtime prop validation mirroring the fields read from `node`.
StructTree.propTypes = {
  node: PropTypes.shape({
    children: PropTypes.array,
    role: PropTypes.string,
    alt: PropTypes.string,
    lang: PropTypes.string,
    id: PropTypes.string,
  }).isRequired,
};
58 changes: 58 additions & 0 deletions src/StructTree/constants.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// From pdfjs-dist/lib/web/struct_tree_layer_builder.js
/**
 * Lookup table keyed by PDF structure role name. Each value is either the
 * string to use as the HTML `role` attribute for that PDF role, or `null`
 * when no `role` attribute is mapped for it.
 *
 * Numbered headings (`H1`, `H2`, ...) are not listed here; only the plain `H`
 * role has an entry.
 */
export const PDF_ROLE_TO_HTML_ROLE = {
// Document level structure types
Document: null, // There's a "document" role, but it doesn't make sense here.
DocumentFragment: null,
// Grouping level structure types
Part: 'group',
Sect: 'group', // XXX: There's a "section" role, but it's abstract.
Div: 'group',
Aside: 'note',
NonStruct: 'none',
// Block level structure types
P: null,
// H<n>,
H: 'heading',
Title: null,
FENote: 'note',
// Sub-block level structure type
Sub: 'group',
// General inline level structure types
Lbl: null,
Span: null,
Em: null,
Strong: null,
Link: 'link',
Annot: 'note',
Form: 'form',
// Ruby and Warichu structure types
Ruby: null,
RB: null,
RT: null,
RP: null,
Warichu: null,
WT: null,
WP: null,
// List standard structure types
L: 'list',
LI: 'listitem',
LBody: null,
// Table standard structure types
Table: 'table',
TR: 'row',
TH: 'columnheader',
TD: 'cell',
THead: 'columnheader',
TBody: null,
TFoot: null,
// Standard structure type Caption
Caption: null,
// Standard structure type Figure
Figure: 'figure',
// Standard structure type Formula
Formula: null,
// standard structure type Artifact
Artifact: null,
};

/**
 * Matches a numbered heading role: the letter `H` followed by one or more
 * digits and nothing else. The digits are captured in group 1.
 */
export const HEADING_PATTERN = /^H(\d+)$/;
1 change: 1 addition & 0 deletions src/StructTree/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export { default } from './StructTree';
23 changes: 23 additions & 0 deletions src/StructTree/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import { PDF_ROLE_TO_HTML_ROLE } from './constants';

/** Union of the PDF role names that appear as keys of `PDF_ROLE_TO_HTML_ROLE`. */
export type PdfTagRole = keyof typeof PDF_ROLE_TO_HTML_ROLE;

/**
 * Shape of a structure-tree node as read by the StructTree component and its
 * helpers. Every field is optional.
 */
export type StructTreeNode = {
/** Nested nodes, if any. */
children?: StructTreeNode[];
/** PDF structure role name (for example a key of `PDF_ROLE_TO_HTML_ROLE`, or `H` followed by digits). */
role?: string;
/** Identifier of the node; emitted as `aria-owns` by `getStandardAttributes`. */
id?: string;
/** Language value; emitted as the `lang` attribute by `getStandardAttributes`. */
lang?: string;
/** Alternative text; emitted as `aria-label` by `getStandardAttributes`. */
alt?: string;
};

/** Props accepted by the `StructTree` component. */
export type StructTreeProps = {
/** The structure-tree node to render. */
node: StructTreeNode;
};

/**
 * Attributes computed from a structure-tree node by the helpers in `utils`.
 * All keys are optional; only those that apply to a given node are set.
 */
export type StructTreeAttributes = {
lang?: string;
role?: string;
/** Heading level; set from the digits of an `H<n>` role. */
'aria-level'?: number;
'aria-label'?: string;
'aria-owns'?: string;
};
42 changes: 42 additions & 0 deletions src/StructTree/utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/* eslint-disable no-bitwise */
/* eslint-disable prefer-destructuring */
import { HEADING_PATTERN, PDF_ROLE_TO_HTML_ROLE } from './constants';
import type { StructTreeAttributes, StructTreeNode, PdfTagRole } from './types';

/**
 * Derives the `role` (and, for numbered headings, `aria-level`) attributes
 * from a node's PDF structure role.
 *
 * - A role matching `HEADING_PATTERN` (`H` + digits) yields
 *   `role: 'heading'` and `aria-level` set to the captured number.
 * - Otherwise the role is looked up in `PDF_ROLE_TO_HTML_ROLE`; a non-null
 *   entry becomes the `role` attribute.
 * - A missing/empty role, an unknown role, or a role mapped to `null` yields
 *   no attributes.
 *
 * @param node - Structure-tree node to inspect.
 * @returns The role-related attributes for the node (possibly empty).
 */
export const getRoleAttributes = (node: StructTreeNode) => {
  const attributes: StructTreeAttributes = {};

  const { role } = node;
  if (!role) {
    return attributes;
  }

  const headingMatch = role.match(HEADING_PATTERN);
  if (headingMatch) {
    attributes.role = 'heading';
    attributes['aria-level'] = Number(headingMatch[1]);
    return attributes;
  }

  // Only consult the table's own keys. Role names come from the PDF, and a
  // plain property lookup would also resolve names inherited from
  // Object.prototype (e.g. "constructor", "toString"), whose values are truthy
  // functions that would otherwise be emitted as the `role` attribute.
  if (Object.prototype.hasOwnProperty.call(PDF_ROLE_TO_HTML_ROLE, role)) {
    const htmlRole = PDF_ROLE_TO_HTML_ROLE[role as PdfTagRole];
    if (htmlRole) {
      attributes.role = htmlRole;
    }
  }

  return attributes;
};

/**
 * Collects the attributes that map directly from node fields:
 * `alt` -> `aria-label`, `lang` -> `lang`, `id` -> `aria-owns`.
 *
 * When the node has exactly one child and that child has an `id` key, the
 * child's standard attributes are computed the same way and merged on top of
 * the node's own, so the child's values win on conflict.
 *
 * @param node - Structure-tree node to read from.
 * @returns The collected attributes (possibly empty).
 */
export const getStandardAttributes = (node: StructTreeNode): StructTreeAttributes => {
  const { alt, lang, id, children } = node;
  const own: StructTreeAttributes = {};

  if (alt !== undefined) {
    own['aria-label'] = alt;
  }
  if (lang !== undefined) {
    own.lang = lang;
  }
  if (id !== undefined) {
    own['aria-owns'] = id;
  }

  // A lone child carrying an `id` key contributes its attributes to this node.
  const loneChild = children?.length === 1 ? children[0] : undefined;
  if (loneChild && 'id' in loneChild) {
    return { ...own, ...getStandardAttributes(loneChild) };
  }

  return own;
};

/**
 * Builds the full attribute set for a node by combining the role attributes
 * with the standard attributes; standard attributes take precedence on
 * conflicting keys.
 *
 * @param node - Structure-tree node to convert.
 * @returns The merged attributes, or `undefined` when `node` is falsy.
 */
export const getAttributes = (node: StructTreeNode) => {
  if (!node) {
    return undefined;
  }

  const roleAttributes = getRoleAttributes(node);
  const standardAttributes = getStandardAttributes(node);
  return { ...roleAttributes, ...standardAttributes };
};

0 comments on commit 19a52b7

Please sign in to comment.