Skip to content

Commit

Permalink
#79 - Link implementation for PDF/UA with testcase.
Browse files Browse the repository at this point in the history
  • Loading branch information
danfickle committed Jan 30, 2019
1 parent abd36ee commit abb524d
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,6 @@ public static void main(String... args) throws Exception {
run("lists");
run("tables");
run("bookmarks");
run("links");
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<html>
<head>
<!-- PDF/UA link test case: one external link and one internal (same document) link. -->
<title>Simple PDF/UA Link Testcase</title>
<meta name="description" content="A simple link example"/>
<style>
@page {
size: 200px 200px;
margin: 0;
}
body {
margin: 0;
width: 200px;
}
</style>
</head>
<body style="font-family: 'TestFont'; font-size: 14px;">
<!-- Each anchor carries a title attribute; the converter uses it as the alternate description of the link. -->
<p>This is a link to <a title="Go to Google!" href="https://google.com">Google (external)</a>.</p>
<p>This is an internal link to the <a title="Go to end of document." href="#bottom">bottom of the document</a>.</p>
<!-- Filler paragraphs: on the small 200px page they are meant to push the link target onto a later page. -->
<p>This is some text to push the bottom of the document to page 3. OK here is some more and some more!</p>
<p>Paragraph one. Some text that goes over multiple lines. OK, this is getting to the required length.</p>
<p>Paragraph two. Some text that goes over multiple lines. OK, this is getting to the required length.</p>
<p>Paragraph three. Some text that goes over multiple lines. OK, this is getting to the required length.</p>
<!-- Target of the internal "#bottom" link above. -->
<p id="bottom">And this is the bottom of the document finally!</p>
</body>
</html>
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import java.awt.geom.Rectangle2D;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;
Expand All @@ -18,11 +19,14 @@
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDAttributeObject;
import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDMarkedContentReference;
import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDObjectReference;
import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureElement;
import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureTreeRoot;
import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.Revisions;
import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
import org.apache.pdfbox.pdmodel.documentinterchange.taggedpdf.StandardStructureTypes;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink;
import org.w3c.dom.Document;

import com.openhtmltopdf.css.constants.CSSName;
Expand All @@ -38,7 +42,9 @@
import com.openhtmltopdf.util.XRLog;

public class PdfBoxAccessibilityHelper {
private final List<List<GenericContentItem>> _pageContentItems = new ArrayList<>();
// This maps from page to a list of content items.
private final Map<PDPage, List<GenericContentItem>> _pageContentItems = new LinkedHashMap<>();
private final Map<PDPage, List<AnnotationWithStructureParent>> _pageAnnotations = new HashMap<>();
private final PdfBoxFastOutputDevice _od;
private final Box _rootBox;
private final Document _doc;
Expand All @@ -47,6 +53,9 @@ public class PdfBoxAccessibilityHelper {
private static final Map<String, Supplier<AbstractStructualElement>> _tagSuppliers;

private int _nextMcid;

// These change with every page.
private List<GenericContentItem> _contentItems;
private PdfContentStreamAdapter _cs;
private RenderingContext _ctx;
private PDPage _page;
Expand All @@ -71,6 +80,8 @@ private static Map<String, Supplier<AbstractStructualElement>> createTagSupplier
suppliers.put("td", TableCellStructualElement::new);
suppliers.put("th", TableHeaderStructualElement::new);

suppliers.put("a", AnchorStuctualElement::new);

return suppliers;
}

Expand Down Expand Up @@ -116,6 +127,15 @@ void addChild(AbstractTreeItem child) {
}
}

// Structural element for an HTML anchor; it is emitted with the PDF "Link" structure type.
private static class AnchorStuctualElement extends GenericStructualElement {
    // Value of the anchor's title attribute, used as the link's alternate description.
    // Defaults to the empty string (never null) so that consumers calling
    // titleText.isEmpty() are safe even if no title was ever assigned.
    String titleText = "";

    @Override
    String getPdfTag() {
        return StandardStructureTypes.LINK;
    }
}

private static class ListStructualElement extends AbstractStructualElement {
final List<ListItemStructualElement> listItems = new ArrayList<>();

Expand Down Expand Up @@ -315,30 +335,39 @@ public void finishPdfUa() {

_od.getWriter().getDocumentCatalog().setStructureTreeRoot(root);
}

}

public void finishNumberTree() {
COSArray numTree = new COSArray();
int i = 0;

for (int i = 0; i < _pageContentItems.size(); i++) {
PDPage page = _od.getWriter().getPage(i);
List<GenericContentItem> pageItems = _pageContentItems.get(i);
for (Map.Entry<PDPage, List<GenericContentItem>> entry : _pageContentItems.entrySet()) {
List<GenericContentItem> pageItems = entry.getValue();
List<AnnotationWithStructureParent> pageAnnotations = _pageAnnotations.get(entry.getKey());

COSArray mcidParentReferences = new COSArray();
for (GenericContentItem item : pageItems) {
System.out.println("%%%%%%%item = " + item + ", parent = " + item.parentElem + ", mcid == " + item.mcid);
mcidParentReferences.add(item.parentElem);
}
pageItems.forEach(itm -> mcidParentReferences.add(itm.parentElem));

numTree.add(COSInteger.get(i));
numTree.add(mcidParentReferences);

page.getCOSObject().setItem(COSName.STRUCT_PARENTS, COSInteger.get(i));
entry.getKey().getCOSObject().setItem(COSName.STRUCT_PARENTS, COSInteger.get(i));
entry.getKey().getCOSObject().setItem(COSName.getPDFName("Tabs"), COSName.S);
i++;

for (AnnotationWithStructureParent annot : pageAnnotations) {
numTree.add(COSInteger.get(i));
numTree.add(annot.structureParent);
annot.annotation.setStructParent(i);
i++;
}
}

COSDictionary dict = new COSDictionary();
dict.setItem(COSName.NUMS, numTree);

PDNumberTreeNode numberTreeNode = new PDNumberTreeNode(dict, dict.getClass());
_od.getWriter().getDocumentCatalog().getStructureTreeRoot().setParentTreeNextKey(_pageContentItems.size());
_od.getWriter().getDocumentCatalog().getStructureTreeRoot().setParentTreeNextKey(i);
_od.getWriter().getDocumentCatalog().getStructureTreeRoot().setParentTree(numberTreeNode);
}

Expand Down Expand Up @@ -441,6 +470,19 @@ private void finishTreeItem(AbstractTreeItem item, AbstractStructualElement pare
child.parentElem.appendKid(child.elem);

finishTreeItem(child.content, child);
} else if (item instanceof AnchorStuctualElement) {
AnchorStuctualElement child = (AnchorStuctualElement) item;

createPdfStrucureElement(parent, child);

String alternate = child.titleText;
if (alternate.isEmpty()) {
XRLog.general("PDF/UA - No title text provided for link.");
}
child.elem.setAlternateDescription(alternate);

finishTreeItems(child.children, child);

} else if (item instanceof ListStructualElement) {
ListStructualElement child = (ListStructualElement) item;

Expand Down Expand Up @@ -649,9 +691,10 @@ private AbstractStructualElement createStructureItem(StructureType type, Box box

((TableCellStructualElement) child).colspan = cell.getStyle().getColSpan();
((TableCellStructualElement) child).rowspan = cell.getStyle().getRowSpan();
} else if (child instanceof AnchorStuctualElement) {
((AnchorStuctualElement) child).titleText = box.getElement() != null ? box.getElement().getAttribute("title") : "";
}


return child;
}

Expand Down Expand Up @@ -685,13 +728,18 @@ private GenericContentItem createMarkedContentStructureItem(StructureType type,
GenericContentItem current = new GenericContentItem();

ensureAncestorTree(current, box.getParent());
ensureParent(box, current);
//ensureParent(box, current);

AbstractStructualElement parent = (AbstractStructualElement) box.getAccessibilityObject();
parent.addChild(current);
current.parent = parent;


current.mcid = _nextMcid;
current.dict = createMarkedContentDictionary();
current.page = _page;

_pageContentItems.get(_pageContentItems.size() - 1).add(current);
_contentItems.add(current);

return current;
}
Expand All @@ -707,7 +755,7 @@ private GenericContentItem createListItemLabelMarkedContent(StructureType type,
li.label.addChild(current);
current.parent = li.label;

_pageContentItems.get(_pageContentItems.size() - 1).add(current);
_contentItems.add(current);

return current;
}
Expand All @@ -734,7 +782,7 @@ private FigureContentItem createFigureContentStructureItem(StructureType type, B

parent.content = current;

_pageContentItems.get(_pageContentItems.size() - 1).add(current);
_contentItems.add(current);

return current;
}
Expand Down Expand Up @@ -796,16 +844,11 @@ public Token startStructure(StructureType type, Box box) {
return FALSE_TOKEN;
}
case INLINE: {
// Only create a structual element holder for this element if it has non text child nodes.
if (box.getChildCount() > 0 ||
(box instanceof InlineLayoutBox && !((InlineLayoutBox) box).isAllTextItems(_ctx))) {

AbstractStructualElement struct = (AbstractStructualElement) box.getAccessibilityObject();
if (struct == null) {
struct = createStructureItem(type, box);
setupStructureElement(struct, box);
}
}
return FALSE_TOKEN;
}
case BACKGROUND: {
Expand Down Expand Up @@ -887,10 +930,35 @@ public void startPage(PDPage page, PdfContentStreamAdapter cs, RenderingContext
this._page = page;
this._pageHeight = pageHeight;
this._transform = transform;
this._pageContentItems.add(new ArrayList<>());
this._contentItems = new ArrayList<>();
this._pageContentItems.put(page, this._contentItems);
this._pageAnnotations.put(page, new ArrayList<>());
}

// Page-end hook. Currently a no-op: nothing is done here when a page ends.
public void endPage() {

}

// Simple pair linking an annotation to the structure element that owns it.
// Instances are collected per page and later written into the number tree.
private static class AnnotationWithStructureParent {
    // The annotation that will be given a StructParent key.
    PDAnnotation annotation;

    // The structure element registered as the annotation's parent.
    PDStructureElement structureParent;
}

/**
 * Registers a link annotation with the structure element of its anchor box.
 *
 * If no structure element can be found for the anchor, nothing is recorded.
 *
 * @param anchor     the box of the link whose structure element becomes the parent
 * @param target     the link target box; currently unused by this method
 * @param annotation the link annotation that was added to the page
 * @param page       the page the annotation was placed on
 */
public void addLink(Box anchor, Box target, PDAnnotationLink annotation, PDPage page) {
    PDStructureElement struct = getStructualElementForBox(anchor);
    if (struct == null) {
        return;
    }

    // The link annotation's object reference must be appended as a kid
    // of its associated structure element.
    PDObjectReference ref = new PDObjectReference();
    ref.setReferencedObject(annotation);
    struct.appendKid(ref);

    // Also remember the pair so it can be added to the number tree for reverse lookup.
    AnnotationWithStructureParent annotStructParentPair = new AnnotationWithStructureParent();
    annotStructParentPair.annotation = annotation;
    annotStructParentPair.structureParent = struct;

    // Create the per-page list on demand rather than failing with a
    // NullPointerException when the page has no list registered yet.
    _pageAnnotations.computeIfAbsent(page, key -> new ArrayList<>()).add(annotStructParentPair);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDObjectReference;
import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureElement;
import org.apache.pdfbox.pdmodel.interactive.action.PDAction;
import org.apache.pdfbox.pdmodel.interactive.action.PDActionGoTo;
import org.apache.pdfbox.pdmodel.interactive.action.PDActionJavaScript;
Expand All @@ -40,6 +42,7 @@ public class PdfBoxFastLinkManager {
private final Box _root;
private final PdfBoxFastOutputDevice _od;
private final List<LinkDetails> _links;
private PdfBoxAccessibilityHelper _pdfUa;

public PdfBoxFastLinkManager(SharedContext ctx, float dotsPerPoint, Box root, PdfBoxFastOutputDevice od) {
this._sharedContext = ctx;
Expand All @@ -51,6 +54,11 @@ public PdfBoxFastLinkManager(SharedContext ctx, float dotsPerPoint, Box root, Pd
}

private Rectangle2D calcTotalLinkArea(RenderingContext c, Box box, float pageHeight, AffineTransform transform) {
if (_pdfUa != null) {
// For PDF/UA we need one link annotation per box.
return createTargetArea(c, box, pageHeight, transform, _root, _od);
}

Box current = box;
while (true) {
Box prev = current.getPreviousSibling();
Expand Down Expand Up @@ -220,7 +228,7 @@ private void addUriAsLink(RenderingContext c, Box box, PDPage page, float pageHe
if (!placeAnnotation(transform, linkShape, targetArea, annot))
return;

addLinkToPage(page, annot);
addLinkToPage(page, annot, box, target);
} else {
XRLog.general(Level.WARNING, "Could not find valid target for link. Link href = " + uri);
}
Expand All @@ -237,7 +245,7 @@ private void addUriAsLink(RenderingContext c, Box box, PDPage page, float pageHe
if (!placeAnnotation(transform, linkShape, targetArea, annot))
return;

addLinkToPage(page, annot);
addLinkToPage(page, annot, box, null);
}
}

Expand Down Expand Up @@ -323,7 +331,7 @@ private float[] mapShapeToQuadPoints(AffineTransform transform, Shape linkShape,
return ret;
}

private void addLinkToPage(PDPage page, PDAnnotationLink annot) {
private void addLinkToPage(PDPage page, PDAnnotationLink annot, Box anchor, Box target) {
PDBorderStyleDictionary styleDict = new PDBorderStyleDictionary();
styleDict.setWidth(0);
styleDict.setStyle(PDBorderStyleDictionary.STYLE_SOLID);
Expand All @@ -338,6 +346,10 @@ private void addLinkToPage(PDPage page, PDAnnotationLink annot) {
}

annots.add(annot);

if (_pdfUa != null) {
_pdfUa.addLink(anchor, target, annot, page);
}
} catch (IOException e) {
throw new PdfContentStreamAdapter.PdfException("processLink", e);
}
Expand Down Expand Up @@ -396,7 +408,8 @@ public void processLinkLater(RenderingContext c, Box box, PDPage page, float pag
}
}

public void processLinks() {
public void processLinks(PdfBoxAccessibilityHelper pdfUa) {
this._pdfUa = pdfUa;
for (LinkDetails link : _links) {
processLink(link.c, link.box, link.page, link.pageHeight, link.transform);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -846,17 +846,22 @@ public void start(Document doc) {
}

/**
 * Runs the end-of-document steps in dependency order: the PDF/UA structure tree
 * first, then bookmarks, then controls and links, and finally the PDF/UA number tree.
 *
 * @param c    the rendering context passed on to the bookmark outline writer
 * @param root the root box passed on to the bookmark outline writer
 */
public void finish(RenderingContext c, Box root) {
    if (_pdfUa != null) {
        _pdfUa.finishPdfUa();
    }

    // Bookmarks must come after PDF/UA structural tree creation
    // because bookmarks link to structural elements in the tree.
    _bmManager.loadBookmarks();
    _bmManager.writeOutline(c, root);

    // Controls and links also need access to the structure tree, so they are
    // processed exactly once, after it has been created.
    processControls();
    _linkManager.processLinks(_pdfUa);

    // The number tree is written last: link processing records the annotations
    // that receive entries in it.
    if (_pdfUa != null) {
        _pdfUa.finishNumberTree();
    }
}

@Override
Expand Down

0 comments on commit abb524d

Please sign in to comment.