Skip to content

Commit

Permalink
#79 - Support for lists with testcase in PDF/UA documents.
Browse files Browse the repository at this point in the history
Needs to be made more robust.
  • Loading branch information
danfickle committed Jan 26, 2019
1 parent d954d8b commit 3477d3f
Show file tree
Hide file tree
Showing 5 changed files with 163 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ public enum StructureType {
FLOAT,
BLOCK,
INLINE,
LIST_MARKER,
REPLACED;
}
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,9 @@ private void paintListMarkers(RenderingContext c, List<DisplayListItem> blocks)
OperatorSetClip setClip = (OperatorSetClip) dli;
setClip(c, setClip);
} else {
Object token = c.getOutputDevice().startStructure(StructureType.LIST_MARKER, (Box) dli);
((BlockBox) dli).paintListMarker(c);
c.getOutputDevice().endStructure(token);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,5 +46,6 @@ public static void main(String... args) throws Exception {
run("image");
run("image-over-two-pages");
run("running");
run("lists");
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
<html>
<head>
<!--
  PDF/UA list testcase. Exercises three list variants on a 200px x 200px page:
    1. an unordered list, including one item that holds nested block content,
    2. an ordered list with five items,
    3. a list with "list-style-type: none" (items without any marker).
-->
<title>Simple PDF/UA List Testcase</title>
<meta name="description" content="A simple list example"/>
<style>
@page {
size: 200px 200px;
margin: 0;
}
body {
margin: 0;
width: 200px;
}
</style>
</head>
<body style="font-family: 'TestFont'; font-size: 14px;">
<h1>A simple list example</h1>
<h2>Unordered list</h2>

<ul>
<li>List item 1</li>
<li>List item 2</li>
<li>
List item 3
<div style="font-size: 16px;">with additional content</div>
</li>
</ul>

<h2>Ordered List</h2>

<ol>
<li>List item 1</li>
<li>List item 2</li>
<li>List item 3</li>
<li>List item 4</li>
<li>List item 5</li>
</ol>

<h2>No list markers</h2>

<ul style="list-style-type: none;">
<li>One</li>
<li>Two</li>
<li>Three</li>
<li>Four</li>
</ul>

</body>
</html>
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@
import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.Revisions;
import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDMarkedContent;
import org.apache.pdfbox.pdmodel.documentinterchange.taggedpdf.StandardStructureTypes;
import org.apache.xmpbox.type.AbstractStructuredType;
import org.w3c.dom.Document;
import com.openhtmltopdf.extend.StructureType;
import com.openhtmltopdf.render.BlockBox;
import com.openhtmltopdf.render.Box;
import com.openhtmltopdf.render.InlineLayoutBox;
import com.openhtmltopdf.render.LineBox;
import com.openhtmltopdf.render.MarkerData;
import com.openhtmltopdf.render.RenderingContext;
import com.openhtmltopdf.util.XRLog;

Expand Down Expand Up @@ -127,29 +129,30 @@ private static class ListItemStructualElement extends AbstractStructualElement {
ListLabelStructualElement label;
ListBodyStructualElement body;

ListItemStructualElement() {
this.body = new ListBodyStructualElement();
this.body.parent = this;

this.label = new ListLabelStructualElement();
this.label.parent = this;
}

@Override
String getPdfTag() {
return StandardStructureTypes.LI;
}

@Override
void addChild(AbstractTreeItem child) {
// TODO
this.body.addChild(child);
}
}

private static class ListLabelStructualElement extends AbstractStructualElement {
GenericContentItem content;

private static class ListLabelStructualElement extends GenericStructualElement {
@Override
String getPdfTag() {
return StandardStructureTypes.LBL;
}

@Override
void addChild(AbstractTreeItem child) {
this.content = (GenericContentItem) child;
}
}

private static class ListBodyStructualElement extends GenericStructualElement {
Expand Down Expand Up @@ -348,16 +351,38 @@ private void finishTreeItem(AbstractTreeItem item, AbstractStructualElement pare
finishTreeItem(child.content, child);
} else if (item instanceof ListStructualElement) {
ListStructualElement child = (ListStructualElement) item;
// TODO

createPdfStrucureElement(parent, child);

child.listItems.forEach(itm -> finishTreeItem(itm, child));

} else if (item instanceof ListItemStructualElement) {
ListItemStructualElement child = (ListItemStructualElement) item;
// TODO

createPdfStrucureElement(parent, child);

finishTreeItem(child.label, child);
finishTreeItem(child.body, child);

} else if (item instanceof ListLabelStructualElement) {
ListLabelStructualElement child = (ListLabelStructualElement) item;
// TODO

if (child.children.isEmpty()) {
// Must be list-style-type: none.
return;
}

createPdfStrucureElement(parent, child);

child.children.forEach(itm -> finishTreeItem(itm, child));

} else if (item instanceof ListBodyStructualElement) {
ListBodyStructualElement child = (ListBodyStructualElement) item;
// TODO

createPdfStrucureElement(parent, child);

child.children.forEach(itm -> finishTreeItem(itm, child));

} else if (item instanceof GenericStructualElement) {
// A structural element such as Div, Sect, P, etc.,
// which contains other structural elements or content items (text).
Expand All @@ -373,18 +398,22 @@ private void finishTreeItem(AbstractTreeItem item, AbstractStructualElement pare
// We skip line boxes in the tree.
child.children.forEach(itm -> finishTreeItem(itm, parent));
} else {
child.parentElem = parent.elem;
child.elem = new PDStructureElement(child.getPdfTag(), child.parentElem);
child.elem.setParent(child.parentElem);
child.elem.setPage(child.page);

child.parentElem.appendKid(child.elem);
createPdfStrucureElement(parent, child);

// Recursively, depth first, process the structural tree.
child.children.forEach(itm -> finishTreeItem(itm, child));
}
}
}

/**
 * Creates the PDF structure element for {@code child} and links it beneath
 * the PDF structure element already held by {@code parent}.
 *
 * The new element is tagged with {@code child.getPdfTag()}, assigned to
 * {@code child.page}, and appended as a kid of the parent's element.
 *
 * @param parent the structural element whose {@code elem} becomes the PDF parent
 * @param child  the structural element that receives a freshly created {@code elem}
 */
private void createPdfStrucureElement(AbstractStructualElement parent, AbstractStructualElement child) {
    // Remember which PDF element we hang off before building our own.
    child.parentElem = parent.elem;

    PDStructureElement created = new PDStructureElement(child.getPdfTag(), child.parentElem);
    child.elem = created;

    created.setParent(child.parentElem);
    created.setPage(child.page);

    // Register the new element in the parent's kid list.
    child.parentElem.appendKid(created);
}

private COSDictionary createMarkedContentDictionary() {
COSDictionary dict = new COSDictionary();
Expand All @@ -408,9 +437,8 @@ private void ensureAncestorTree(AbstractTreeItem child, Box parent) {
}

private AbstractStructualElement createStructureItem(StructureType type, Box box) {
AbstractStructualElement child = (AbstractStructualElement) box.getAccessibilityObject();
AbstractStructualElement child = null;

if (child == null) {
if (box instanceof BlockBox) {
BlockBox bb = (BlockBox) box;

Expand All @@ -427,9 +455,9 @@ private AbstractStructualElement createStructureItem(StructureType type, Box box
(float) rect.getHeight());
((FigureStructualElement) child).alternateText = box.getElement() == null ? "" : box.getElement().getAttribute("alt");
}
}
}

if (child == null && box.getElement() != null) {
if (child == null && box.getElement() != null && !box.isAnonymous()) {
String htmlTag = box.getElement().getTagName();
Supplier<AbstractStructualElement> supplier = _tagSuppliers.get(htmlTag);

Expand All @@ -444,16 +472,18 @@ private AbstractStructualElement createStructureItem(StructureType type, Box box

child.page = _page;
child.box = box;
box.setAccessiblityObject(child);

ensureAncestorTree(child, box.getParent());
ensureParent(box, child);
}

return child;
return child;
}

/**
 * Attaches {@code child} to {@code box} as its accessibility object and then
 * wires it into the structure tree via {@code ensureAncestorTree} (called
 * with the box's parent) followed by {@code ensureParent}.
 *
 * @param child the structural element created for {@code box}
 * @param box   the layout box the structural element describes
 */
private void setupStructureElement(AbstractStructualElement child, Box box) {
    // Must be set first: the two calls below run after the box already
    // carries its accessibility object.
    box.setAccessiblityObject(child);

    Box container = box.getParent();
    ensureAncestorTree(child, container);

    ensureParent(box, child);
}

public void ensureParent(Box box, AbstractTreeItem child) {
private void ensureParent(Box box, AbstractTreeItem child) {
if (child.parent == null) {
if (box.getParent() != null) {
AbstractStructualElement parent = (AbstractStructualElement) box.getParent().getAccessibilityObject();
Expand Down Expand Up @@ -481,6 +511,23 @@ private GenericContentItem createMarkedContentStructureItem(StructureType type,
return current;
}

/**
 * Creates the marked-content item for a list item's marker and attaches it
 * to the label element of the list item associated with {@code box}.
 *
 * The item takes the current {@code _nextMcid}, a new marked-content
 * dictionary and the current page, and is also added to the last entry of
 * {@code _pageContentItems}.
 *
 * @param type the structure type requested by the caller (not used here)
 * @param box  the box whose accessibility object is the list item element
 * @return the newly created content item, parented to the list item's label
 */
private GenericContentItem createListItemLabelMarkedContent(StructureType type, Box box) {
    GenericContentItem labelContent = new GenericContentItem();

    // Read the id before creating the dictionary, in the same order as the
    // other content-item factories.
    labelContent.mcid = _nextMcid;
    labelContent.dict = createMarkedContentDictionary();
    labelContent.page = _page;

    // NOTE(review): assumes the box's accessibility object is always a
    // ListItemStructualElement; a marker painted for any other kind of box
    // (or a box without an accessibility object) would fail here - confirm.
    ListItemStructualElement listItem = (ListItemStructualElement) box.getAccessibilityObject();
    ListLabelStructualElement label = listItem.label;
    label.addChild(labelContent);
    labelContent.parent = label;

    int lastIndex = _pageContentItems.size() - 1;
    _pageContentItems.get(lastIndex).add(labelContent);

    return labelContent;
}


private FigureContentItem createFigureContentStructureItem(StructureType type, Box box) {
FigureStructualElement parent = (FigureStructualElement) box.getAccessibilityObject();

Expand Down Expand Up @@ -556,14 +603,23 @@ public Token startStructure(StructureType type, Box box) {
case LAYER:
case FLOAT:
case BLOCK: {
createStructureItem(type, box);
AbstractStructualElement struct = (AbstractStructualElement) box.getAccessibilityObject();
if (struct == null) {
struct = createStructureItem(type, box);
setupStructureElement(struct, box);
}
return FALSE_TOKEN;
}
case INLINE: {
// Only create a structural element holder for this element if it has non-text child nodes.
if (box.getChildCount() > 0 ||
(box instanceof InlineLayoutBox && !((InlineLayoutBox) box).isAllTextItems(_ctx))) {
createStructureItem(type, box);

AbstractStructualElement struct = (AbstractStructualElement) box.getAccessibilityObject();
if (struct == null) {
struct = createStructureItem(type, box);
setupStructureElement(struct, box);
}
}
return FALSE_TOKEN;
}
Expand All @@ -575,13 +631,34 @@ public Token startStructure(StructureType type, Box box) {
}
return FALSE_TOKEN;
}
case LIST_MARKER: {
if (box instanceof BlockBox) {
MarkerData markers = ((BlockBox) box).getMarkerData();

if (markers == null ||
(markers.getGlyphMarker() == null &&
markers.getTextMarker() == null &&
markers.getImageMarker() == null)) {
return FALSE_TOKEN;
}
}

GenericContentItem current = createListItemLabelMarkedContent(type, box);
_cs.beginMarkedContent(COSName.getPDFName("Span"), current.dict);
return TRUE_TOKEN;
}
case TEXT: {
GenericContentItem current = createMarkedContentStructureItem(type, box);
_cs.beginMarkedContent(COSName.getPDFName(StandardStructureTypes.SPAN), current.dict);
return TRUE_TOKEN;
}
case REPLACED: {
createStructureItem(type, box);
AbstractStructualElement struct = (AbstractStructualElement) box.getAccessibilityObject();
if (struct == null) {
struct = createStructureItem(type, box);
setupStructureElement(struct, box);
}

FigureContentItem current = createFigureContentStructureItem(type, box);

if (current != null) {
Expand Down

0 comments on commit 3477d3f

Please sign in to comment.