Skip to content

Commit

Permalink
When updating, write the xref table in the same format as the previou…
Browse files Browse the repository at this point in the history
…s one (bug 1878916)

The specs are unclear about what kind of xref table format must be used.
In checking the validity of some pdfs in the preflight tool from Acrobat
we can guess that having the same format is the correct way to do.
The pdf in the mentioned bug, after having been changed, wasn't correctly
displayed in neither Chrome nor Acrobat: it's now fixed.
  • Loading branch information
calixteman committed Feb 13, 2024
1 parent e60329c commit 2133da1
Show file tree
Hide file tree
Showing 5 changed files with 204 additions and 77 deletions.
14 changes: 14 additions & 0 deletions src/core/core_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,19 @@ function getRotationMatrix(rotation, width, height) {
}
}

/**
* Get the number of bytes to use to represent the given positive integer.
* If n is zero, the function returns 0 which means that we don't need to waste
* a byte to represent it.
* @param {number} x - a positive integer.
* @returns {number}
*/
function getSizeInBytes(x) {
// n bits are required for numbers up to 2^n - 1.
// So for a number x, we need ceil(log2(1 + x)) bits.
return Math.ceil(Math.ceil(Math.log2(1 + x)) / 8);
}

export {
arrayBuffersToBytes,
codePointIter,
Expand All @@ -622,6 +635,7 @@ export {
getLookupTableFactory,
getNewAnnotationsMap,
getRotationMatrix,
getSizeInBytes,
isAscii,
isWhiteSpace,
log2,
Expand Down
4 changes: 3 additions & 1 deletion src/core/worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ import {
getNewAnnotationsMap,
XRefParseException,
} from "./core_utils.js";
import { Dict, Ref } from "./primitives.js";
import { Dict, isDict, Ref } from "./primitives.js";
import { LocalPdfManager, NetworkPdfManager } from "./pdf_manager.js";
import { AnnotationFactory } from "./annotation.js";
import { clearGlobalCaches } from "./cleanup_helper.js";
Expand Down Expand Up @@ -726,6 +726,8 @@ class WorkerMessageHandler {
acroFormRef,
acroForm,
xfaData,
// Use the same kind of XRef as the previous one.
useXrefStream: isDict(xref.topDict, "XRef"),
}).finally(() => {
xref.resetNewTemporaryRef();
});
Expand Down
177 changes: 118 additions & 59 deletions src/core/writer.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ import { Dict, isName, Name, Ref } from "./primitives.js";
import {
escapePDFName,
escapeString,
getSizeInBytes,
numberToString,
parseXFAPath,
} from "./core_utils.js";
import { SimpleDOMNode, SimpleXMLParser } from "./xml_parser.js";
import { BaseStream } from "./base_stream.js";
import { calculateMD5 } from "./crypto.js";
import { Stream } from "./stream.js";

async function writeObject(ref, obj, buffer, { encrypt = null }) {
const transform = encrypt?.createCipherTransform(ref.num, ref.gen);
Expand Down Expand Up @@ -281,6 +283,112 @@ function updateXFA({ xfaData, xfaDatasetsRef, newRefs, xref }) {
newRefs.push({ ref: xfaDatasetsRef, data });
}

async function getXRefTable(xrefInfo, baseOffset, newRefs, newXref, buffer) {
buffer.push("xref\n");
const indexes = getIndexes(newRefs);
let indexesPosition = 0;
for (const { ref, data } of newRefs) {
if (ref.num === indexes[indexesPosition]) {
buffer.push(
`${indexes[indexesPosition]} ${indexes[indexesPosition + 1]}\n`
);
indexesPosition += 2;
}
// The EOL is \r\n to make sure that every entry is exactly 20 bytes long.
// (see 7.5.4 - Cross-Reference Table).
buffer.push(
`${baseOffset.toString().padStart(10, "0")} ${Math.min(ref.gen, 0xffff).toString().padStart(5, "0")} n\r\n`
);
baseOffset += data.length;
}
computeIDs(baseOffset, xrefInfo, newXref);
buffer.push("trailer\n");
await writeDict(newXref, buffer);
buffer.push("\nstartxref\n", baseOffset.toString(), "\n%%EOF\n");
}

function getIndexes(newRefs) {
const indexes = [];
for (const { ref } of newRefs) {
if (ref.num === indexes.at(-2) + indexes.at(-1)) {
indexes[indexes.length - 1] += 1;
} else {
indexes.push(ref.num, 1);
}
}
return indexes;
}

async function getXRefStreamTable(
xrefInfo,
baseOffset,
newRefs,
newXref,
buffer
) {
const xrefTableData = [];
let maxOffset = 0;
let maxGen = 0;
for (const { ref, data } of newRefs) {
maxOffset = Math.max(maxOffset, baseOffset);
const gen = Math.min(ref.gen, 0xffff);
maxGen = Math.max(maxGen, gen);
xrefTableData.push([1, baseOffset, gen]);
baseOffset += data.length;
}
newXref.set("Index", getIndexes(newRefs));
const offsetSize = getSizeInBytes(maxOffset);
const maxGenSize = getSizeInBytes(maxGen);
const sizes = [1, offsetSize, maxGenSize];
newXref.set("W", sizes);
computeIDs(baseOffset, xrefInfo, newXref);

const structSize = sizes.reduce((a, x) => a + x, 0);
const data = new Uint8Array(structSize * xrefTableData.length);
const stream = new Stream(data);
stream.dict = newXref;

let offset = 0;
for (const [type, objOffset, gen] of xrefTableData) {
offset = writeInt(type, sizes[0], offset, data);
offset = writeInt(objOffset, sizes[1], offset, data);
offset = writeInt(gen, sizes[2], offset, data);
}

await writeObject(xrefInfo.newRef, stream, buffer, {});
buffer.push("startxref\n", baseOffset.toString(), "\n%%EOF\n");
}

function computeIDs(baseOffset, xrefInfo, newXref) {
if (Array.isArray(xrefInfo.fileIds) && xrefInfo.fileIds.length > 0) {
const md5 = computeMD5(baseOffset, xrefInfo);
newXref.set("ID", [xrefInfo.fileIds[0], md5]);
}
}

function getTrailerDict(xrefInfo, newRefs, useXrefStream) {
const newXref = new Dict(null);
newXref.set("Prev", xrefInfo.startXRef);
const refForXrefTable = xrefInfo.newRef;
if (useXrefStream) {
newRefs.push({ ref: refForXrefTable, data: "" });
newXref.set("Size", refForXrefTable.num + 1);
newXref.set("Type", Name.get("XRef"));
} else {
newXref.set("Size", refForXrefTable.num);
}
if (xrefInfo.rootRef !== null) {
newXref.set("Root", xrefInfo.rootRef);
}
if (xrefInfo.infoRef !== null) {
newXref.set("Info", xrefInfo.infoRef);
}
if (xrefInfo.encryptRef !== null) {
newXref.set("Encrypt", xrefInfo.encryptRef);
}
return newXref;
}

async function incrementalUpdate({
originalData,
xrefInfo,
Expand All @@ -293,6 +401,7 @@ async function incrementalUpdate({
acroFormRef = null,
acroForm = null,
xfaData = null,
useXrefStream = false,
}) {
await updateAcroform({
xref,
Expand All @@ -314,9 +423,6 @@ async function incrementalUpdate({
});
}

const newXref = new Dict(null);
const refForXrefTable = xrefInfo.newRef;

let buffer, baseOffset;
const lastByte = originalData.at(-1);
if (lastByte === /* \n */ 0x0a || lastByte === /* \r */ 0x0d) {
Expand All @@ -328,60 +434,23 @@ async function incrementalUpdate({
baseOffset = originalData.length + 1;
}

newXref.set("Size", refForXrefTable.num + 1);
newXref.set("Prev", xrefInfo.startXRef);
newXref.set("Type", Name.get("XRef"));

if (xrefInfo.rootRef !== null) {
newXref.set("Root", xrefInfo.rootRef);
}
if (xrefInfo.infoRef !== null) {
newXref.set("Info", xrefInfo.infoRef);
}
if (xrefInfo.encryptRef !== null) {
newXref.set("Encrypt", xrefInfo.encryptRef);
}

// Add a ref for the new xref and sort them
newRefs.push({ ref: refForXrefTable, data: "" });
const newXref = getTrailerDict(xrefInfo, newRefs, useXrefStream);
newRefs = newRefs.sort(
(a, b) => /* compare the refs */ a.ref.num - b.ref.num
);

const xrefTableData = [[0, 1, 0xffff]];
const indexes = [0, 1];
let maxOffset = 0;
for (const { ref, data } of newRefs) {
maxOffset = Math.max(maxOffset, baseOffset);
xrefTableData.push([1, baseOffset, Math.min(ref.gen, 0xffff)]);
baseOffset += data.length;
indexes.push(ref.num, 1);
for (const { data } of newRefs) {
buffer.push(data);
}

newXref.set("Index", indexes);

if (Array.isArray(xrefInfo.fileIds) && xrefInfo.fileIds.length > 0) {
const md5 = computeMD5(baseOffset, xrefInfo);
newXref.set("ID", [xrefInfo.fileIds[0], md5]);
}

const offsetSize = Math.ceil(Math.log2(maxOffset) / 8);
const sizes = [1, offsetSize, 2];
const structSize = sizes[0] + sizes[1] + sizes[2];
const tableLength = structSize * xrefTableData.length;
newXref.set("W", sizes);
newXref.set("Length", tableLength);

buffer.push(`${refForXrefTable.num} ${refForXrefTable.gen} obj\n`);
await writeDict(newXref, buffer, null);
buffer.push(" stream\n");
await (useXrefStream
? getXRefStreamTable(xrefInfo, baseOffset, newRefs, newXref, buffer)
: getXRefTable(xrefInfo, baseOffset, newRefs, newXref, buffer));

const bufferLen = buffer.reduce((a, str) => a + str.length, 0);
const footer = `\nendstream\nendobj\nstartxref\n${baseOffset}\n%%EOF\n`;
const array = new Uint8Array(
originalData.length + bufferLen + tableLength + footer.length
const totalLength = buffer.reduce(
(a, str) => a + str.length,
originalData.length
);
const array = new Uint8Array(totalLength);

// Original data
array.set(originalData);
Expand All @@ -393,16 +462,6 @@ async function incrementalUpdate({
offset += str.length;
}

// New xref table
for (const [type, objOffset, gen] of xrefTableData) {
offset = writeInt(type, sizes[0], offset, array);
offset = writeInt(objOffset, sizes[1], offset, array);
offset = writeInt(gen, sizes[2], offset, array);
}

// Add the footer
writeString(footer, offset, array);

return array;
}

Expand Down
18 changes: 18 additions & 0 deletions test/unit/core_utils_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import {
escapePDFName,
escapeString,
getInheritableProperty,
getSizeInBytes,
isAscii,
isWhiteSpace,
log2,
Expand Down Expand Up @@ -468,4 +469,21 @@ describe("core_utils", function () {
);
});
});

describe("getSizeInBytes", function () {
it("should get the size in bytes to use to represent a positive integer", function () {
expect(getSizeInBytes(0)).toEqual(0);
for (let i = 1; i <= 0xff; i++) {
expect(getSizeInBytes(i)).toEqual(1);
}

for (let i = 0x100; i <= 0xffff; i += 0x100) {
expect(getSizeInBytes(i)).toEqual(2);
}

for (let i = 0x10000; i <= 0xffffff; i += 0x10000) {
expect(getSizeInBytes(i)).toEqual(3);
}
});
});
});
Loading

0 comments on commit 2133da1

Please sign in to comment.