Skip to content

Commit

Permalink
Merge pull request #17636 from calixteman/bug1878916
Browse files Browse the repository at this point in the history
When updating, write the xref table in the same format as the previous one (bug 1878916)
  • Loading branch information
calixteman authored Feb 13, 2024
2 parents e60329c + 2133da1 commit 14874e5
Show file tree
Hide file tree
Showing 5 changed files with 204 additions and 77 deletions.
14 changes: 14 additions & 0 deletions src/core/core_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,19 @@ function getRotationMatrix(rotation, width, height) {
}
}

/**
 * Get the number of bytes to use to represent the given positive integer.
 * If x is zero, the function returns 0 which means that we don't need to waste
 * a byte to represent it.
 * @param {number} x - a positive integer.
 * @returns {number} the smallest number of bytes able to hold `x`.
 */
function getSizeInBytes(x) {
  // Count the base-256 digits of x using only exact operations (division by a
  // power of two and Math.floor). This avoids relying on the precision of
  // Math.log2, which is implementation-approximated and could be off by one
  // bit right at a byte boundary.
  let size = 0;
  for (
    let rest = x;
    rest > 0 && Number.isFinite(rest);
    rest = Math.floor(rest / 256)
  ) {
    size++;
  }
  return size;
}

export {
arrayBuffersToBytes,
codePointIter,
Expand All @@ -622,6 +635,7 @@ export {
getLookupTableFactory,
getNewAnnotationsMap,
getRotationMatrix,
getSizeInBytes,
isAscii,
isWhiteSpace,
log2,
Expand Down
4 changes: 3 additions & 1 deletion src/core/worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ import {
getNewAnnotationsMap,
XRefParseException,
} from "./core_utils.js";
import { Dict, Ref } from "./primitives.js";
import { Dict, isDict, Ref } from "./primitives.js";
import { LocalPdfManager, NetworkPdfManager } from "./pdf_manager.js";
import { AnnotationFactory } from "./annotation.js";
import { clearGlobalCaches } from "./cleanup_helper.js";
Expand Down Expand Up @@ -726,6 +726,8 @@ class WorkerMessageHandler {
acroFormRef,
acroForm,
xfaData,
// Use the same kind of XRef as the previous one.
useXrefStream: isDict(xref.topDict, "XRef"),
}).finally(() => {
xref.resetNewTemporaryRef();
});
Expand Down
177 changes: 118 additions & 59 deletions src/core/writer.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ import { Dict, isName, Name, Ref } from "./primitives.js";
import {
escapePDFName,
escapeString,
getSizeInBytes,
numberToString,
parseXFAPath,
} from "./core_utils.js";
import { SimpleDOMNode, SimpleXMLParser } from "./xml_parser.js";
import { BaseStream } from "./base_stream.js";
import { calculateMD5 } from "./crypto.js";
import { Stream } from "./stream.js";

async function writeObject(ref, obj, buffer, { encrypt = null }) {
const transform = encrypt?.createCipherTransform(ref.num, ref.gen);
Expand Down Expand Up @@ -281,6 +283,112 @@ function updateXFA({ xfaData, xfaDatasetsRef, newRefs, xref }) {
newRefs.push({ ref: xfaDatasetsRef, data });
}

/**
 * Append a classic (non-stream) cross-reference table, its trailer
 * dictionary and the `startxref`/`%%EOF` footer to `buffer`.
 *
 * NOTE(review): the subsection headers are emitted by walking `newRefs` in
 * order, so the caller is expected to pass `newRefs` sorted by object number.
 *
 * @param {Object} xrefInfo - forwarded to `computeIDs`.
 * @param {number} baseOffset - file offset of the first entry of `newRefs`.
 * @param {Array<Object>} newRefs - `{ ref, data }` entries to index.
 * @param {Dict} newXref - the trailer dictionary to serialize.
 * @param {Array<string>} buffer - output chunks; it's appended to in place.
 */
async function getXRefTable(xrefInfo, baseOffset, newRefs, newXref, buffer) {
  buffer.push("xref\n");

  // Flat list of (first object number, count) pairs, one per subsection.
  const subsections = getIndexes(newRefs);
  let cursor = 0;
  let position = baseOffset;

  for (const { ref, data } of newRefs) {
    const firstNum = subsections[cursor];
    if (firstNum === ref.num) {
      // This ref opens a new subsection: write its header.
      const count = subsections[cursor + 1];
      buffer.push(`${firstNum} ${count}\n`);
      cursor += 2;
    }

    const offsetField = position.toString().padStart(10, "0");
    const genField = Math.min(ref.gen, 0xffff).toString().padStart(5, "0");
    // The EOL is \r\n to make sure that every entry is exactly 20 bytes long.
    // (see 7.5.4 - Cross-Reference Table).
    buffer.push(`${offsetField} ${genField} n\r\n`);

    position += data.length;
  }

  // Once every object has been accounted for, `position` is the offset where
  // the xref table itself starts.
  computeIDs(position, xrefInfo, newXref);
  buffer.push("trailer\n");
  await writeDict(newXref, buffer);
  buffer.push("\nstartxref\n", position.toString(), "\n%%EOF\n");
}

/**
 * Group the object numbers of `newRefs` into runs of consecutive numbers.
 *
 * @param {Array<Object>} newRefs - entries having a `ref.num` property.
 * @returns {Array<number>} a flat array of (first number, count) pairs, one
 *   pair per run, in the order the runs are encountered.
 */
function getIndexes(newRefs) {
  const ranges = [];
  for (const entry of newRefs) {
    const { num } = entry.ref;
    const countPos = ranges.length - 1;
    // The current run can be extended only when `num` is the number right
    // after its last element, i.e. start + count.
    const extendsRun =
      countPos > 0 && ranges[countPos - 1] + ranges[countPos] === num;
    if (extendsRun) {
      ranges[countPos]++;
    } else {
      ranges.push(num, 1);
    }
  }
  return ranges;
}

/**
 * Append a cross-reference stream object and the `startxref`/`%%EOF` footer
 * to `buffer`.
 *
 * The `Index` and `W` entries (and `ID`, through `computeIDs`) are set on
 * `newXref`, which is then used as the dictionary of the written stream.
 *
 * @param {Object} xrefInfo - provides `newRef`, the reference under which the
 *   stream is written; it's also forwarded to `computeIDs`.
 * @param {number} baseOffset - file offset of the first entry of `newRefs`.
 * @param {Array<Object>} newRefs - `{ ref, data }` entries to index.
 * @param {Dict} newXref - the dictionary of the xref stream.
 * @param {Array<string>} buffer - output chunks; it's appended to in place.
 */
async function getXRefStreamTable(
  xrefInfo,
  baseOffset,
  newRefs,
  newXref,
  buffer
) {
  // First pass: collect the entries and find the largest values, since the
  // field widths must be known before anything can be serialized.
  const entries = [];
  let position = baseOffset;
  let largestOffset = 0;
  let largestGen = 0;
  for (const { ref, data } of newRefs) {
    const gen = Math.min(ref.gen, 0xffff);
    largestOffset = Math.max(largestOffset, position);
    largestGen = Math.max(largestGen, gen);
    // Every entry is written with 1 as its type field.
    entries.push([1, position, gen]);
    position += data.length;
  }

  newXref.set("Index", getIndexes(newRefs));
  const sizes = [1, getSizeInBytes(largestOffset), getSizeInBytes(largestGen)];
  newXref.set("W", sizes);
  computeIDs(position, xrefInfo, newXref);

  const [typeSize, offsetSize, genSize] = sizes;
  const entrySize = typeSize + offsetSize + genSize;
  const bytes = new Uint8Array(entrySize * entries.length);
  const xrefStream = new Stream(bytes);
  xrefStream.dict = newXref;

  // Second pass: serialize each field using the width computed above.
  let cursor = 0;
  for (const fields of entries) {
    fields.forEach((value, i) => {
      cursor = writeInt(value, sizes[i], cursor, bytes);
    });
  }

  await writeObject(xrefInfo.newRef, xrefStream, buffer, {});
  buffer.push("startxref\n", position.toString(), "\n%%EOF\n");
}

/**
 * Set the `ID` entry of `newXref` when the original file ids are available.
 *
 * The first id is kept as is and the second one is the value returned by
 * `computeMD5`. Nothing is done when `xrefInfo.fileIds` isn't a non-empty
 * array.
 *
 * @param {number} baseOffset - forwarded to `computeMD5`.
 * @param {Object} xrefInfo - provides `fileIds`; forwarded to `computeMD5`.
 * @param {Dict} newXref - the dictionary to update.
 */
function computeIDs(baseOffset, xrefInfo, newXref) {
  const { fileIds } = xrefInfo;
  if (!Array.isArray(fileIds) || fileIds.length === 0) {
    return;
  }
  const md5 = computeMD5(baseOffset, xrefInfo);
  newXref.set("ID", [xrefInfo.fileIds[0], md5]);
}

/**
 * Create the dictionary used either as the trailer of a classic xref table
 * or as the dictionary of an xref stream.
 *
 * When `useXrefStream` is true, an entry for the xref stream object itself
 * (with empty data) is appended to `newRefs`, `Size` accounts for that extra
 * object and `Type` is set to `/XRef`.
 *
 * @param {Object} xrefInfo - provides `startXRef`, `newRef`, `rootRef`,
 *   `infoRef` and `encryptRef`.
 * @param {Array<Object>} newRefs - the new `{ ref, data }` entries; it's
 *   modified in place when `useXrefStream` is true.
 * @param {boolean} useXrefStream - true to describe an xref stream.
 * @returns {Dict}
 */
function getTrailerDict(xrefInfo, newRefs, useXrefStream) {
  const trailer = new Dict(null);
  trailer.set("Prev", xrefInfo.startXRef);

  const xrefRef = xrefInfo.newRef;
  if (!useXrefStream) {
    trailer.set("Size", xrefRef.num);
  } else {
    newRefs.push({ ref: xrefRef, data: "" });
    trailer.set("Size", xrefRef.num + 1);
    trailer.set("Type", Name.get("XRef"));
  }

  // Only the references which are available (i.e. not null) are copied.
  const optionalEntries = [
    ["Root", xrefInfo.rootRef],
    ["Info", xrefInfo.infoRef],
    ["Encrypt", xrefInfo.encryptRef],
  ];
  for (const [key, value] of optionalEntries) {
    if (value !== null) {
      trailer.set(key, value);
    }
  }
  return trailer;
}

async function incrementalUpdate({
originalData,
xrefInfo,
Expand All @@ -293,6 +401,7 @@ async function incrementalUpdate({
acroFormRef = null,
acroForm = null,
xfaData = null,
useXrefStream = false,
}) {
await updateAcroform({
xref,
Expand All @@ -314,9 +423,6 @@ async function incrementalUpdate({
});
}

const newXref = new Dict(null);
const refForXrefTable = xrefInfo.newRef;

let buffer, baseOffset;
const lastByte = originalData.at(-1);
if (lastByte === /* \n */ 0x0a || lastByte === /* \r */ 0x0d) {
Expand All @@ -328,60 +434,23 @@ async function incrementalUpdate({
baseOffset = originalData.length + 1;
}

newXref.set("Size", refForXrefTable.num + 1);
newXref.set("Prev", xrefInfo.startXRef);
newXref.set("Type", Name.get("XRef"));

if (xrefInfo.rootRef !== null) {
newXref.set("Root", xrefInfo.rootRef);
}
if (xrefInfo.infoRef !== null) {
newXref.set("Info", xrefInfo.infoRef);
}
if (xrefInfo.encryptRef !== null) {
newXref.set("Encrypt", xrefInfo.encryptRef);
}

// Add a ref for the new xref and sort them
newRefs.push({ ref: refForXrefTable, data: "" });
const newXref = getTrailerDict(xrefInfo, newRefs, useXrefStream);
newRefs = newRefs.sort(
(a, b) => /* compare the refs */ a.ref.num - b.ref.num
);

const xrefTableData = [[0, 1, 0xffff]];
const indexes = [0, 1];
let maxOffset = 0;
for (const { ref, data } of newRefs) {
maxOffset = Math.max(maxOffset, baseOffset);
xrefTableData.push([1, baseOffset, Math.min(ref.gen, 0xffff)]);
baseOffset += data.length;
indexes.push(ref.num, 1);
for (const { data } of newRefs) {
buffer.push(data);
}

newXref.set("Index", indexes);

if (Array.isArray(xrefInfo.fileIds) && xrefInfo.fileIds.length > 0) {
const md5 = computeMD5(baseOffset, xrefInfo);
newXref.set("ID", [xrefInfo.fileIds[0], md5]);
}

const offsetSize = Math.ceil(Math.log2(maxOffset) / 8);
const sizes = [1, offsetSize, 2];
const structSize = sizes[0] + sizes[1] + sizes[2];
const tableLength = structSize * xrefTableData.length;
newXref.set("W", sizes);
newXref.set("Length", tableLength);

buffer.push(`${refForXrefTable.num} ${refForXrefTable.gen} obj\n`);
await writeDict(newXref, buffer, null);
buffer.push(" stream\n");
await (useXrefStream
? getXRefStreamTable(xrefInfo, baseOffset, newRefs, newXref, buffer)
: getXRefTable(xrefInfo, baseOffset, newRefs, newXref, buffer));

const bufferLen = buffer.reduce((a, str) => a + str.length, 0);
const footer = `\nendstream\nendobj\nstartxref\n${baseOffset}\n%%EOF\n`;
const array = new Uint8Array(
originalData.length + bufferLen + tableLength + footer.length
const totalLength = buffer.reduce(
(a, str) => a + str.length,
originalData.length
);
const array = new Uint8Array(totalLength);

// Original data
array.set(originalData);
Expand All @@ -393,16 +462,6 @@ async function incrementalUpdate({
offset += str.length;
}

// New xref table
for (const [type, objOffset, gen] of xrefTableData) {
offset = writeInt(type, sizes[0], offset, array);
offset = writeInt(objOffset, sizes[1], offset, array);
offset = writeInt(gen, sizes[2], offset, array);
}

// Add the footer
writeString(footer, offset, array);

return array;
}

Expand Down
18 changes: 18 additions & 0 deletions test/unit/core_utils_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import {
escapePDFName,
escapeString,
getInheritableProperty,
getSizeInBytes,
isAscii,
isWhiteSpace,
log2,
Expand Down Expand Up @@ -468,4 +469,21 @@ describe("core_utils", function () {
);
});
});

describe("getSizeInBytes", function () {
  it("should get the size in bytes to use to represent a positive integer", function () {
    // Zero doesn't need any byte.
    expect(getSizeInBytes(0)).toEqual(0);
    for (let i = 1; i <= 0xff; i++) {
      expect(getSizeInBytes(i)).toEqual(1);
    }

    for (let i = 0x100; i <= 0xffff; i += 0x100) {
      expect(getSizeInBytes(i)).toEqual(2);
    }
    // The strided loop above stops at 0xff00, so check the largest 2-byte
    // value explicitly.
    expect(getSizeInBytes(0xffff)).toEqual(2);

    for (let i = 0x10000; i <= 0xffffff; i += 0x10000) {
      expect(getSizeInBytes(i)).toEqual(3);
    }
    // Likewise, the loop above stops at 0xff0000: check both sides of the
    // 3-byte/4-byte boundary.
    expect(getSizeInBytes(0xffffff)).toEqual(3);
    expect(getSizeInBytes(0x1000000)).toEqual(4);
  });
});
});
Loading

0 comments on commit 14874e5

Please sign in to comment.