diff --git a/src/core/core_utils.js b/src/core/core_utils.js index a9bd298c730b5..c3de1210fb883 100644 --- a/src/core/core_utils.js +++ b/src/core/core_utils.js @@ -611,6 +611,19 @@ function getRotationMatrix(rotation, width, height) { } } +/** + * Get the number of bytes to use to represent the given positive integer. + * If x is zero, the function returns 0 which means that we don't need to waste + * a byte to represent it. + * @param {number} x - a positive integer. + * @returns {number} + */ +function getSizeInBytes(x) { + // n bits are required for numbers up to 2^n - 1. + // So for a number x, we need ceil(log2(1 + x)) bits. + return Math.ceil(Math.ceil(Math.log2(1 + x)) / 8); +} + export { arrayBuffersToBytes, codePointIter, @@ -622,6 +635,7 @@ export { getLookupTableFactory, getNewAnnotationsMap, getRotationMatrix, + getSizeInBytes, isAscii, isWhiteSpace, log2, diff --git a/src/core/worker.js b/src/core/worker.js index 70fa1c24926d2..e407577785cd3 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -35,7 +35,7 @@ import { getNewAnnotationsMap, XRefParseException, } from "./core_utils.js"; -import { Dict, Ref } from "./primitives.js"; +import { Dict, isDict, Ref } from "./primitives.js"; import { LocalPdfManager, NetworkPdfManager } from "./pdf_manager.js"; import { AnnotationFactory } from "./annotation.js"; import { clearGlobalCaches } from "./cleanup_helper.js"; @@ -726,6 +726,8 @@ class WorkerMessageHandler { acroFormRef, acroForm, xfaData, + // Use the same kind of XRef as the previous one. 
+ useXrefStream: isDict(xref.topDict, "XRef"), }).finally(() => { xref.resetNewTemporaryRef(); }); diff --git a/src/core/writer.js b/src/core/writer.js index 40941659658a5..99fa65555adb9 100644 --- a/src/core/writer.js +++ b/src/core/writer.js @@ -18,12 +18,14 @@ import { Dict, isName, Name, Ref } from "./primitives.js"; import { escapePDFName, escapeString, + getSizeInBytes, numberToString, parseXFAPath, } from "./core_utils.js"; import { SimpleDOMNode, SimpleXMLParser } from "./xml_parser.js"; import { BaseStream } from "./base_stream.js"; import { calculateMD5 } from "./crypto.js"; +import { Stream } from "./stream.js"; async function writeObject(ref, obj, buffer, { encrypt = null }) { const transform = encrypt?.createCipherTransform(ref.num, ref.gen); @@ -281,6 +283,112 @@ function updateXFA({ xfaData, xfaDatasetsRef, newRefs, xref }) { newRefs.push({ ref: xfaDatasetsRef, data }); } +async function getXRefTable(xrefInfo, baseOffset, newRefs, newXref, buffer) { + buffer.push("xref\n"); + const indexes = getIndexes(newRefs); + let indexesPosition = 0; + for (const { ref, data } of newRefs) { + if (ref.num === indexes[indexesPosition]) { + buffer.push( + `${indexes[indexesPosition]} ${indexes[indexesPosition + 1]}\n` + ); + indexesPosition += 2; + } + // The EOL is \r\n to make sure that every entry is exactly 20 bytes long. + // (see 7.5.4 - Cross-Reference Table). 
+ buffer.push( + `${baseOffset.toString().padStart(10, "0")} ${Math.min(ref.gen, 0xffff).toString().padStart(5, "0")} n\r\n` + ); + baseOffset += data.length; + } + computeIDs(baseOffset, xrefInfo, newXref); + buffer.push("trailer\n"); + await writeDict(newXref, buffer); + buffer.push("\nstartxref\n", baseOffset.toString(), "\n%%EOF\n"); +} + +function getIndexes(newRefs) { + const indexes = []; + for (const { ref } of newRefs) { + if (ref.num === indexes.at(-2) + indexes.at(-1)) { + indexes[indexes.length - 1] += 1; + } else { + indexes.push(ref.num, 1); + } + } + return indexes; +} + +async function getXRefStreamTable( + xrefInfo, + baseOffset, + newRefs, + newXref, + buffer +) { + const xrefTableData = []; + let maxOffset = 0; + let maxGen = 0; + for (const { ref, data } of newRefs) { + maxOffset = Math.max(maxOffset, baseOffset); + const gen = Math.min(ref.gen, 0xffff); + maxGen = Math.max(maxGen, gen); + xrefTableData.push([1, baseOffset, gen]); + baseOffset += data.length; + } + newXref.set("Index", getIndexes(newRefs)); + const offsetSize = getSizeInBytes(maxOffset); + const maxGenSize = getSizeInBytes(maxGen); + const sizes = [1, offsetSize, maxGenSize]; + newXref.set("W", sizes); + computeIDs(baseOffset, xrefInfo, newXref); + + const structSize = sizes.reduce((a, x) => a + x, 0); + const data = new Uint8Array(structSize * xrefTableData.length); + const stream = new Stream(data); + stream.dict = newXref; + + let offset = 0; + for (const [type, objOffset, gen] of xrefTableData) { + offset = writeInt(type, sizes[0], offset, data); + offset = writeInt(objOffset, sizes[1], offset, data); + offset = writeInt(gen, sizes[2], offset, data); + } + + await writeObject(xrefInfo.newRef, stream, buffer, {}); + buffer.push("startxref\n", baseOffset.toString(), "\n%%EOF\n"); +} + +function computeIDs(baseOffset, xrefInfo, newXref) { + if (Array.isArray(xrefInfo.fileIds) && xrefInfo.fileIds.length > 0) { + const md5 = computeMD5(baseOffset, xrefInfo); + newXref.set("ID", 
[xrefInfo.fileIds[0], md5]); + } +} + +function getTrailerDict(xrefInfo, newRefs, useXrefStream) { + const newXref = new Dict(null); + newXref.set("Prev", xrefInfo.startXRef); + const refForXrefTable = xrefInfo.newRef; + if (useXrefStream) { + newRefs.push({ ref: refForXrefTable, data: "" }); + newXref.set("Size", refForXrefTable.num + 1); + newXref.set("Type", Name.get("XRef")); + } else { + newXref.set("Size", refForXrefTable.num); + } + if (xrefInfo.rootRef !== null) { + newXref.set("Root", xrefInfo.rootRef); + } + if (xrefInfo.infoRef !== null) { + newXref.set("Info", xrefInfo.infoRef); + } + if (xrefInfo.encryptRef !== null) { + newXref.set("Encrypt", xrefInfo.encryptRef); + } + return newXref; +} + async function incrementalUpdate({ originalData, xrefInfo, @@ -293,6 +401,7 @@ async function incrementalUpdate({ acroFormRef = null, acroForm = null, xfaData = null, + useXrefStream = false, }) { await updateAcroform({ xref, @@ -314,9 +423,6 @@ async function incrementalUpdate({ }); } - const newXref = new Dict(null); - const refForXrefTable = xrefInfo.newRef; - let buffer, baseOffset; const lastByte = originalData.at(-1); if (lastByte === /* \n */ 0x0a || lastByte === /* \r */ 0x0d) { @@ -328,60 +434,23 @@ async function incrementalUpdate({ baseOffset = originalData.length + 1; } - newXref.set("Size", refForXrefTable.num + 1); - newXref.set("Prev", xrefInfo.startXRef); - newXref.set("Type", Name.get("XRef")); - - if (xrefInfo.rootRef !== null) { - newXref.set("Root", xrefInfo.rootRef); - } - if (xrefInfo.infoRef !== null) { - newXref.set("Info", xrefInfo.infoRef); - } - if (xrefInfo.encryptRef !== null) { - newXref.set("Encrypt", xrefInfo.encryptRef); - } - - // Add a ref for the new xref and sort them - newRefs.push({ ref: refForXrefTable, data: "" }); + const newXref = getTrailerDict(xrefInfo, newRefs, useXrefStream); newRefs = newRefs.sort( (a, b) => /* compare the refs */ a.ref.num - b.ref.num ); - - const xrefTableData = [[0, 1, 0xffff]]; - const indexes = 
[0, 1]; - let maxOffset = 0; - for (const { ref, data } of newRefs) { - maxOffset = Math.max(maxOffset, baseOffset); - xrefTableData.push([1, baseOffset, Math.min(ref.gen, 0xffff)]); - baseOffset += data.length; - indexes.push(ref.num, 1); + for (const { data } of newRefs) { buffer.push(data); } - newXref.set("Index", indexes); - - if (Array.isArray(xrefInfo.fileIds) && xrefInfo.fileIds.length > 0) { - const md5 = computeMD5(baseOffset, xrefInfo); - newXref.set("ID", [xrefInfo.fileIds[0], md5]); - } - - const offsetSize = Math.ceil(Math.log2(maxOffset) / 8); - const sizes = [1, offsetSize, 2]; - const structSize = sizes[0] + sizes[1] + sizes[2]; - const tableLength = structSize * xrefTableData.length; - newXref.set("W", sizes); - newXref.set("Length", tableLength); - - buffer.push(`${refForXrefTable.num} ${refForXrefTable.gen} obj\n`); - await writeDict(newXref, buffer, null); - buffer.push(" stream\n"); + await (useXrefStream + ? getXRefStreamTable(xrefInfo, baseOffset, newRefs, newXref, buffer) + : getXRefTable(xrefInfo, baseOffset, newRefs, newXref, buffer)); - const bufferLen = buffer.reduce((a, str) => a + str.length, 0); - const footer = `\nendstream\nendobj\nstartxref\n${baseOffset}\n%%EOF\n`; - const array = new Uint8Array( - originalData.length + bufferLen + tableLength + footer.length + const totalLength = buffer.reduce( + (a, str) => a + str.length, + originalData.length ); + const array = new Uint8Array(totalLength); // Original data array.set(originalData); @@ -393,16 +462,6 @@ async function incrementalUpdate({ offset += str.length; } - // New xref table - for (const [type, objOffset, gen] of xrefTableData) { - offset = writeInt(type, sizes[0], offset, array); - offset = writeInt(objOffset, sizes[1], offset, array); - offset = writeInt(gen, sizes[2], offset, array); - } - - // Add the footer - writeString(footer, offset, array); - return array; } diff --git a/test/unit/core_utils_spec.js b/test/unit/core_utils_spec.js index 
bc7266ba1c4fd..2e0fd8fa94e78 100644 --- a/test/unit/core_utils_spec.js +++ b/test/unit/core_utils_spec.js @@ -19,6 +19,7 @@ import { escapePDFName, escapeString, getInheritableProperty, + getSizeInBytes, isAscii, isWhiteSpace, log2, @@ -468,4 +469,21 @@ describe("core_utils", function () { ); }); }); + + describe("getSizeInBytes", function () { + it("should get the size in bytes to use to represent a positive integer", function () { + expect(getSizeInBytes(0)).toEqual(0); + for (let i = 1; i <= 0xff; i++) { + expect(getSizeInBytes(i)).toEqual(1); + } + + for (let i = 0x100; i <= 0xffff; i += 0x100) { + expect(getSizeInBytes(i)).toEqual(2); + } + + for (let i = 0x10000; i <= 0xffffff; i += 0x10000) { + expect(getSizeInBytes(i)).toEqual(3); + } + }); + }); }); diff --git a/test/unit/writer_spec.js b/test/unit/writer_spec.js index 6d310127741c6..885b51c229e0d 100644 --- a/test/unit/writer_spec.js +++ b/test/unit/writer_spec.js @@ -37,26 +37,55 @@ describe("Writer", function () { info: {}, }; - let data = await incrementalUpdate({ originalData, xrefInfo, newRefs }); + let data = await incrementalUpdate({ + originalData, + xrefInfo, + newRefs, + useXrefStream: true, + }); data = bytesToString(data); - const expected = + let expected = "\nabc\n" + "defg\n" + "789 0 obj\n" + - "<< /Size 790 /Prev 314 /Type /XRef /Index [0 1 123 1 456 1 789 1] " + - "/ID [(id) (\x01#Eg\x89\xab\xcd\xef\xfe\xdc\xba\x98vT2\x10)] " + - "/W [1 1 2] /Length 16>> stream\n" + - "\x00\x01\xff\xff" + - "\x01\x01\x00\x2d" + - "\x01\x05\x00\x4e" + - "\x01\x0a\x00\x00\n" + + "<< /Prev 314 /Size 790 /Type /XRef /Index [123 1 456 1 789 1] " + + "/W [1 1 1] /ID [(id) (\x01#Eg\x89\xab\xcd\xef\xfe\xdc\xba\x98vT2\x10)] " + + "/Length 9>> stream\n" + + "\x01\x01\x2d" + + "\x01\x05\x4e" + + "\x01\x0a\x00\n" + "endstream\n" + "endobj\n" + "startxref\n" + "10\n" + "%%EOF\n"; + expect(data).toEqual(expected); + data = await incrementalUpdate({ + originalData, + xrefInfo, + newRefs, + useXrefStream: false, + }); + 
data = bytesToString(data); + + expected = + "\nabc\n" + + "defg\n" + + "xref\n" + + "123 1\n" + + "0000000001 00045 n\r\n" + + "456 1\n" + + "0000000005 00078 n\r\n" + + "789 1\n" + + "0000000010 00000 n\r\n" + + "trailer\n" + + "<< /Prev 314 /Size 789 " + + "/ID [(id) (\x01#Eg\x89\xab\xcd\xef\xfe\xdc\xba\x98vT2\x10)]>>\n" + + "startxref\n" + + "10\n" + + "%%EOF\n"; expect(data).toEqual(expected); }); @@ -74,17 +103,21 @@ describe("Writer", function () { info: {}, }; - let data = await incrementalUpdate({ originalData, xrefInfo, newRefs }); + let data = await incrementalUpdate({ + originalData, + xrefInfo, + newRefs, + useXrefStream: true, + }); data = bytesToString(data); const expected = "\nabc\n" + "789 0 obj\n" + - "<< /Size 790 /Prev 314 /Type /XRef /Index [0 1 123 1 789 1] " + - "/W [1 1 2] /Length 12>> stream\n" + - "\x00\x01\xff\xff" + - "\x01\x01\x00\x2d" + - "\x01\x05\x00\x00\n" + + "<< /Prev 314 /Size 790 /Type /XRef /Index [123 1 789 1] " + + "/W [1 1 1] /Length 6>> stream\n" + + "\x01\x01\x2d" + + "\x01\x05\x00\n" + "endstream\n" + "endobj\n" + "startxref\n" + @@ -187,6 +220,7 @@ describe("Writer", function () { acroForm, xfaData, xref: {}, + useXrefStream: true, }); data = bytesToString(data); @@ -202,8 +236,8 @@ describe("Writer", function () { "endstream\n" + "endobj\n" + "131415 0 obj\n" + - "<< /Size 131416 /Prev 314 /Type /XRef /Index [0 1 789 1 101112 1 131415 1] /W [1 1 2] /Length 16>> stream\n" + - "\u0000\u0001ÿÿ\u0001\u0001\u0000\u0000\u0001[\u0000\u0000\u0001¹\u0000\u0000\n" + + "<< /Prev 314 /Size 131416 /Type /XRef /Index [789 1 101112 1 131415 1] /W [1 1 0] /Length 6>> stream\n" + + "\x01\x01\x01[\x01¹\n" + "endstream\n" + "endobj\n" + "startxref\n" +