Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

XFA - Overwrite AcroForm dictionary when saving if no datasets in XFA (bug 1720179) #13967

Merged
merged 1 commit into from
Sep 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/core/catalog.js
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,11 @@ class Catalog {
return shadow(this, "acroForm", acroForm);
}

get acroFormRef() {
const value = this._catDict.getRaw("AcroForm");
return shadow(this, "acroFormRef", isRef(value) ? value : null);
}

get metadata() {
const streamRef = this._catDict.getRaw("Metadata");
if (!isRef(streamRef)) {
Expand Down
14 changes: 13 additions & 1 deletion src/core/worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,7 @@ class WorkerMessageHandler {
const promises = [
pdfManager.onLoadedStream(),
pdfManager.ensureCatalog("acroForm"),
pdfManager.ensureCatalog("acroFormRef"),
pdfManager.ensureDoc("xref"),
pdfManager.ensureDoc("startXRef"),
];
Expand All @@ -597,6 +598,7 @@ class WorkerMessageHandler {
return Promise.all(promises).then(function ([
stream,
acroForm,
acroFormRef,
xref,
startXRef,
...refs
Expand All @@ -621,15 +623,22 @@ class WorkerMessageHandler {
}
}

const xfa = (acroForm instanceof Dict && acroForm.get("XFA")) || [];
const xfa = (acroForm instanceof Dict && acroForm.get("XFA")) || null;
let xfaDatasets = null;
let hasDatasets = false;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you actually need this separate boolean?
Can't you just check !!xfaDatasets in the src/core/writer.js code, and thus avoid the need to track closely related state twice?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In case xfaDatasets is null, I set it to a new reference after having visited the XFA array so it's no more null.
So the only way for xfaDatasets to be null is to not have an array for XFA.
So when we have an array we must know if the reference we've has been newly created (and in this case we must update AcroForm) or not.

We could probably get rid of this boolean in moving https://github.com/mozilla/pdf.js/blob/master/src/core/worker.js#L650 in writer.js because the afaik the xref for the xref table must be the last one (so it's why I get the new ref for xfaDatasets before calling incrementalUpdate), but I guess it's better to do that in a follow-up.

if (Array.isArray(xfa)) {
for (let i = 0, ii = xfa.length; i < ii; i += 2) {
if (xfa[i] === "datasets") {
xfaDatasets = xfa[i + 1];
acroFormRef = null;
hasDatasets = true;
}
}
if (xfaDatasets === null) {
xfaDatasets = xref.getNewRef();
}
} else {
acroFormRef = null;
// TODO: Support XFA streams.
warn("Unsupported XFA type.");
}
Expand Down Expand Up @@ -666,6 +675,9 @@ class WorkerMessageHandler {
newRefs,
xref,
datasetsRef: xfaDatasets,
hasDatasets,
acroFormRef,
acroForm,
xfaData,
});
});
Expand Down
62 changes: 59 additions & 3 deletions src/core/writer.js
Original file line number Diff line number Diff line change
Expand Up @@ -146,10 +146,54 @@ function writeXFADataForAcroform(str, newRefs) {
return buffer.join("");
}

function updateXFA(xfaData, datasetsRef, newRefs, xref) {
if (datasetsRef === null || xref === null) {
function updateXFA({
xfaData,
datasetsRef,
hasDatasets,
acroFormRef,
acroForm,
newRefs,
xref,
xrefInfo,
}) {
if (xref === null) {
return;
}

if (!hasDatasets) {
if (!acroFormRef) {
warn("XFA - Cannot save it");
return;
calixteman marked this conversation as resolved.
Show resolved Hide resolved
}

// We've a XFA array which doesn't contain a datasets entry.
// So we'll update the AcroForm dictionary to have an XFA containing
// the datasets.
const oldXfa = acroForm.get("XFA");
const newXfa = oldXfa.slice();
newXfa.splice(2, 0, "datasets");
newXfa.splice(3, 0, datasetsRef);

acroForm.set("XFA", newXfa);

const encrypt = xref.encrypt;
let transform = null;
if (encrypt) {
transform = encrypt.createCipherTransform(
acroFormRef.num,
acroFormRef.gen
);
}

const buffer = [`${acroFormRef.num} ${acroFormRef.gen} obj\n`];
writeDict(acroForm, buffer, transform);
buffer.push("\n");

acroForm.set("XFA", oldXfa);

newRefs.push({ ref: acroFormRef, data: buffer.join("") });
}

if (xfaData === null) {
const datasets = xref.fetchIfRef(datasetsRef);
xfaData = writeXFADataForAcroform(datasets.getString(), newRefs);
Expand Down Expand Up @@ -178,9 +222,21 @@ function incrementalUpdate({
newRefs,
xref = null,
datasetsRef = null,
hasDatasets = false,
acroFormRef = null,
acroForm = null,
xfaData = null,
}) {
updateXFA(xfaData, datasetsRef, newRefs, xref);
updateXFA({
xfaData,
datasetsRef,
hasDatasets,
acroFormRef,
acroForm,
newRefs,
xref,
xrefInfo,
});

const newXref = new Dict(null);
const refForXrefTable = xrefInfo.newRef;
Expand Down
63 changes: 63 additions & 0 deletions test/unit/writer_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -142,4 +142,67 @@ describe("Writer", function () {
expect(buffer.join("")).toEqual(expected);
});
});

describe("XFA", function () {
it("should update AcroForm when no datasets in XFA array", function () {
const originalData = new Uint8Array();
const newRefs = [];

const acroForm = new Dict(null);
acroForm.set("XFA", [
"preamble",
Ref.get(123, 0),
"postamble",
Ref.get(456, 0),
]);
const acroFormRef = Ref.get(789, 0);
const datasetsRef = Ref.get(101112, 0);
const xfaData = "<hello>world</hello>";

const xrefInfo = {
newRef: Ref.get(131415, 0),
startXRef: 314,
fileIds: null,
rootRef: null,
infoRef: null,
encryptRef: null,
filename: "foo.pdf",
info: {},
};

let data = incrementalUpdate({
originalData,
xrefInfo,
newRefs,
datasetsRef,
hasDatasets: false,
acroFormRef,
acroForm,
xfaData,
xref: {},
});
data = bytesToString(data);

const expected =
"\n" +
"789 0 obj\n" +
"<< /XFA [(preamble) 123 0 R (datasets) 101112 0 R (postamble) 456 0 R]>>\n" +
"101112 0 obj\n" +
"<< /Type /EmbeddedFile /Length 20>>\n" +
"stream\n" +
"<hello>world</hello>\n" +
"endstream\n" +
"endobj\n" +
"131415 0 obj\n" +
"<< /Size 131416 /Prev 314 /Type /XRef /Index [0 1 789 1 101112 1 131415 1] /W [1 1 2] /Length 16>> stream\n" +
"\u0000\u0001ÿÿ\u0001\u0001\u0000\u0000\u0001T\u0000\u0000\u0001²\u0000\u0000\n" +
"endstream\n" +
"endobj\n" +
"startxref\n" +
"178\n" +
"%%EOF\n";

expect(data).toEqual(expected);
});
});
});