Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[api-minor] Use the Fetch API, when supported, to load PDF documents in Node.js environments #17706

Merged
merged 1 commit into from
Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion src/display/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,16 @@ function getDocument(src) {
PDFJSDev.test("GENERIC") &&
isNodeJS
) {
return new PDFNodeStream(params);
// Reports whether the Fetch API can be used here: the global `fetch` and
// `Response` must both exist, and `Response.prototype` must have a `body`
// property.
const isFetchSupported = () => {
  if (typeof fetch === "undefined" || typeof Response === "undefined") {
    return false;
  }
  return "body" in Response.prototype;
};
return isFetchSupported() && isValidFetchUrl(params.url)
? new PDFFetchStream(params)
: new PDFNodeStream(params);
}
return isValidFetchUrl(params.url)
? new PDFFetchStream(params)
Expand Down Expand Up @@ -762,6 +771,9 @@ class PDFDocumentProxy {

if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
// For testing purposes.
Object.defineProperty(this, "getNetworkStreamName", {
value: () => this._transport.getNetworkStreamName(),
});
Object.defineProperty(this, "getXFADatasets", {
value: () => this._transport.getXFADatasets(),
});
Expand Down Expand Up @@ -2344,6 +2356,9 @@ class WorkerTransport {

if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
// For testing purposes.
Object.defineProperty(this, "getNetworkStreamName", {
value: () => networkStream?.constructor?.name || null,
});
Object.defineProperty(this, "getXFADatasets", {
value: () =>
this.messageHandler.sendWithPromise("GetXFADatasets", null),
Expand Down
29 changes: 22 additions & 7 deletions test/unit/api_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import {
import {
buildGetDocumentParams,
CMAP_URL,
createTemporaryNodeServer,
DefaultFileReaderFactory,
TEST_PDFS_PATH,
} from "./test_utils.js";
Expand Down Expand Up @@ -67,13 +68,27 @@ describe("api", function () {
buildGetDocumentParams(tracemonkeyFileName);

let CanvasFactory;
let tempServer = null;

beforeAll(function () {
CanvasFactory = new DefaultCanvasFactory();

if (isNodeJS) {
tempServer = createTemporaryNodeServer();
}
});

afterAll(function () {
CanvasFactory = null;

if (isNodeJS) {
// Stop the server from accepting new connections once all tests have
// finished.
const { server } = tempServer;
server.close();

tempServer = null;
}
});

function waitSome(callback) {
Expand Down Expand Up @@ -119,13 +134,10 @@ describe("api", function () {
});

it("creates pdf doc from URL-object", async function () {
if (isNodeJS) {
pending("window.location is not supported in Node.js.");
}
const urlObj = new URL(
TEST_PDFS_PATH + basicApiFileName,
window.location
);
const urlObj = isNodeJS
? new URL(`http://127.0.0.1:${tempServer.port}/${basicApiFileName}`)
: new URL(TEST_PDFS_PATH + basicApiFileName, window.location);

const loadingTask = getDocument(urlObj);
expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true);
const pdfDocument = await loadingTask.promise;
Expand All @@ -134,6 +146,9 @@ describe("api", function () {
expect(pdfDocument instanceof PDFDocumentProxy).toEqual(true);
expect(pdfDocument.numPages).toEqual(3);

// Ensure that the Fetch API was used to load the PDF document.
expect(pdfDocument.getNetworkStreamName()).toEqual("PDFFetchStream");

await loadingTask.destroy();
});

Expand Down
1 change: 1 addition & 0 deletions test/unit/clitests.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"encodings_spec.js",
"evaluator_spec.js",
"event_utils_spec.js",
"fetch_stream_spec.js",
"font_substitutions_spec.js",
"function_spec.js",
"message_handler_spec.js",
Expand Down
32 changes: 28 additions & 4 deletions test/unit/fetch_stream_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,40 @@
* limitations under the License.
*/

import { AbortException } from "../../src/shared/util.js";
import { AbortException, isNodeJS } from "../../src/shared/util.js";
import { createTemporaryNodeServer } from "./test_utils.js";
import { PDFFetchStream } from "../../src/display/fetch_stream.js";

describe("fetch_stream", function () {
const pdfUrl = new URL("../pdfs/tracemonkey.pdf", window.location).href;
let tempServer = null;

// Returns the URL of the `tracemonkey.pdf` test file: served by the temporary
// local HTTP server in Node.js, resolved relative to the current page
// otherwise. Evaluated lazily, since `tempServer` is only assigned in
// `beforeAll`.
function getPdfUrl() {
  if (isNodeJS) {
    const { port } = tempServer;
    return `http://127.0.0.1:${port}/tracemonkey.pdf`;
  }
  const pageRelativeUrl = new URL("../pdfs/tracemonkey.pdf", window.location);
  return pageRelativeUrl.href;
}
const pdfLength = 1016315;

beforeAll(function () {
if (isNodeJS) {
tempServer = createTemporaryNodeServer();
}
});

afterAll(function () {
if (isNodeJS) {
// Stop the server from accepting new connections once all tests have
// finished.
const { server } = tempServer;
server.close();

tempServer = null;
}
});

it("read with streaming", async function () {
const stream = new PDFFetchStream({
url: pdfUrl,
url: getPdfUrl(),
disableStream: false,
disableRange: true,
});
Expand Down Expand Up @@ -57,7 +81,7 @@ describe("fetch_stream", function () {
it("read ranges with streaming", async function () {
const rangeSize = 32768;
const stream = new PDFFetchStream({
url: pdfUrl,
url: getPdfUrl(),
rangeChunkSize: rangeSize,
disableStream: false,
disableRange: false,
Expand Down
49 changes: 9 additions & 40 deletions test/unit/node_stream_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
*/

import { AbortException, isNodeJS } from "../../src/shared/util.js";
import { createTemporaryNodeServer } from "./test_utils.js";
import { PDFNodeStream } from "../../src/display/node_stream.js";

// Ensure that these tests only run in Node.js environments.
Expand All @@ -25,12 +26,10 @@ if (!isNodeJS) {

const path = await __non_webpack_import__("path");
const url = await __non_webpack_import__("url");
const http = await __non_webpack_import__("http");
const fs = await __non_webpack_import__("fs");

describe("node_stream", function () {
let server = null;
let port = null;
let tempServer = null;

const pdf = url.parse(
encodeURI(
"file://" + path.join(process.cwd(), "./test/pdfs/tracemonkey.pdf")
Expand All @@ -39,50 +38,20 @@ describe("node_stream", function () {
const pdfLength = 1016315;

beforeAll(function () {
// Create http server to serve pdf data for tests.
server = http
.createServer((request, response) => {
const filePath = process.cwd() + "/test/pdfs" + request.url;
fs.lstat(filePath, (error, stat) => {
if (error) {
response.writeHead(404);
response.end(`File ${request.url} not found!`);
return;
}
if (!request.headers.range) {
const contentLength = stat.size;
const stream = fs.createReadStream(filePath);
response.writeHead(200, {
"Content-Type": "application/pdf",
"Content-Length": contentLength,
"Accept-Ranges": "bytes",
});
stream.pipe(response);
} else {
const [start, end] = request.headers.range
.split("=")[1]
.split("-")
.map(x => Number(x));
const stream = fs.createReadStream(filePath, { start, end });
response.writeHead(206, {
"Content-Type": "application/pdf",
});
stream.pipe(response);
}
});
})
.listen(0); /* Listen on a random free port */
port = server.address().port;
tempServer = createTemporaryNodeServer();
});

afterAll(function () {
// Stop the server from accepting new connections once all tests have finished.
const { server } = tempServer;
server.close();

tempServer = null;
});

it("read both http(s) and filesystem pdf files", async function () {
const stream1 = new PDFNodeStream({
url: `http://127.0.0.1:${port}/tracemonkey.pdf`,
url: `http://127.0.0.1:${tempServer.port}/tracemonkey.pdf`,
rangeChunkSize: 65536,
disableStream: true,
disableRange: true,
Expand Down Expand Up @@ -144,7 +113,7 @@ describe("node_stream", function () {
it("read custom ranges for both http(s) and filesystem urls", async function () {
const rangeSize = 32768;
const stream1 = new PDFNodeStream({
url: `http://127.0.0.1:${port}/tracemonkey.pdf`,
url: `http://127.0.0.1:${tempServer.port}/tracemonkey.pdf`,
length: pdfLength,
rangeChunkSize: rangeSize,
disableStream: true,
Expand Down
49 changes: 47 additions & 2 deletions test/unit/test_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,16 @@
* limitations under the License.
*/

import { assert, isNodeJS } from "../../src/shared/util.js";
import { NullStream, StringStream } from "../../src/core/stream.js";
import { Page, PDFDocument } from "../../src/core/document.js";
import { isNodeJS } from "../../src/shared/util.js";
import { Ref } from "../../src/core/primitives.js";

let fs;
let fs, http;
if (isNodeJS) {
// Native packages.
fs = await __non_webpack_import__("fs");
http = await __non_webpack_import__("http");
}

const TEST_PDFS_PATH = isNodeJS ? "./test/pdfs/" : "../pdfs/";
Expand Down Expand Up @@ -144,10 +145,54 @@ function createIdFactory(pageIndex) {
return page._localIdFactory;
}

/**
 * Starts a temporary HTTP server, on a random free port, that serves the
 * files found in `<cwd>/test/pdfs` for use by the unit-tests.
 *
 * Requests without a `Range` header get the whole file (status 200), and
 * single byte-range requests get the requested part (status 206).
 * Missing files, directories, and URLs containing `..` path segments yield
 * status 404; malformed or unsatisfiable ranges yield status 416.
 *
 * The caller is responsible for calling `server.close()` when done.
 *
 * @returns {{server: Object, port: number}} The running server and the port
 *   that it is listening on.
 */
function createTemporaryNodeServer() {
  assert(isNodeJS, "Should only be used in Node.js environments.");

  const sendNotFound = (request, response) => {
    response.writeHead(404);
    response.end(`File ${request.url} not found!`);
  };

  // Parses a single-range "bytes=start-end" header into inclusive offsets
  // that are clamped to the file size; returns `null` when the header is
  // malformed or cannot be satisfied.
  const parseRange = (rangeHeader, size) => {
    const match = /^bytes=(\d*)-(\d*)$/.exec(rangeHeader.trim());
    if (!match || (match[1] === "" && match[2] === "")) {
      return null;
    }
    let start, end;
    if (match[1] === "") {
      // Suffix range ("bytes=-N"): the last N bytes of the file.
      start = Math.max(size - Number(match[2]), 0);
      end = size - 1;
    } else {
      start = Number(match[1]);
      // Open-ended range ("bytes=N-"): everything from N to the end.
      end = match[2] === "" ? size - 1 : Math.min(Number(match[2]), size - 1);
    }
    return start <= end ? { start, end } : null;
  };

  const pipeFile = (filePath, options, response) => {
    const stream = fs.createReadStream(filePath, options);
    // Abort the response, rather than crashing the test-runner with an
    // unhandled "error" event, if reading the file fails.
    stream.on("error", () => response.destroy());
    stream.pipe(response);
  };

  // Create http server to serve pdf data for tests.
  const server = http
    .createServer((request, response) => {
      // Never serve anything outside of the "test/pdfs" directory.
      if (request.url.split("/").includes("..")) {
        sendNotFound(request, response);
        return;
      }
      const filePath = process.cwd() + "/test/pdfs" + request.url;
      fs.lstat(filePath, (error, stat) => {
        // Directories cannot be streamed; treat them like missing files.
        if (error || stat.isDirectory()) {
          sendNotFound(request, response);
          return;
        }
        const rangeHeader = request.headers.range;
        if (!rangeHeader) {
          response.writeHead(200, {
            "Content-Type": "application/pdf",
            "Content-Length": stat.size,
            "Accept-Ranges": "bytes",
          });
          pipeFile(filePath, undefined, response);
          return;
        }
        const range = parseRange(rangeHeader, stat.size);
        if (!range) {
          response.writeHead(416, {
            "Content-Range": `bytes */${stat.size}`,
          });
          response.end();
          return;
        }
        const { start, end } = range;
        response.writeHead(206, {
          "Content-Type": "application/pdf",
          "Content-Length": end - start + 1,
          "Content-Range": `bytes ${start}-${end}/${stat.size}`,
        });
        pipeFile(filePath, { start, end }, response);
      });
    })
    .listen(0); /* Listen on a random free port */

  return {
    server,
    port: server.address().port,
  };
}

export {
buildGetDocumentParams,
CMAP_URL,
createIdFactory,
createTemporaryNodeServer,
DefaultFileReaderFactory,
STANDARD_FONT_DATA_URL,
TEST_PDFS_PATH,
Expand Down