Skip to content

Commit

Permalink
Merge pull request #17706 from Snuffleupagus/Node-Fetch-API
Browse files Browse the repository at this point in the history
[api-minor] Use the Fetch API, when supported, to load PDF documents in Node.js environments
  • Loading branch information
Snuffleupagus authored Mar 19, 2024
2 parents a142c8c + eded037 commit 0022310
Show file tree
Hide file tree
Showing 6 changed files with 123 additions and 54 deletions.
17 changes: 16 additions & 1 deletion src/display/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,16 @@ function getDocument(src) {
PDFJSDev.test("GENERIC") &&
isNodeJS
) {
return new PDFNodeStream(params);
// Reports whether the Fetch API can be used in the current environment:
// both the global `fetch` and `Response` must be defined, and
// `Response.prototype` must expose a `body` member.
const isFetchSupported = () => {
  if (typeof fetch === "undefined" || typeof Response === "undefined") {
    return false;
  }
  return "body" in Response.prototype;
};
return isFetchSupported() && isValidFetchUrl(params.url)
? new PDFFetchStream(params)
: new PDFNodeStream(params);
}
return isValidFetchUrl(params.url)
? new PDFFetchStream(params)
Expand Down Expand Up @@ -762,6 +771,9 @@ class PDFDocumentProxy {

if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
// For testing purposes.
Object.defineProperty(this, "getNetworkStreamName", {
value: () => this._transport.getNetworkStreamName(),
});
Object.defineProperty(this, "getXFADatasets", {
value: () => this._transport.getXFADatasets(),
});
Expand Down Expand Up @@ -2359,6 +2371,9 @@ class WorkerTransport {

if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
// For testing purposes.
Object.defineProperty(this, "getNetworkStreamName", {
value: () => networkStream?.constructor?.name || null,
});
Object.defineProperty(this, "getXFADatasets", {
value: () =>
this.messageHandler.sendWithPromise("GetXFADatasets", null),
Expand Down
29 changes: 22 additions & 7 deletions test/unit/api_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import {
import {
buildGetDocumentParams,
CMAP_URL,
createTemporaryNodeServer,
DefaultFileReaderFactory,
TEST_PDFS_PATH,
} from "./test_utils.js";
Expand Down Expand Up @@ -67,13 +68,27 @@ describe("api", function () {
buildGetDocumentParams(tracemonkeyFileName);

let CanvasFactory;
let tempServer = null;

beforeAll(function () {
CanvasFactory = new DefaultCanvasFactory();

// In Node.js environments, start a temporary local HTTP server so that
// tests can load PDF files through a `http://127.0.0.1:<port>/` URL.
if (isNodeJS) {
tempServer = createTemporaryNodeServer();
}
});

afterAll(function () {
CanvasFactory = null;

if (isNodeJS) {
// Stop the temporary server from accepting new connections once all of
// the tests have finished.
const { server } = tempServer;
server.close();

tempServer = null;
}
});

function waitSome(callback) {
Expand Down Expand Up @@ -119,13 +134,10 @@ describe("api", function () {
});

it("creates pdf doc from URL-object", async function () {
if (isNodeJS) {
pending("window.location is not supported in Node.js.");
}
const urlObj = new URL(
TEST_PDFS_PATH + basicApiFileName,
window.location
);
const urlObj = isNodeJS
? new URL(`http://127.0.0.1:${tempServer.port}/${basicApiFileName}`)
: new URL(TEST_PDFS_PATH + basicApiFileName, window.location);

const loadingTask = getDocument(urlObj);
expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true);
const pdfDocument = await loadingTask.promise;
Expand All @@ -134,6 +146,9 @@ describe("api", function () {
expect(pdfDocument instanceof PDFDocumentProxy).toEqual(true);
expect(pdfDocument.numPages).toEqual(3);

// Ensure that the Fetch API was used to load the PDF document.
expect(pdfDocument.getNetworkStreamName()).toEqual("PDFFetchStream");

await loadingTask.destroy();
});

Expand Down
1 change: 1 addition & 0 deletions test/unit/clitests.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"encodings_spec.js",
"evaluator_spec.js",
"event_utils_spec.js",
"fetch_stream_spec.js",
"font_substitutions_spec.js",
"function_spec.js",
"message_handler_spec.js",
Expand Down
32 changes: 28 additions & 4 deletions test/unit/fetch_stream_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,40 @@
* limitations under the License.
*/

import { AbortException } from "../../src/shared/util.js";
import { AbortException, isNodeJS } from "../../src/shared/util.js";
import { createTemporaryNodeServer } from "./test_utils.js";
import { PDFFetchStream } from "../../src/display/fetch_stream.js";

describe("fetch_stream", function () {
const pdfUrl = new URL("../pdfs/tracemonkey.pdf", window.location).href;
let tempServer = null;

// Returns the absolute URL of the `tracemonkey.pdf` test file for the
// environment that the tests are currently running in.
function getPdfUrl() {
  if (!isNodeJS) {
    // Resolve the file relative to the current `window.location`.
    const browserUrl = new URL("../pdfs/tracemonkey.pdf", window.location);
    return browserUrl.href;
  }
  // In Node.js the file is requested from the temporary local server.
  const { port } = tempServer;
  return `http://127.0.0.1:${port}/tracemonkey.pdf`;
}
const pdfLength = 1016315;

beforeAll(function () {
// In Node.js environments, start a temporary local HTTP server; its port
// is used by `getPdfUrl` to build the URL of the test PDF file.
if (isNodeJS) {
tempServer = createTemporaryNodeServer();
}
});

afterAll(function () {
if (isNodeJS) {
// Stop the temporary server from accepting new connections once all of
// the tests have finished.
const { server } = tempServer;
server.close();

tempServer = null;
}
});

it("read with streaming", async function () {
const stream = new PDFFetchStream({
url: pdfUrl,
url: getPdfUrl(),
disableStream: false,
disableRange: true,
});
Expand Down Expand Up @@ -57,7 +81,7 @@ describe("fetch_stream", function () {
it("read ranges with streaming", async function () {
const rangeSize = 32768;
const stream = new PDFFetchStream({
url: pdfUrl,
url: getPdfUrl(),
rangeChunkSize: rangeSize,
disableStream: false,
disableRange: false,
Expand Down
49 changes: 9 additions & 40 deletions test/unit/node_stream_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
*/

import { AbortException, isNodeJS } from "../../src/shared/util.js";
import { createTemporaryNodeServer } from "./test_utils.js";
import { PDFNodeStream } from "../../src/display/node_stream.js";

// Ensure that these tests only run in Node.js environments.
Expand All @@ -25,12 +26,10 @@ if (!isNodeJS) {

const path = await __non_webpack_import__("path");
const url = await __non_webpack_import__("url");
const http = await __non_webpack_import__("http");
const fs = await __non_webpack_import__("fs");

describe("node_stream", function () {
let server = null;
let port = null;
let tempServer = null;

const pdf = url.parse(
encodeURI(
"file://" + path.join(process.cwd(), "./test/pdfs/tracemonkey.pdf")
Expand All @@ -39,50 +38,20 @@ describe("node_stream", function () {
const pdfLength = 1016315;

beforeAll(function () {
// Create http server to serve pdf data for tests.
server = http
.createServer((request, response) => {
const filePath = process.cwd() + "/test/pdfs" + request.url;
fs.lstat(filePath, (error, stat) => {
if (error) {
response.writeHead(404);
response.end(`File ${request.url} not found!`);
return;
}
if (!request.headers.range) {
const contentLength = stat.size;
const stream = fs.createReadStream(filePath);
response.writeHead(200, {
"Content-Type": "application/pdf",
"Content-Length": contentLength,
"Accept-Ranges": "bytes",
});
stream.pipe(response);
} else {
const [start, end] = request.headers.range
.split("=")[1]
.split("-")
.map(x => Number(x));
const stream = fs.createReadStream(filePath, { start, end });
response.writeHead(206, {
"Content-Type": "application/pdf",
});
stream.pipe(response);
}
});
})
.listen(0); /* Listen on a random free port */
port = server.address().port;
tempServer = createTemporaryNodeServer();
});

afterAll(function () {
// Stop the temporary server from accepting new connections once all of the
// tests have finished.
const { server } = tempServer;
server.close();

tempServer = null;
});

it("read both http(s) and filesystem pdf files", async function () {
const stream1 = new PDFNodeStream({
url: `http://127.0.0.1:${port}/tracemonkey.pdf`,
url: `http://127.0.0.1:${tempServer.port}/tracemonkey.pdf`,
rangeChunkSize: 65536,
disableStream: true,
disableRange: true,
Expand Down Expand Up @@ -144,7 +113,7 @@ describe("node_stream", function () {
it("read custom ranges for both http(s) and filesystem urls", async function () {
const rangeSize = 32768;
const stream1 = new PDFNodeStream({
url: `http://127.0.0.1:${port}/tracemonkey.pdf`,
url: `http://127.0.0.1:${tempServer.port}/tracemonkey.pdf`,
length: pdfLength,
rangeChunkSize: rangeSize,
disableStream: true,
Expand Down
49 changes: 47 additions & 2 deletions test/unit/test_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,16 @@
* limitations under the License.
*/

import { assert, isNodeJS } from "../../src/shared/util.js";
import { NullStream, StringStream } from "../../src/core/stream.js";
import { Page, PDFDocument } from "../../src/core/document.js";
import { isNodeJS } from "../../src/shared/util.js";
import { Ref } from "../../src/core/primitives.js";

let fs;
let fs, http;
if (isNodeJS) {
// Native packages.
fs = await __non_webpack_import__("fs");
http = await __non_webpack_import__("http");
}

const TEST_PDFS_PATH = isNodeJS ? "./test/pdfs/" : "../pdfs/";
Expand Down Expand Up @@ -144,10 +145,54 @@ function createIdFactory(pageIndex) {
return page._localIdFactory;
}

/**
 * Maps the URL of an incoming request onto a file path below the `test/pdfs`
 * directory of the current working directory.
 *
 * The URL is run through the URL parser first, which drops any query string
 * and collapses `.`/`..` path segments, so that a request such as
 * `/../../package.json` cannot address files outside of `test/pdfs`.
 *
 * @param {string} requestUrl - The raw `request.url` value.
 * @returns {string|null} The file path, or `null` if the URL is unparsable.
 */
function resolveTestPdfPath(requestUrl) {
  let pathname;
  try {
    ({ pathname } = new URL(requestUrl, "http://127.0.0.1"));
  } catch {
    return null;
  }
  return process.cwd() + "/test/pdfs" + pathname;
}

/**
 * Parses a single-range `Range: bytes=...` request header.
 *
 * Supports the `first-last`, open-ended `first-` and suffix `-length` forms;
 * the returned positions are inclusive and `end` is clamped to the last byte
 * of the file.
 *
 * @param {string} header - The value of the `Range` request header.
 * @param {number} size - The size of the requested file, in bytes.
 * @returns {{start: number, end: number}|null} The requested byte range, or
 *   `null` if the header is malformed (or uses multiple ranges) and should
 *   simply be ignored. A range with `start >= size` is unsatisfiable.
 */
function parseByteRange(header, size) {
  const match = /^bytes=(\d*)-(\d*)$/.exec(header.trim());
  if (!match) {
    return null;
  }
  const [, first, last] = match;

  if (first === "") {
    if (last === "") {
      return null;
    }
    // Suffix form: the final `last` bytes of the file.
    const suffixLength = Math.min(Number(last), size);
    return { start: size - suffixLength, end: size - 1 };
  }
  const start = Number(first);
  if (last === "") {
    // Open-ended form: everything from `start` to the end of the file.
    return { start, end: size - 1 };
  }
  if (Number(last) < start) {
    return null; // Invalid range specification.
  }
  return { start, end: Math.min(Number(last), size - 1) };
}

/**
 * Creates a temporary HTTP server, listening on a random free port, that
 * serves the files found in the `test/pdfs` directory (relative to the
 * current working directory), with support for single byte-range requests.
 *
 * The caller is responsible for closing the returned server.
 *
 * @returns {{server: Object, port: number}} The server instance and the port
 *   that it's listening on.
 */
function createTemporaryNodeServer() {
  assert(isNodeJS, "Should only be used in Node.js environments.");

  const sendNotFound = (request, response) => {
    response.writeHead(404);
    response.end(`File ${request.url} not found!`);
  };

  const sendFile = (response, filePath, statusCode, headers, range) => {
    const stream = fs.createReadStream(filePath, range);
    // Without a listener, a read failure would surface as an unhandled
    // "error" event and bring down the whole test process.
    stream.on("error", () => response.destroy());
    response.writeHead(statusCode, headers);
    stream.pipe(response);
  };

  // Create http server to serve pdf data for tests.
  const server = http
    .createServer((request, response) => {
      const filePath = resolveTestPdfPath(request.url);
      if (filePath === null) {
        sendNotFound(request, response);
        return;
      }
      // Use `stat` (rather than `lstat`) so that the reported size is that
      // of the file which is actually being read, even through a symlink.
      fs.stat(filePath, (error, stat) => {
        // Directories and other non-regular files cannot be streamed.
        if (error || !stat.isFile()) {
          sendNotFound(request, response);
          return;
        }
        const { size } = stat;
        const rangeHeader = request.headers.range;
        const range = rangeHeader ? parseByteRange(rangeHeader, size) : null;

        if (range === null) {
          sendFile(response, filePath, 200, {
            "Content-Type": "application/pdf",
            "Content-Length": size,
            "Accept-Ranges": "bytes",
          });
          return;
        }
        if (range.start >= size) {
          // The requested range lies entirely beyond the end of the file.
          response.writeHead(416, { "Content-Range": `bytes */${size}` });
          response.end();
          return;
        }
        sendFile(
          response,
          filePath,
          206,
          {
            "Content-Type": "application/pdf",
            "Content-Length": range.end - range.start + 1,
            "Content-Range": `bytes ${range.start}-${range.end}/${size}`,
          },
          range
        );
      });
    })
    .listen(0); /* Listen on a random free port */

  return {
    server,
    port: server.address().port,
  };
}

export {
buildGetDocumentParams,
CMAP_URL,
createIdFactory,
createTemporaryNodeServer,
DefaultFileReaderFactory,
STANDARD_FONT_DATA_URL,
TEST_PDFS_PATH,
Expand Down

0 comments on commit 0022310

Please sign in to comment.