Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
Snuffleupagus committed Sep 30, 2021
1 parent d3ca28b commit 758be72
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 66 deletions.
82 changes: 72 additions & 10 deletions src/core/cmap.js
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,15 @@ const BUILT_IN_CMAPS = [
// large ranges, such as e.g. 0xFFFFFFFF (fixes issue11922_reduced.pdf).
const MAX_MAP_RANGE = 2 ** 24 - 1; // = 0xFFFFFF

/**
 * Converts a CID stored as a string into its integer value.
 *
 * A one-character string yields the code unit of that character; any other
 * length is treated as big-endian, combining the first two code units as
 * `(first << 8) | second`.
 *
 * @param {number} charCode - The character code the CID belongs to (not used
 *   by the conversion itself).
 * @param {string} cid - The CID string to convert.
 * @returns {number} The integer CID.
 */
function convertCidString(charCode, cid) {
  const first = cid.charCodeAt(0);
  if (cid.length === 1) {
    return first;
  }
  return (first << 8) | cid.charCodeAt(1);
}

// CMap, not to be confused with TrueType's cmap.
class CMap {
constructor(builtInCMap = false) {
Expand Down Expand Up @@ -266,15 +275,23 @@ class CMap {
this._map[src] = dst;
}

lookup(code) {
lookupInteger(code) {
const cid = this._map[code];
if (typeof cid === "string") {
return convertCidString(code, cid);
}
return cid;
}

lookupRaw(code) {
return this._map[code];
}

contains(code) {
return this._map[code] !== undefined;
}

forEach(callback) {
forEachInteger(callback) {
// Most maps have fewer than 65536 entries, and for those we use normal
// array iteration. But really sparse tables are possible -- e.g. with
// indices in the *billions*. For such tables we use for..in, which isn't
Expand All @@ -284,13 +301,50 @@ class CMap {
const length = map.length;
if (length <= 0x10000) {
for (let i = 0; i < length; i++) {
if (map[i] !== undefined) {
callback(i, map[i]);
let cid = map[i];
if (cid !== undefined) {
if (typeof cid === "string") {
cid = convertCidString(i, cid);
}
callback(i, cid);
}
}
} else {
for (const i in map) {
callback(i, map[i]);
let cid = map[i];
if (typeof cid === "string") {
cid = convertCidString(i, cid);
}
callback(i, cid);
}
}
}

forEachString(callback) {
// Most maps have fewer than 65536 entries, and for those we use normal
// array iteration. But really sparse tables are possible -- e.g. with
// indices in the *billions*. For such tables we use for..in, which isn't
// ideal because it stringifies the indices for all present elements, but
// it does avoid iterating over every undefined entry.
const map = this._map;
const length = map.length;
if (length <= 0x10000) {
for (let i = 0; i < length; i++) {
let token = map[i];
if (token !== undefined) {
if (typeof token === "number") {
token = String.fromCodePoint(token);
}
callback(i, token);
}
}
} else {
for (const i in map) {
let token = map[i];
if (typeof token === "number") {
token = String.fromCodePoint(token);
}
callback(i, token);
}
}
}
Expand Down Expand Up @@ -399,20 +453,28 @@ class IdentityCMap extends CMap {
unreachable("should not call mapCidOne");
}

lookup(code) {
lookupInteger(code) {
return Number.isInteger(code) && code <= 0xffff ? code : undefined;
}

lookupRaw(code) {
this.lookupInteger(code);
}

contains(code) {
return Number.isInteger(code) && code <= 0xffff;
}

forEach(callback) {
forEachInteger(callback) {
for (let i = 0; i <= 0xffff; i++) {
callback(i, i);
}
}

/**
 * Not supported on the identity map: the body only reports the call through
 * `unreachable` and never invokes `callback`.
 * NOTE(review): `unreachable` is defined outside this view; presumably it
 * throws -- confirm.
 *
 * @param {function} callback - Unused.
 */
forEachString(callback) {
unreachable("should not call forEachString");
}

charCodeOf(value) {
return Number.isInteger(value) && value <= 0xffff ? value : -1;
}
Expand Down Expand Up @@ -970,10 +1032,10 @@ const CMapFactory = (function CMapFactoryClosure() {
cMap.numCodespaceRanges = cMap.useCMap.numCodespaceRanges;
}
// Merge the map into the current one, making sure not to override
// any previously defined entries.
cMap.useCMap.forEach(function (key, value) {
// any previously defined entries
cMap.useCMap.forEachInteger(function (key, value) {
if (!cMap.contains(key)) {
cMap.mapOne(key, cMap.useCMap.lookup(key));
cMap.mapOne(key, cMap.useCMap.lookupRaw(key));
}
});

Expand Down
19 changes: 9 additions & 10 deletions src/core/evaluator.js
Original file line number Diff line number Diff line change
Expand Up @@ -3405,17 +3405,15 @@ class PartialEvaluator {
useCMap: null,
});
const toUnicode = [];
properties.cMap.forEach(function (charcode, cid) {
properties.cMap.forEachInteger(function (charcode, cid) {
if (cid > 0xffff) {
throw new FormatError("Max size of CID is 65,535");
}
// e) Map the CID obtained in step (a) according to the CMap
// obtained in step (d), producing a Unicode value.
const ucs2 = ucs2CMap.lookup(cid);
const ucs2 = ucs2CMap.lookupInteger(cid);
if (ucs2) {
toUnicode[charcode] = String.fromCharCode(
(ucs2.charCodeAt(0) << 8) + ucs2.charCodeAt(1)
);
toUnicode[charcode] = String.fromCodePoint(ucs2);
}
});
return new ToUnicodeMap(toUnicode);
Expand Down Expand Up @@ -3454,16 +3452,17 @@ class PartialEvaluator {
// Convert UTF-16BE
// NOTE: cmap can be a sparse array, so use its `forEachString` method
// instead of `for(;;)` to iterate over all keys.
cmap.forEach(function (charCode, token) {
cmap.forEachString(function (charCode, token) {
const tokenLen = token.length;
// Some cmaps contain *only* CID characters (fixes issue9367.pdf).
if (typeof token === "number") {
map[charCode] = String.fromCodePoint(token);
if (tokenLen === 1) {
map[charCode] = token;
return;
}
const str = [];
for (let k = 0; k < token.length; k += 2) {
for (let k = 0; k < tokenLen; k += 2) {
const w1 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1);
if ((w1 & 0xf800) !== 0xd800) {
if ((w1 & 0xf800) !== 0xd800 || tokenLen === 2) {
// w1 < 0xD800 || w1 > 0xDFFF
str.push(w1);
continue;
Expand Down
32 changes: 3 additions & 29 deletions src/core/fonts.js
Original file line number Diff line number Diff line change
Expand Up @@ -401,21 +401,6 @@ function buildToFontChar(encoding, glyphsUnicodeMap, differences) {
return toFontChar;
}

/**
 * Converts a CID stored as a one- or two-character string into an integer.
 *
 * Two-character strings are read big-endian: `(first << 8) | second`.
 * Strings of any other length are unsupported: depending on `shouldThrow`
 * the function either throws a `FormatError` or reports the problem through
 * `warn` and returns the string unchanged.
 *
 * @param {number} charCode - The character code, used only in the message.
 * @param {string} cid - The CID string to convert.
 * @param {boolean} [shouldThrow=false] - Throw instead of warning on an
 *   unsupported length.
 * @returns {number|string} The integer CID, or the original `cid` string
 *   when it is unsupported and `shouldThrow` is false.
 * @throws {FormatError} When the length is unsupported and `shouldThrow` is
 *   true.
 */
function convertCidString(charCode, cid, shouldThrow = false) {
  const { length } = cid;
  if (length === 1) {
    return cid.charCodeAt(0);
  }
  if (length === 2) {
    return (cid.charCodeAt(0) << 8) | cid.charCodeAt(1);
  }

  const msg = `Unsupported CID string (charCode ${charCode}): "${cid}".`;
  if (!shouldThrow) {
    warn(msg);
    return cid;
  }
  throw new FormatError(msg);
}

/**
* Rebuilds the char code to glyph ID map by moving all char codes to the
* private use area. This is done to avoid issues with various problematic
Expand Down Expand Up @@ -2626,10 +2611,7 @@ class Font {
const cidToGidMap = properties.cidToGidMap || [];
const isCidToGidMapEmpty = cidToGidMap.length === 0;

properties.cMap.forEach(function (charCode, cid) {
if (typeof cid === "string") {
cid = convertCidString(charCode, cid, /* shouldThrow = */ true);
}
properties.cMap.forEachInteger(function (charCode, cid) {
if (cid > 0xffff) {
throw new FormatError("Max size of CID is 65,535");
}
Expand Down Expand Up @@ -3074,11 +3056,7 @@ class Font {
// finding the charcode via unicodeToCID map
let charcode = 0;
if (this.composite && this.cMap.contains(glyphUnicode)) {
charcode = this.cMap.lookup(glyphUnicode);

if (typeof charcode === "string") {
charcode = convertCidString(glyphUnicode, charcode);
}
charcode = this.cMap.lookupInteger(glyphUnicode);
}
// ... via toUnicode map
if (!charcode && this.toUnicode) {
Expand Down Expand Up @@ -3106,11 +3084,7 @@ class Font {

let widthCode = charcode;
if (this.cMap && this.cMap.contains(charcode)) {
widthCode = this.cMap.lookup(charcode);

if (typeof widthCode === "string") {
widthCode = convertCidString(charcode, widthCode);
}
widthCode = this.cMap.lookupInteger(charcode);
}
width = this.widths[widthCode];
width = isNum(width) ? width : this.defaultWidth;
Expand Down
34 changes: 17 additions & 17 deletions test/unit/cmap_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ describe("cmap", function () {
"endbfchar\n";
const stream = new StringStream(str);
const cmap = await CMapFactory.create({ encoding: stream });
expect(cmap.lookup(0x03)).toEqual(String.fromCharCode(0x00));
expect(cmap.lookup(0x04)).toEqual(String.fromCharCode(0x01));
expect(cmap.lookup(0x05)).toBeUndefined();
expect(cmap.lookupInteger(0x03)).toEqual(0x00);
expect(cmap.lookupInteger(0x04)).toEqual(0x01);
expect(cmap.lookupInteger(0x05)).toBeUndefined();
});

it("parses beginbfrange with range", async function () {
Expand All @@ -60,10 +60,10 @@ describe("cmap", function () {
"endbfrange\n";
const stream = new StringStream(str);
const cmap = await CMapFactory.create({ encoding: stream });
expect(cmap.lookup(0x05)).toBeUndefined();
expect(cmap.lookup(0x06)).toEqual(String.fromCharCode(0x00));
expect(cmap.lookup(0x0b)).toEqual(String.fromCharCode(0x05));
expect(cmap.lookup(0x0c)).toBeUndefined();
expect(cmap.lookupInteger(0x05)).toBeUndefined();
expect(cmap.lookupInteger(0x06)).toEqual(0x00);
expect(cmap.lookupInteger(0x0b)).toEqual(0x05);
expect(cmap.lookupInteger(0x0c)).toBeUndefined();
});

it("parses beginbfrange with array", async function () {
Expand All @@ -73,10 +73,10 @@ describe("cmap", function () {
"endbfrange\n";
const stream = new StringStream(str);
const cmap = await CMapFactory.create({ encoding: stream });
expect(cmap.lookup(0x0c)).toBeUndefined();
expect(cmap.lookup(0x0d)).toEqual(0x00);
expect(cmap.lookup(0x12)).toEqual(0x05);
expect(cmap.lookup(0x13)).toBeUndefined();
expect(cmap.lookupInteger(0x0c)).toBeUndefined();
expect(cmap.lookupInteger(0x0d)).toEqual(0x00);
expect(cmap.lookupInteger(0x12)).toEqual(0x05);
expect(cmap.lookupInteger(0x13)).toBeUndefined();
});

it("parses begincidchar", async function () {
Expand All @@ -86,8 +86,8 @@ describe("cmap", function () {
"endcidchar\n";
const stream = new StringStream(str);
const cmap = await CMapFactory.create({ encoding: stream });
expect(cmap.lookup(0x14)).toEqual(0x00);
expect(cmap.lookup(0x15)).toBeUndefined();
expect(cmap.lookupInteger(0x14)).toEqual(0x00);
expect(cmap.lookupInteger(0x15)).toBeUndefined();
});

it("parses begincidrange", async function () {
Expand All @@ -97,10 +97,10 @@ describe("cmap", function () {
"endcidrange\n";
const stream = new StringStream(str);
const cmap = await CMapFactory.create({ encoding: stream });
expect(cmap.lookup(0x15)).toBeUndefined();
expect(cmap.lookup(0x16)).toEqual(0x00);
expect(cmap.lookup(0x1b)).toEqual(0x05);
expect(cmap.lookup(0x1c)).toBeUndefined();
expect(cmap.lookupInteger(0x15)).toBeUndefined();
expect(cmap.lookupInteger(0x16)).toEqual(0x00);
expect(cmap.lookupInteger(0x1b)).toEqual(0x05);
expect(cmap.lookupInteger(0x1c)).toBeUndefined();
});

it("decodes codespace ranges", async function () {
Expand Down

0 comments on commit 758be72

Please sign in to comment.