diff --git a/.changeset/five-countries-wait.md b/.changeset/five-countries-wait.md
new file mode 100644
index 000000000..78fcd0a47
--- /dev/null
+++ b/.changeset/five-countries-wait.md
@@ -0,0 +1,5 @@
+---
+'citation-js-utils': patch
+---
+
+Get year from the citation's `issued.literal` string when it is not available from `date-parts`
diff --git a/.changeset/good-cameras-hammer.md b/.changeset/good-cameras-hammer.md
new file mode 100644
index 000000000..0c329cc08
--- /dev/null
+++ b/.changeset/good-cameras-hammer.md
@@ -0,0 +1,6 @@
+---
+'citation-js-utils': patch
+'myst-cli': patch
+---
+
+Pull url from citation data and add to citation node
diff --git a/.changeset/mean-walls-hear.md b/.changeset/mean-walls-hear.md
new file mode 100644
index 000000000..12472fc59
--- /dev/null
+++ b/.changeset/mean-walls-hear.md
@@ -0,0 +1,5 @@
+---
+'myst-cli': patch
+---
+
+Add a clearer warning message for valid DOIs that have no BibTeX available from doi.org
diff --git a/.changeset/ninety-pots-smash.md b/.changeset/ninety-pots-smash.md
new file mode 100644
index 000000000..9a944a206
--- /dev/null
+++ b/.changeset/ninety-pots-smash.md
@@ -0,0 +1,7 @@
+---
+'myst-spec-ext': patch
+'myst-common': patch
+'myst-cli': patch
+---
+
+Add enumerator to citations and cite nodes
diff --git a/.changeset/tough-cycles-scream.md b/.changeset/tough-cycles-scream.md
new file mode 100644
index 000000000..b36128770
--- /dev/null
+++ b/.changeset/tough-cycles-scream.md
@@ -0,0 +1,6 @@
+---
+'myst-common': patch
+'myst-cli': patch
+---
+
+Add cli warnings for invalid citation labels
diff --git a/.changeset/weak-games-poke.md b/.changeset/weak-games-poke.md
new file mode 100644
index 000000000..e95b2f608
--- /dev/null
+++ b/.changeset/weak-games-poke.md
@@ -0,0 +1,5 @@
+---
+'citation-js-utils': patch
+---
+
+Stop removing urls from citation html
diff --git a/packages/citation-js-utils/src/index.ts b/packages/citation-js-utils/src/index.ts
index 6af2fcb2e..40448b217 100644
--- a/packages/citation-js-utils/src/index.ts
+++ b/packages/citation-js-utils/src/index.ts
@@ -12,7 +12,7 @@ export type CitationJson = {
type?: 'article-journal' | string;
id: string;
author?: { given: string; family: string }[];
- issued?: { 'date-parts': number[][] };
+ issued?: { 'date-parts'?: number[][]; literal?: string };
publisher?: string;
title?: string;
'citation-key'?: string;
@@ -73,12 +73,20 @@ const defaultString: OutputOptions = {
style: CitationJSStyles.apa,
};
+/**
+ * Pick a display year for a citation.
+ *
+ * Order of preference: the first number of the first `date-parts` entry, then the
+ * first standalone four-digit number starting with 1 or 2 found in `issued.literal`,
+ * then the placeholder `'n.d.'`.
+ */
+export function yearFromCitation(data: CitationJson) {
+  const issued = data.issued;
+  const structuredYear: number | undefined = issued?.['date-parts']?.[0]?.[0];
+  if (structuredYear) return structuredYear;
+  const literalYear: string | undefined = issued?.literal?.match(/\b[12][0-9]{3}\b/)?.[0];
+  return literalYear ? literalYear : 'n.d.';
+}
+
export function getInlineCitation(data: CitationJson, kind: InlineCite, opts?: InlineOptions) {
let authors = data.author;
if (!authors || authors.length === 0) {
authors = data.editor;
}
- const year = data.issued?.['date-parts']?.[0]?.[0];
+ const year = yearFromCitation(data);
const prefix = opts?.prefix ? `${opts.prefix} ` : '';
const suffix = opts?.suffix ? `, ${opts.suffix}` : '';
let yearPart = kind === InlineCite.t ? ` (${year}${suffix})` : `, ${year}${suffix}`;
@@ -120,23 +128,47 @@ export type CitationRenderer = Record<
render: (style?: CitationJSStyles) => string;
inline: (kind?: InlineCite, opts?: InlineOptions) => InlineNode[];
getDOI: () => string | undefined;
+ getURL: () => string | undefined;
cite: CitationJson;
}
>;
-function wrapWithDoiAnchorTag(doiStr: string) {
- if (!doiStr) return '';
- return `${doiStr}`;
+/** Build the doi.org URL for `doi`; returns `undefined` when `doi` is empty or missing. */
+function doiUrl(doi?: string) {
+  if (!doi) return undefined;
+  return `https://doi.org/${doi}`;
+}
+
+function wrapWithAnchorTag(url: string, text?: string) { // `text` is the optional link label
+ if (!url) return ''; // no URL, nothing to emit
+ return `${text ?? url}`; // label falls back to the URL itself; NOTE(review): no anchor markup is visible on this line - confirm against the real file
+}
+
+/**
+ * Link a DOI: delegates to `wrapWithAnchorTag` with the doi.org URL as `url` and the
+ * bare DOI as `text`. Returns `''` when `doi` is empty or missing.
+ */
+function wrapWithDoiAnchorTag(doi?: string) {
+  const href = doiUrl(doi);
+  return href ? wrapWithAnchorTag(href, doi) : '';
}
const URL_REGEX =
- /https?:\/\/(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&//=]*)/;
+ /https?:\/\/(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&//=]*)/g;
+
+/**
+ * Replace every URL found in `str` with its wrapped form.
+ *
+ * A URL that contains `doi` is replaced by the output of `wrapWithDoiAnchorTag(doi)`;
+ * any other URL is replaced by `wrapWithAnchorTag(url)`.
+ *
+ * The replacement is done in a single pass with a callback so each match is rewritten
+ * exactly where it was found. Replacing match-by-match on the accumulated string with
+ * `String.replace(substring, ...)` always targets the first occurrence, which mis-handles
+ * repeated URLs and can land inside text produced by an earlier replacement.
+ *
+ * @param str - Rendered citation text; an empty or missing value yields `''`.
+ * @param doi - DOI of the citation, if any.
+ * @returns `str` with every URL match replaced.
+ */
+function replaceUrlsWithAnchorElement(str?: string, doi?: string) {
+  if (!str) return '';
+  // The callback form also keeps `$` sequences in the replacement text literal.
+  return str.replace(URL_REGEX, (url) =>
+    doi && url.includes(doi) ? wrapWithDoiAnchorTag(doi) : wrapWithAnchorTag(url),
+  );
+}
-function replaceDoiWithAnchorElement(str: string, doi: string) {
- if (!str) return str;
- const match = str.match(URL_REGEX);
- if (!match) return str;
- return str.replace(URL_REGEX, wrapWithDoiAnchorTag(doi));
+/**
+ * Return the first URL in `str` that does not contain `doi`.
+ *
+ * When `doi` is empty or missing, the first URL of any kind is returned. Returns
+ * `undefined` when `str` is empty or missing, or when no URL qualifies.
+ */
+export function firstNonDoiUrl(str?: string, doi?: string) {
+  if (!str) return undefined;
+  for (const [url] of str.matchAll(URL_REGEX)) {
+    if (!doi || !url.includes(doi)) return url;
+  }
+  return undefined;
}
export async function getCitations(bibtex: string): Promise {
@@ -156,7 +188,7 @@ export async function getCitations(bibtex: string): Promise {
return getInlineCitation(c, kind, opts);
},
render(style?: CitationJSStyles) {
- return replaceDoiWithAnchorElement(
+ return replaceUrlsWithAnchorElement(
cleanRef(cite.set(c).get({ ...defaultString, style: style ?? CitationJSStyles.apa })),
c.DOI,
);
@@ -164,6 +196,9 @@ export async function getCitations(bibtex: string): Promise {
getDOI(): string | undefined {
return c.DOI || undefined;
},
+ getURL(): string | undefined {
+ return firstNonDoiUrl(cleanRef(cite.set(c).get(defaultString)), c.DOI) ?? doiUrl(c.DOI);
+ },
cite: c,
},
];
diff --git a/packages/citation-js-utils/tests/basic.spec.ts b/packages/citation-js-utils/tests/basic.spec.ts
index 6c814ee56..d5bc1e659 100644
--- a/packages/citation-js-utils/tests/basic.spec.ts
+++ b/packages/citation-js-utils/tests/basic.spec.ts
@@ -1,5 +1,5 @@
import { describe, expect, it } from 'vitest';
-import { getCitations, CitationJSStyles } from '../src';
+import { getCitations, CitationJSStyles, yearFromCitation, firstNonDoiUrl } from '../src';
import {
bibtex,
doiInNote,
@@ -33,3 +33,62 @@ describe('Test reference rendering', () => {
expect(citations['cury2020sparse'].getDOI()).toBe(TEST_DOI_IN_OTHER_FIELD);
});
});
+
+// Covers the fallback order of yearFromCitation: date-parts, then issued.literal, then 'n.d.'.
+describe('yearFromCitation', () => {
+  it('date-parts year is returned', async () => {
+    const data = { id: 'id', issued: { 'date-parts': [[2020, 1, 1]] } };
+    expect(yearFromCitation(data)).toEqual(2020);
+  });
+  it('date-parts year is prioritized', async () => {
+    const data = { id: 'id', issued: { 'date-parts': [[2020, 1, 1]], literal: '1999' } };
+    expect(yearFromCitation(data)).toEqual(2020);
+  });
+  it('literal is used', async () => {
+    const data = { id: 'id', issued: { literal: '2020' } };
+    expect(yearFromCitation(data)).toEqual('2020');
+  });
+  it('literal is parsed from string', async () => {
+    const data = { id: 'id', issued: { literal: 'Accessed 2020 Jan 1' } };
+    expect(yearFromCitation(data)).toEqual('2020');
+  });
+  it('literal is parsed from string with comma', async () => {
+    const data = { id: 'id', issued: { literal: 'Accessed 2020, Jan 1' } };
+    expect(yearFromCitation(data)).toEqual('2020');
+  });
+  it('literal does not parse longer number', async () => {
+    const data = { id: 'id', issued: { literal: 'Accessed 202020' } };
+    expect(yearFromCitation(data)).toEqual('n.d.');
+  });
+  it('literal does not parse as part of word', async () => {
+    const data = { id: 'id', issued: { literal: 'Accessed a2020' } };
+    expect(yearFromCitation(data)).toEqual('n.d.');
+  });
+  it('no date returns n.d.', async () => {
+    const data = { id: 'id' };
+    expect(yearFromCitation(data)).toEqual('n.d.');
+  });
+});
+
+// Covers firstNonDoiUrl: no URL, a single URL, several URLs, and URLs that contain the DOI.
+describe('firstNonDoiUrl', () => {
+  it('no url returns undefined', async () => {
+    expect(firstNonDoiUrl('my citation', 'abc123')).toEqual(undefined);
+  });
+  it('one url returns url', async () => {
+    expect(firstNonDoiUrl('my citation https://example.com', 'abc123')).toEqual(
+      'https://example.com',
+    );
+  });
+  it('two urls returns first url', async () => {
+    expect(
+      firstNonDoiUrl('my citation https://example.com/a and https://example.com/b', 'abc123'),
+    ).toEqual('https://example.com/a');
+  });
+  it('doi url is skipped', async () => {
+    expect(firstNonDoiUrl('my citation https://example.com/abc123', 'abc123')).toEqual(undefined);
+  });
+  it('url after doi url is returned', async () => {
+    expect(
+      firstNonDoiUrl('my citation https://example.com/abc123 and https://example.com/b', 'abc123'),
+    ).toEqual('https://example.com/b');
+  });
+});
diff --git a/packages/myst-cli/src/process/mdast.ts b/packages/myst-cli/src/process/mdast.ts
index bd8dc7358..6162c6f16 100644
--- a/packages/myst-cli/src/process/mdast.ts
+++ b/packages/myst-cli/src/process/mdast.ts
@@ -232,7 +232,7 @@ export async function transformMdast(
transformRenderInlineExpressions(mdast, vfile);
await transformOutputsToCache(session, mdast, kind, { minifyMaxCharacters });
transformFilterOutputStreams(mdast, vfile, frontmatter.settings);
- transformCitations(mdast, fileCitationRenderer, references);
+ transformCitations(session, file, mdast, fileCitationRenderer, references);
await unified()
.use(codePlugin, { lang: frontmatter?.kernelspec?.language })
.use(footnotesPlugin) // Needs to happen near the end
diff --git a/packages/myst-cli/src/transforms/citations.spec.ts b/packages/myst-cli/src/transforms/citations.spec.ts
new file mode 100644
index 000000000..7f11b7b17
--- /dev/null
+++ b/packages/myst-cli/src/transforms/citations.spec.ts
@@ -0,0 +1,92 @@
+import { describe, expect, it } from 'vitest';
+import { Session } from '../session';
+import { transformCitations } from './citations';
+import type { CitationRenderer } from 'citation-js-utils';
+import type { References } from 'myst-common';
+
+const RENDERER: CitationRenderer = { // stub renderer shared by the tests below, keyed by citation label
+ author1: { // entry that has both a DOI and a URL
+ render: () => '',
+ inline: () => {
+ return [{ type: 'text', value: 'inline 1' }];
+ },
+ getDOI: () => 'abc123',
+ getURL: () => 'https://example.com',
+ cite: { id: 'my-cite-1' },
+ },
+ author2: { // entry with neither a DOI nor a URL
+ render: () => '',
+ inline: () => {
+ return [{ type: 'text', value: 'inline 2' }];
+ },
+ getDOI: () => undefined,
+ getURL: () => undefined,
+ cite: { id: 'my-cite-2' },
+ },
+};
+
+describe('transformCitations', () => { // checks cite node children/enumerators and the collected references
+ it('citation transforms', async () => { // single cite node: gets inline children and enumerator '1'
+ const mdast: any = {
+ type: 'root',
+ children: [
+ {
+ type: 'cite',
+ label: 'author1',
+ },
+ ],
+ };
+ const references: References = {};
+ transformCitations(new Session(), '', mdast, RENDERER, references);
+ expect(mdast.children[0].children).toEqual([{ type: 'text', value: 'inline 1' }]);
+ expect(mdast.children[0].enumerator).toEqual('1');
+ expect(references.cite?.order).toEqual(['author1']);
+ expect(references.cite?.data?.author1).toEqual({
+ label: 'author1',
+ doi: 'abc123',
+ url: 'https://example.com',
+ enumerator: '1',
+ html: '',
+ });
+ });
+ it('multiple citations transform', async () => { // author2 is cited twice and must keep the enumerator from its first appearance
+ const mdast: any = {
+ type: 'root',
+ children: [
+ {
+ type: 'cite',
+ label: 'author2',
+ },
+ {
+ type: 'cite',
+ label: 'author1',
+ },
+ {
+ type: 'cite',
+ label: 'author2',
+ },
+ ],
+ };
+ const references: References = {};
+ transformCitations(new Session(), '', mdast, RENDERER, references);
+ expect(mdast.children[0].children).toEqual([{ type: 'text', value: 'inline 2' }]);
+ expect(mdast.children[0].enumerator).toEqual('1');
+ expect(mdast.children[1].children).toEqual([{ type: 'text', value: 'inline 1' }]);
+ expect(mdast.children[1].enumerator).toEqual('2');
+ expect(mdast.children[2].children).toEqual([{ type: 'text', value: 'inline 2' }]);
+ expect(mdast.children[2].enumerator).toEqual('1'); // same label as children[0], same enumerator
+ expect(references.cite?.order).toEqual(['author2', 'author1']); // order of first appearance, no duplicates
+ expect(references.cite?.data?.author1).toEqual({
+ label: 'author1',
+ doi: 'abc123',
+ url: 'https://example.com',
+ enumerator: '2',
+ html: '',
+ });
+ expect(references.cite?.data?.author2).toEqual({ // toEqual ignores the undefined doi/url of author2
+ label: 'author2',
+ enumerator: '1',
+ html: '',
+ });
+ });
+});
diff --git a/packages/myst-cli/src/transforms/citations.ts b/packages/myst-cli/src/transforms/citations.ts
index 80161379f..f709e0f5a 100644
--- a/packages/myst-cli/src/transforms/citations.ts
+++ b/packages/myst-cli/src/transforms/citations.ts
@@ -1,32 +1,45 @@
import type { CitationRenderer } from 'citation-js-utils';
import { InlineCite } from 'citation-js-utils';
+import { RuleId } from 'myst-common';
import type { GenericNode, GenericParent, References } from 'myst-common';
import type { StaticPhrasingContent } from 'myst-spec';
import type { Cite } from 'myst-spec-ext';
import { selectAll } from 'unist-util-select';
+import type { ISession } from '../session/types.js';
+import { addWarningForFile } from '../utils/addWarningForFile.js';
function pushCite(
references: Pick,
citeRenderer: CitationRenderer,
label: string,
-) {
+): string {
if (!references.cite) {
references.cite = { order: [], data: {} };
}
- if (!references.cite?.data[label]) {
+ if (!references.cite.data[label]) {
references.cite.order.push(label);
+ references.cite.data[label] = {
+ label,
+ enumerator: `${references.cite.order.length}`,
+ doi: citeRenderer[label]?.getDOI(),
+ html: citeRenderer[label]?.render(),
+ url: citeRenderer[label]?.getURL(),
+ };
}
- references.cite.data[label] = {
- // TODO: this number isn't right? Should be the last time it was seen, not the current size.
- number: references.cite.order.length,
- doi: citeRenderer[label]?.getDOI(),
- html: citeRenderer[label]?.render(),
- };
+ return references.cite.data[label].enumerator;
}
-function addCitationChildren(cite: Cite, renderer: CitationRenderer): boolean {
+function addCitationChildren(
+ session: ISession,
+ file: string,
+ cite: Cite,
+ renderer: CitationRenderer,
+): boolean {
const render = renderer[cite.label as string];
if (!render) {
+ addWarningForFile(session, file, `Citation not found for label: ${cite.label}`, 'error', {
+ ruleId: RuleId.citationLabelExists,
+ });
cite.error = 'not found';
return false;
}
@@ -38,6 +51,15 @@ function addCitationChildren(cite: Cite, renderer: CitationRenderer): boolean {
partial: cite.partial,
}) as StaticPhrasingContent[];
} catch (error) {
+ addWarningForFile(
+ session,
+ file,
+ `Citation failed to render for label: ${cite.label}`,
+ 'error',
+ {
+ ruleId: RuleId.citationRenders,
+ },
+ );
cite.error = 'rendering error';
return false;
}
@@ -51,6 +73,8 @@ function hasChildren(node: GenericNode) {
}
export function transformCitations(
+ session: ISession,
+ file: string,
mdast: GenericParent,
renderer: CitationRenderer,
references: Pick,
@@ -59,7 +83,7 @@ export function transformCitations(
citations.forEach((cite) => {
const citeLabel = cite.label as string;
// push cites in order of appearance in the document
- const success = addCitationChildren(cite, renderer);
- if (success) pushCite(references, renderer, citeLabel);
+ const success = addCitationChildren(session, file, cite, renderer);
+ if (success) cite.enumerator = pushCite(references, renderer, citeLabel);
});
}
diff --git a/packages/myst-cli/src/transforms/dois.ts b/packages/myst-cli/src/transforms/dois.ts
index f333514e4..d58c1fe40 100644
--- a/packages/myst-cli/src/transforms/dois.ts
+++ b/packages/myst-cli/src/transforms/dois.ts
@@ -13,28 +13,38 @@ import type { SingleCitationRenderer } from './types.js';
import type { VFile } from 'vfile';
import type { ISession } from '../session/types.js';
-function doiCacheFile(session: ISession, normalizedDoi: string) {
+function doiBibtexCacheFile(session: ISession, normalizedDoi: string) {
const filename = `doi-${computeHash(normalizedDoi)}.bib`;
const cacheFolder = join(session.buildPath(), 'cache');
if (!fs.existsSync(cacheFolder)) fs.mkdirSync(cacheFolder, { recursive: true });
return join(cacheFolder, filename);
}
+/**
+ * Path of the cache marker file recording that a DOI resolved.
+ *
+ * The file lives in the `cache` folder under the session build path and is named
+ * `doi-<hash>.txt`, where the hash is computed from the normalized DOI. The cache
+ * folder is created if it does not exist yet.
+ */
+function doiResolvesCacheFile(session: ISession, normalizedDoi: string) {
+  const hash = computeHash(normalizedDoi);
+  const cacheDir = join(session.buildPath(), 'cache');
+  const cacheDirExists = fs.existsSync(cacheDir);
+  if (!cacheDirExists) {
+    fs.mkdirSync(cacheDir, { recursive: true });
+  }
+  return join(cacheDir, `doi-${hash}.txt`);
+}
+
+/**
+ * Fetch bibtex entry for doi from doi.org using application/x-bibtex accept header
+ */
export async function getDoiOrgBibtex(
session: ISession,
doiString: string,
): Promise {
const normalizedDoi = doi.normalize(doiString);
- if (!doi.validate(doiString) || !normalizedDoi) return null;
- const cachePath = doiCacheFile(session, normalizedDoi);
+ const url = doi.buildUrl(normalizedDoi);
+ if (!doi.validate(doiString) || !normalizedDoi || !url) return null;
+ const cachePath = doiBibtexCacheFile(session, normalizedDoi);
if (fs.existsSync(cachePath)) {
const bibtex = fs.readFileSync(cachePath).toString();
- session.log.debug(`Loaded cached reference information doi:${normalizedDoi}`);
+ session.log.debug(`Loaded cached reference bibtex for doi:${normalizedDoi}`);
return bibtex;
}
const toc = tic();
- session.log.debug('Fetching DOI information from doi.org');
- const url = `https://doi.org/${normalizedDoi}`;
+ session.log.debug('Fetching DOI bibtex from doi.org');
const response = await session
.fetch(url, {
headers: [['Accept', 'application/x-bibtex']],
@@ -44,16 +54,44 @@ export async function getDoiOrgBibtex(
return null;
});
if (!response || !response.ok) {
- session.log.debug(`doi.org fetch failed for ${doiString}}`);
+ session.log.debug(`doi.org fetch failed for ${doiString}`);
return null;
}
const bibtex = await response.text();
- session.log.debug(toc(`Fetched reference information doi:${normalizedDoi} in %s`));
- session.log.debug(`Saving doi to cache ${cachePath}`);
+ session.log.debug(toc(`Fetched reference bibtex for doi:${normalizedDoi} in %s`));
+ session.log.debug(`Saving doi bibtex to cache ${cachePath}`);
fs.writeFileSync(cachePath, bibtex);
return bibtex;
}
+/**
+ * Fetch doi from doi.org to see if it resolves
+ *
+ * Returns false without fetching when the DOI string fails validation or when no
+ * normalized DOI / URL can be built from it. A successful resolution is remembered
+ * by writing a marker file to the cache path, and later calls return true as soon as
+ * that file exists. Failed requests and non-ok responses return false and write
+ * nothing, so they are attempted again on the next call.
+ */
+export async function doiOrgResolves(session: ISession, doiString: string): Promise {
+ const normalizedDoi = doi.normalize(doiString);
+ const url = doi.buildUrl(normalizedDoi);
+ if (!doi.validate(doiString) || !normalizedDoi || !url) return false;
+ const cachePath = doiResolvesCacheFile(session, normalizedDoi);
+ if (fs.existsSync(cachePath)) { // only existence is checked; the file content is never read
+ session.log.debug(`Loaded cached resolution result for doi:${normalizedDoi}`);
+ return true;
+ }
+ const toc = tic();
+ session.log.debug('Resolving doi existence from doi.org');
+ const response = await session.fetch(url).catch(() => { // a rejected fetch is logged and treated as "no response"
+ session.log.debug(`Request to ${url} failed.`);
+ return null;
+ });
+ if (!response || !response.ok) {
+ session.log.debug(`doi.org fetch failed for ${doiString}`);
+ return false;
+ }
+ session.log.debug(toc(`Resolved doi existence for doi:${normalizedDoi} in %s`));
+ session.log.debug(`Saving resolution result to cache ${cachePath}`);
+ fs.writeFileSync(cachePath, 'ok'); // marker file: its presence is the cached positive result
+ return true;
+}
+
export async function getCitation(
session: ISession,
vfile: VFile,
@@ -63,9 +101,20 @@ export async function getCitation(
if (!doi.validate(doiString)) return null;
const bibtex = await getDoiOrgBibtex(session, doiString);
if (!bibtex) {
- fileWarn(vfile, `Could not find DOI from link: ${doiString} as ${doi.normalize(doiString)}`, {
+ const resolves = await doiOrgResolves(session, doiString);
+ const normalizedDoi = doi.normalize(doiString);
+ let message: string;
+ let note: string | undefined;
+ if (resolves) {
+ message = `No bibtex available from doi.org for doi:${normalizedDoi}`;
+ note = `To resolve this error, visit ${doi.buildUrl(normalizedDoi)} and add citation info to .bib file`;
+ } else {
+ message = `Could not find DOI from link: ${doiString} as ${normalizedDoi}`;
+ }
+ fileWarn(vfile, message, {
node,
ruleId: RuleId.doiLinkValid,
+ note,
});
return null;
}
diff --git a/packages/myst-common/src/ruleids.ts b/packages/myst-common/src/ruleids.ts
index 4ad643831..591f0c012 100644
--- a/packages/myst-common/src/ruleids.ts
+++ b/packages/myst-common/src/ruleids.ts
@@ -80,6 +80,8 @@ export enum RuleId {
// Citation rules
citationIsUnique = 'citation-is-unique',
bibFileExists = 'bib-file-exists',
+ citationRenders = 'citation-renders',
+ citationLabelExists = 'citation-label-exists',
// Code rules
codeMetadataLifted = 'code-metadata-lifted',
codeMetatagsValid = 'code-metatags-valid',
diff --git a/packages/myst-common/src/types.ts b/packages/myst-common/src/types.ts
index 20cba656a..8ad29e144 100644
--- a/packages/myst-common/src/types.ts
+++ b/packages/myst-common/src/types.ts
@@ -20,7 +20,10 @@ export type GenericParent = Record> =
export type Citations = {
order: string[];
- data: Record;
+ data: Record<
+ string,
+ { label: string; html: string; enumerator: string; doi?: string; url?: string }
+ >;
};
export enum NotebookCell {
diff --git a/packages/myst-spec-ext/src/types.ts b/packages/myst-spec-ext/src/types.ts
index d14c97024..a253a0a24 100644
--- a/packages/myst-spec-ext/src/types.ts
+++ b/packages/myst-spec-ext/src/types.ts
@@ -141,6 +141,7 @@ export type Cite = {
prefix?: string;
suffix?: string;
partial?: 'author' | 'year';
+ enumerator?: string;
};
export type CiteGroup = {