Skip to content

Commit

Permalink
🎓 Citation improvements (#937)
Browse files Browse the repository at this point in the history
* 🎓 Add better warning message for valid dois without bibtex
* 🎓 Get year from citation issued literal if it is not parsed
* 🎓 Stop removing urls from citation html
* 🚨 Add cli warnings for invalid citation labels
* 🎓 Add enumerator to citations and cite nodes
* 🎓 Pull url from citation data and add to citation node
* 🧪 Add transformCitations tests
  • Loading branch information
fwkoch authored Feb 28, 2024
1 parent da22969 commit cff47b1
Show file tree
Hide file tree
Showing 15 changed files with 335 additions and 36 deletions.
5 changes: 5 additions & 0 deletions .changeset/five-countries-wait.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'citation-js-utils': patch
---

Get year from citation issued literal if it is not parsed
6 changes: 6 additions & 0 deletions .changeset/good-cameras-hammer.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
'citation-js-utils': patch
'myst-cli': patch
---

Pull url from citation data and add to citation node
5 changes: 5 additions & 0 deletions .changeset/mean-walls-hear.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'myst-cli': patch
---

Add better warning message for valid dois without bibtex
7 changes: 7 additions & 0 deletions .changeset/ninety-pots-smash.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
'myst-spec-ext': patch
'myst-common': patch
'myst-cli': patch
---

Add enumerator to citations and cite nodes
6 changes: 6 additions & 0 deletions .changeset/tough-cycles-scream.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
'myst-common': patch
'myst-cli': patch
---

Add cli warnings for invalid citation labels
5 changes: 5 additions & 0 deletions .changeset/weak-games-poke.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'citation-js-utils': patch
---

Stop removing urls from citation html
59 changes: 47 additions & 12 deletions packages/citation-js-utils/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ export type CitationJson = {
type?: 'article-journal' | string;
id: string;
author?: { given: string; family: string }[];
issued?: { 'date-parts': number[][] };
issued?: { 'date-parts'?: number[][]; literal?: string };
publisher?: string;
title?: string;
'citation-key'?: string;
Expand Down Expand Up @@ -73,12 +73,20 @@ const defaultString: OutputOptions = {
style: CitationJSStyles.apa,
};

/**
 * Resolve the publication year to display for a citation.
 *
 * Lookup order:
 * 1. the first entry of the first `issued['date-parts']` tuple,
 * 2. the first standalone four-digit number starting with 1 or 2 found in
 *    `issued.literal` (returned as a string),
 * 3. the placeholder `'n.d.'` when neither is available.
 *
 * @param data - citation record to read the `issued` field from
 * @returns the year as a number (from date-parts), a string (from the literal), or `'n.d.'`
 */
export function yearFromCitation(data: CitationJson) {
  const issued = data.issued;
  const parsedYear = issued?.['date-parts']?.[0]?.[0];
  if (parsedYear) return parsedYear;
  // Word boundaries keep longer digit runs and word-embedded digits from matching.
  const literalYear = issued?.['literal']?.match(/\b[12][0-9]{3}\b/)?.[0];
  return literalYear ? literalYear : 'n.d.';
}

export function getInlineCitation(data: CitationJson, kind: InlineCite, opts?: InlineOptions) {
let authors = data.author;
if (!authors || authors.length === 0) {
authors = data.editor;
}
const year = data.issued?.['date-parts']?.[0]?.[0];
const year = yearFromCitation(data);
const prefix = opts?.prefix ? `${opts.prefix} ` : '';
const suffix = opts?.suffix ? `, ${opts.suffix}` : '';
let yearPart = kind === InlineCite.t ? ` (${year}${suffix})` : `, ${year}${suffix}`;
Expand Down Expand Up @@ -120,23 +128,47 @@ export type CitationRenderer = Record<
render: (style?: CitationJSStyles) => string;
inline: (kind?: InlineCite, opts?: InlineOptions) => InlineNode[];
getDOI: () => string | undefined;
getURL: () => string | undefined;
cite: CitationJson;
}
>;

function wrapWithDoiAnchorTag(doiStr: string) {
if (!doiStr) return '';
return `<a target="_blank" rel="noreferrer" href="https://doi.org/${doiStr}">${doiStr}</a>`;
/**
 * Build the https://doi.org resolver URL for a DOI.
 *
 * @param doi - bare DOI string
 * @returns the resolver URL, or `undefined` when `doi` is missing or empty
 */
function doiUrl(doi?: string) {
  if (!doi) return undefined;
  return `https://doi.org/${doi}`;
}

/**
 * Render an HTML anchor that opens `url` in a new tab.
 *
 * @param url - link target; an empty value yields an empty string
 * @param text - visible link text; falls back to `url` only when null/undefined
 * @returns the anchor markup, or `''` when there is no url
 */
function wrapWithAnchorTag(url: string, text?: string) {
  if (url) {
    // `??` (not `||`) so that an explicitly empty text is kept as-is.
    const label = text ?? url;
    return `<a target="_blank" rel="noreferrer" href="${url}">${label}</a>`;
  }
  return '';
}

/**
 * Render an anchor pointing at the doi.org resolver for `doi`, using the bare
 * DOI as the visible link text.
 *
 * @param doi - bare DOI string
 * @returns the anchor markup, or `''` when no resolver URL can be built
 */
function wrapWithDoiAnchorTag(doi?: string) {
  const href = doiUrl(doi);
  return href ? wrapWithAnchorTag(href, doi) : '';
}

/**
 * Matches http(s) URLs embedded in free text.
 *
 * The global (`g`) flag is required: it lets a single pass find every URL in
 * a string, and `String.prototype.matchAll` throws a TypeError when handed a
 * non-global regex.
 */
const URL_REGEX =
  /https?:\/\/(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&//=]*)/g;

/**
 * Turn every URL found in `str` into an HTML anchor.
 *
 * URLs that contain the citation's DOI are rendered as a doi.org link whose
 * text is the bare DOI; every other URL links to itself.
 *
 * The replacement is done in a single pass with a replacer callback, so each
 * match is substituted at its own position. Replacing match-by-match with
 * `string.replace(matchText, html)` would always hit the first textual
 * occurrence in the already-rewritten string, so a repeated URL (or a URL that
 * is a prefix of an earlier one) would be substituted inside the `href` of an
 * anchor that was just generated. A callback also keeps `$` sequences in the
 * generated markup from being interpreted as replacement patterns.
 *
 * @param str - rendered citation text; empty/undefined yields `''`
 * @param doi - DOI of the citation, if known
 * @returns `str` with all URLs wrapped in anchor tags
 */
function replaceUrlsWithAnchorElement(str?: string, doi?: string) {
  if (!str) return '';
  // URL_REGEX carries the global flag, so every URL in `str` is visited once.
  return str.replace(URL_REGEX, (url: string) =>
    doi && url.includes(doi) ? wrapWithDoiAnchorTag(doi) : wrapWithAnchorTag(url),
  );
}

function replaceDoiWithAnchorElement(str: string, doi: string) {
if (!str) return str;
const match = str.match(URL_REGEX);
if (!match) return str;
return str.replace(URL_REGEX, wrapWithDoiAnchorTag(doi));
/**
 * Find the first URL in `str` that does not contain the given DOI.
 *
 * @param str - text to scan for URLs
 * @param doi - DOI to exclude; when omitted, the first URL of any kind is returned
 * @returns the first qualifying URL, or `undefined` when `str` is empty or no URL qualifies
 */
export function firstNonDoiUrl(str?: string, doi?: string) {
  if (!str) return undefined;
  for (const [candidate] of str.matchAll(URL_REGEX)) {
    // Without a DOI nothing is excluded; otherwise skip URLs that embed it.
    if (!doi || !candidate.includes(doi)) return candidate;
  }
  return undefined;
}

export async function getCitations(bibtex: string): Promise<CitationRenderer> {
Expand All @@ -156,14 +188,17 @@ export async function getCitations(bibtex: string): Promise<CitationRenderer> {
return getInlineCitation(c, kind, opts);
},
render(style?: CitationJSStyles) {
return replaceDoiWithAnchorElement(
return replaceUrlsWithAnchorElement(
cleanRef(cite.set(c).get({ ...defaultString, style: style ?? CitationJSStyles.apa })),
c.DOI,
);
},
getDOI(): string | undefined {
return c.DOI || undefined;
},
getURL(): string | undefined {
return firstNonDoiUrl(cleanRef(cite.set(c).get(defaultString)), c.DOI) ?? doiUrl(c.DOI);
},
cite: c,
},
];
Expand Down
61 changes: 60 additions & 1 deletion packages/citation-js-utils/tests/basic.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { describe, expect, it } from 'vitest';
import { getCitations, CitationJSStyles } from '../src';
import { getCitations, CitationJSStyles, yearFromCitation, firstNonDoiUrl } from '../src';
import {
bibtex,
doiInNote,
Expand Down Expand Up @@ -33,3 +33,62 @@ describe('Test reference rendering', () => {
expect(citations['cury2020sparse'].getDOI()).toBe(TEST_DOI_IN_OTHER_FIELD);
});
});

// Unit tests for yearFromCitation: date-parts take priority, then a four-digit
// year extracted from the issued literal, then the 'n.d.' placeholder.
describe('yearFromCitation', () => {
  it('date-parts year is returned', async () => {
    const data = { id: 'id', issued: { 'date-parts': [[2020, 1, 1]] } };
    expect(yearFromCitation(data)).toEqual(2020);
  });
  it('date-parts year is prioritized', async () => {
    const data = { id: 'id', issued: { 'date-parts': [[2020, 1, 1]], literal: '1999' } };
    expect(yearFromCitation(data)).toEqual(2020);
  });
  it('literal is used when date-parts is empty', async () => {
    const data = { id: 'id', issued: { 'date-parts': [[]], literal: '2001' } };
    expect(yearFromCitation(data)).toEqual('2001');
  });
  it('literal is used', async () => {
    const data = { id: 'id', issued: { literal: '2020' } };
    expect(yearFromCitation(data)).toEqual('2020');
  });
  it('literal is parsed from string', async () => {
    const data = { id: 'id', issued: { literal: 'Accessed 2020 Jan 1' } };
    expect(yearFromCitation(data)).toEqual('2020');
  });
  it('literal is parsed from string with comma', async () => {
    const data = { id: 'id', issued: { literal: 'Accessed 2020, Jan 1' } };
    expect(yearFromCitation(data)).toEqual('2020');
  });
  it('literal does not parse longer number', async () => {
    const data = { id: 'id', issued: { literal: 'Accessed 202020' } };
    expect(yearFromCitation(data)).toEqual('n.d.');
  });
  it('literal does not parse as part of word', async () => {
    const data = { id: 'id', issued: { literal: 'Accessed a2020' } };
    expect(yearFromCitation(data)).toEqual('n.d.');
  });
  it('no date returns n.d.', async () => {
    const data = { id: 'id' };
    expect(yearFromCitation(data)).toEqual('n.d.');
  });
});

// Unit tests for firstNonDoiUrl: returns the first URL in the text that does
// not contain the DOI, or undefined when there is none.
describe('firstNonDoiUrl', () => {
  it('undefined string returns undefined', async () => {
    expect(firstNonDoiUrl(undefined, 'abc123')).toEqual(undefined);
  });
  it('no url returns undefined', async () => {
    expect(firstNonDoiUrl('my citation', 'abc123')).toEqual(undefined);
  });
  it('one url returns url', async () => {
    expect(firstNonDoiUrl('my citation https://example.com', 'abc123')).toEqual(
      'https://example.com',
    );
  });
  it('url is returned when no doi is given', async () => {
    expect(firstNonDoiUrl('my citation https://example.com/abc123')).toEqual(
      'https://example.com/abc123',
    );
  });
  it('two urls returns first url', async () => {
    expect(
      firstNonDoiUrl('my citation https://example.com/a and https://example.com/b', 'abc123'),
    ).toEqual('https://example.com/a');
  });
  it('doi url is skipped', async () => {
    expect(firstNonDoiUrl('my citation https://example.com/abc123', 'abc123')).toEqual(undefined);
  });
  it('url after doi url is returned', async () => {
    expect(
      firstNonDoiUrl('my citation https://example.com/abc123 and https://example.com/b', 'abc123'),
    ).toEqual('https://example.com/b');
  });
});
2 changes: 1 addition & 1 deletion packages/myst-cli/src/process/mdast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ export async function transformMdast(
transformRenderInlineExpressions(mdast, vfile);
await transformOutputsToCache(session, mdast, kind, { minifyMaxCharacters });
transformFilterOutputStreams(mdast, vfile, frontmatter.settings);
transformCitations(mdast, fileCitationRenderer, references);
transformCitations(session, file, mdast, fileCitationRenderer, references);
await unified()
.use(codePlugin, { lang: frontmatter?.kernelspec?.language })
.use(footnotesPlugin) // Needs to happen near the end
Expand Down
92 changes: 92 additions & 0 deletions packages/myst-cli/src/transforms/citations.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import { describe, expect, it } from 'vitest';
import { Session } from '../session';
import { transformCitations } from './citations';
import type { CitationRenderer } from 'citation-js-utils';
import type { References } from 'myst-common';

// Stub citation renderer shared by the tests below, keyed by citation label.
const RENDERER: CitationRenderer = {
  // Entry that exposes both a DOI and a URL.
  author1: {
    render: () => '<rendered 1/>',
    inline: () => [{ type: 'text', value: 'inline 1' }],
    getDOI: () => 'abc123',
    getURL: () => 'https://example.com',
    cite: { id: 'my-cite-1' },
  },
  // Entry with neither a DOI nor a URL.
  author2: {
    render: () => '<rendered 2/>',
    inline: () => [{ type: 'text', value: 'inline 2' }],
    getDOI: () => undefined,
    getURL: () => undefined,
    cite: { id: 'my-cite-2' },
  },
};

// Tests for transformCitations using the stub RENDERER: each cite node should
// receive inline children and an enumerator, and `references.cite` should
// record order and per-label data.
describe('transformCitations', () => {
  // Builds a root node holding one `cite` child per label, in the given order.
  const rootWithCites = (...labels: string[]): any => ({
    type: 'root',
    children: labels.map((label) => ({ type: 'cite', label })),
  });

  it('citation transforms', async () => {
    const tree = rootWithCites('author1');
    const refs: References = {};
    transformCitations(new Session(), '', tree, RENDERER, refs);

    const [onlyCite] = tree.children;
    expect(onlyCite.children).toEqual([{ type: 'text', value: 'inline 1' }]);
    expect(onlyCite.enumerator).toEqual('1');
    expect(refs.cite?.order).toEqual(['author1']);
    expect(refs.cite?.data?.author1).toEqual({
      label: 'author1',
      doi: 'abc123',
      url: 'https://example.com',
      enumerator: '1',
      html: '<rendered 1/>',
    });
  });

  it('multiple citations transform', async () => {
    // author2 is cited twice; the assertions expect both nodes to share enumerator '1'.
    const tree = rootWithCites('author2', 'author1', 'author2');
    const refs: References = {};
    transformCitations(new Session(), '', tree, RENDERER, refs);

    const [first, second, third] = tree.children;
    expect(first.children).toEqual([{ type: 'text', value: 'inline 2' }]);
    expect(first.enumerator).toEqual('1');
    expect(second.children).toEqual([{ type: 'text', value: 'inline 1' }]);
    expect(second.enumerator).toEqual('2');
    expect(third.children).toEqual([{ type: 'text', value: 'inline 2' }]);
    expect(third.enumerator).toEqual('1');
    expect(refs.cite?.order).toEqual(['author2', 'author1']);
    expect(refs.cite?.data?.author1).toEqual({
      label: 'author1',
      doi: 'abc123',
      url: 'https://example.com',
      enumerator: '2',
      html: '<rendered 1/>',
    });
    expect(refs.cite?.data?.author2).toEqual({
      label: 'author2',
      enumerator: '1',
      html: '<rendered 2/>',
    });
  });
});
Loading

0 comments on commit cff47b1

Please sign in to comment.