Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🔀 Duplicate notebook article in JATS export #400

Merged
merged 4 commits into from
May 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions packages/myst-cli/src/build/jats/single.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,17 @@ export async function runJatsExport(
},
)
).map((content) => {
const { file, mdast, frontmatter, slug } = content;
const { kind, file, mdast, frontmatter, slug } = content;
const rendererFiles = projectPath ? [projectPath, file] : [file];
const citations = combineCitationRenderers(castSession(session), ...rendererFiles);
return { mdast, frontmatter, citations, slug };
return { mdast, kind, frontmatter, citations, slug };
});
const [processedArticle, ...processedSubArticles] = processedContents;
const vfile = new VFile();
vfile.path = output;
const jats = writeJats(vfile, processedArticle as any, {
subArticles: processedSubArticles as any,
fullArticle: true,
writeFullArticle: true,
spaces: 2,
});
logMessagesFromVFile(session, jats);
Expand Down
43 changes: 30 additions & 13 deletions packages/myst-to-jats/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import type { VFile } from 'vfile';
import { js2xml } from 'xml-js';
import type { CitationRenderer } from 'citation-js-utils';
import type { MessageInfo, GenericNode } from 'myst-common';
import { copyNode, fileError } from 'myst-common';
import { SourceFileKind, copyNode, fileError } from 'myst-common';
import type { PageFrontmatter } from 'myst-frontmatter';
import { Tags, RefType } from 'jats-xml';
import type { MinifiedOutput } from 'nbtx';
Expand Down Expand Up @@ -465,7 +465,7 @@ class JatsSerializer implements IJatsSerializer {
this.expressions = [];
this.handlers = opts?.handlers ?? handlers;
const mdastCopy = copyNode(mdast) as any;
basicTransformations(mdastCopy);
basicTransformations(mdastCopy, opts ?? {});
this.renderChildren(mdastCopy);
while (this.stack.length > 1) this.closeNode();
}
Expand Down Expand Up @@ -586,6 +586,10 @@ export class JatsDocument {
if (specificUse) attributes['specific-use'] = specificUse;
if (this.content.slug) attributes.id = this.content.slug;
const state = new JatsSerializer(this.file, this.content.mdast, this.options);
const subArticles = this.options.subArticles ?? [];
if (this.content.kind === SourceFileKind.Notebook) {
subArticles.unshift(this.content);
}
const elements: Element[] = [
...getFront(this.content.frontmatter),
this.body(state),
Expand All @@ -594,7 +598,9 @@ export class JatsDocument {
footnotes: state.footnotes,
expressions: state.expressions,
}),
...(this.options.subArticles ?? []).map((article) => this.subArticle(article)),
...subArticles.map((article, ind) =>
this.subArticle(article, ind === 0 && this.content.kind === SourceFileKind.Notebook),
),
];
const article: Element = {
type: 'element',
Expand All @@ -605,7 +611,7 @@ export class JatsDocument {
return article;
}

frontStub(frontmatter?: PageFrontmatter): Element[] {
frontStub(frontmatter?: PageFrontmatter, notebookRep?: boolean): Element[] {
const stubFrontmatter: Record<string, any> = {};
if (frontmatter) {
Object.entries(frontmatter).forEach(([key, val]) => {
Expand All @@ -616,14 +622,25 @@ export class JatsDocument {
});
}
const articleMeta = getArticleMeta(stubFrontmatter);
if (!articleMeta) return [];
return [{ type: 'element', name: 'front-stub', elements: articleMeta.elements }];
const elements = articleMeta?.elements ?? [];
if (notebookRep) {
elements.push({
type: 'element',
name: 'article-version',
attributes: { 'article-version-type': 'alt representation' },
elements: [{ type: 'text', text: 'notebook' }],
});
}
return [{ type: 'element', name: 'front-stub', elements }];
}

subArticle(content: ArticleContent): Element {
const state = new JatsSerializer(this.file, content.mdast, this.options);
subArticle(content: ArticleContent, notebookRep: boolean): Element {
const state = new JatsSerializer(this.file, content.mdast, {
...this.options,
isSubArticle: true,
});
const elements: Element[] = [
...this.frontStub(content.frontmatter),
...this.frontStub(content.frontmatter, notebookRep),
{ type: 'element', name: 'body', elements: state.elements() },
...getBack({
citations: content.citations,
Expand All @@ -644,7 +661,7 @@ export class JatsDocument {

export function writeJats(file: VFile, content: ArticleContent, opts?: DocumentOptions) {
const doc = new JatsDocument(file, content, opts ?? { handlers });
const element = opts?.fullArticle
const element = opts?.writeFullArticle
? {
type: 'element',
elements: [
Expand All @@ -667,12 +684,12 @@ export function writeJats(file: VFile, content: ArticleContent, opts?: DocumentO
}

const plugin: Plugin<
[PageFrontmatter?, CitationRenderer?, string?, DocumentOptions?],
[SourceFileKind, PageFrontmatter?, CitationRenderer?, string?, DocumentOptions?],
Root,
VFile
> = function (frontmatter, citations, slug, opts) {
> = function (kind, frontmatter, citations, slug, opts) {
this.Compiler = (node, file) => {
return writeJats(file, { mdast: node as any, frontmatter, citations, slug }, opts);
return writeJats(file, { mdast: node as any, kind, frontmatter, citations, slug }, opts);
};

return (node: Root) => {
Expand Down
9 changes: 5 additions & 4 deletions packages/myst-to-jats/src/transforms/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,21 @@ import { containerTransform } from './containers';
import { tableTransform } from './tables';
import { sectionTransform } from './sections';
import { citeGroupTransform } from './citations';
import type { Options } from '../types';

export { definitionTransform, definitionPlugin } from './definitions';
export { containerTransform, containerPlugin } from './containers';
export { tableTransform, tablePlugin } from './tables';
export { sectionTransform, sectionPlugin } from './sections';

export function basicTransformations(tree: Root) {
export function basicTransformations(tree: Root, opts: Options) {
definitionTransform(tree);
containerTransform(tree);
tableTransform(tree);
sectionTransform(tree);
sectionTransform(tree, opts);
citeGroupTransform(tree);
}

export const basicTransformationsPlugin: Plugin<[], Root, Root> = () => (tree) => {
basicTransformations(tree);
export const basicTransformationsPlugin: Plugin<[Options], Root, Root> = (opts) => (tree) => {
basicTransformations(tree, opts);
};
45 changes: 35 additions & 10 deletions packages/myst-to-jats/src/transforms/sections.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ import type { Plugin } from 'unified';
import type { Root } from 'mdast';
import type { Parent, Heading, Block } from 'myst-spec';
import { liftChildren, NotebookCell } from 'myst-common';
import { remove } from 'unist-util-remove';
import { selectAll } from 'unist-util-select';
import type { Options } from '../types';

export type Section = Omit<Heading, 'type'> & { type: 'section'; meta?: string };

Expand All @@ -18,23 +20,46 @@ export function sectionAttrsFromBlock(node: { data?: Record<string, any>; identi
return output;
}

function blockIsNotebookSection(node: Block) {
function blockIsNotebookCode(node: Block) {
// Markdown blocks will be divided to sections later by headings.
return sectionAttrsFromBlock(node)['sec-type'] === NotebookCell.code;
}

/**
 * Tell whether a block node carries a `fig-cap` entry in its `data`.
 *
 * @param node - Block node to inspect; `data` may be absent.
 * @returns `true` when `node.data['fig-cap']` exists and is truthy, `false` otherwise.
 */
function blockIsNotebookFigure(node: Block) {
  const figureCaption = node.data?.['fig-cap'];
  return Boolean(figureCaption);
}

/**
* This transform does the following:
* - Block nodes from notebook sources with meta.type of "notebook-code" are
* converted to section nodes
* - Remaining block nodes are removed, lifting children up a level
* - Top-level heading nodes are then used to break the tree into
* section nodes, with heading and subsequent nodes as children
* - For sub-articles:
* - Blocks are converted directly to sections with no additional transformation.
* - This means notebook cell divisions are maintained.
* - However, markdown sub-articles do not get divided into sections by header.
* - For main articles:
* - Notebook code cell blocks (with meta.type of "notebook-code") are removed.
* - Remaining blocks are removed, lifting children up a level
* - Top-level heading nodes are then used to break the tree into section nodes,
* with heading and subsequent nodes as children
*/
export function sectionTransform(tree: Root) {
export function sectionTransform(tree: Root, opts: Options) {
if (opts.isSubArticle) {
(selectAll('block', tree) as Block[]).forEach((node) => {
(node as any).type = 'section';
});
return;
}
(selectAll('block', tree) as Block[]).forEach((node) => {
if (blockIsNotebookSection(node)) (node as any).type = 'section';
if (blockIsNotebookFigure(node)) {
(node as any).type = 'section';
} else if (blockIsNotebookCode(node)) {
(node as any).type = '__delete__';
}
});
const removed = remove(tree, '__delete__');
if (removed === null) {
// remove is unhappy if all children are removed - this forces it through
tree.children = [];
}
liftChildren(tree, 'block'); // this loses part information. TODO: milestones
const children: Parent[] = [];
let current: Section | undefined = undefined;
Expand Down Expand Up @@ -69,6 +94,6 @@ export function sectionTransform(tree: Root) {
tree.children = children as any;
}

export const sectionPlugin: Plugin<[], Root, Root> = () => (tree) => {
sectionTransform(tree);
export const sectionPlugin: Plugin<[Options], Root, Root> = (opts) => (tree) => {
sectionTransform(tree, opts);
};
6 changes: 4 additions & 2 deletions packages/myst-to-jats/src/types.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { GenericNode, MessageInfo } from 'myst-common';
import type { GenericNode, MessageInfo, SourceFileKind } from 'myst-common';
import type { PageFrontmatter } from 'myst-frontmatter';
import type { Root } from 'myst-spec';
import type { CitationRenderer } from 'citation-js-utils';
Expand All @@ -20,12 +20,13 @@ export type MathPlugins = Required<PageFrontmatter>['math'];

export type Options = {
handlers?: Record<string, Handler>;
isSubArticle?: boolean;
};

export type DocumentOptions = Options & {
subArticles?: ArticleContent[];
spaces?: number;
fullArticle?: boolean;
writeFullArticle?: boolean;
};

export type StateData = {
Expand All @@ -34,6 +35,7 @@ export type StateData = {

export type ArticleContent = {
mdast: Root;
kind: SourceFileKind;
frontmatter?: PageFrontmatter;
citations?: CitationRenderer;
slug?: string;
Expand Down
127 changes: 0 additions & 127 deletions packages/myst-to-jats/tests/article.yml
Original file line number Diff line number Diff line change
Expand Up @@ -254,130 +254,3 @@ cases:
</fn-group>
</back>
</article>
- title: Notebook outputs
tree:
type: root
children:
- type: block
data:
id: nb-cell-0
type: notebook-code
identifier: nb-cell-0
label: nb-cell-0
html_id: nb-cell-0
children:
- type: code
lang: python
executable: true
value: print('abc')
identifier: nb-cell-0-code
enumerator: 1
html_id: nb-cell-0-code
- type: output
id: T7FMDqDm8dM2bOT1tKeeM
identifier: nb-cell-0-output
html_id: nb-cell-0-output
data:
- name: stdout
output_type: stream
text: abc\n...
hash: a
path: files/a.txt
- type: block
data:
id: nb-cell-1
type: notebook-code
identifier: nb-cell-1
label: nb-cell-1
html_id: nb-cell-1
children:
- type: code
lang: python
executable: true
value: "'abc'"
identifier: nb-cell-1-code
enumerator: 2
html_id: nb-cell-1-code
- type: output
id: uE4mPgSow0oyo0dvEH9Lc
identifier: nb-cell-1-output
html_id: nb-cell-1-output
data:
- output_type: execute_result
execution_count: 3
metadata: {}
data:
text/plain:
content_type: text/plain
hash: b
path: files/b.txt
text/html:
content_type: text/html
hash: b
path: files/b.html
- type: block
data:
id: nb-cell-2
type: notebook-code
identifier: nb-cell-2
label: nb-cell-2
html_id: nb-cell-2
children:
- type: code
lang: python
executable: true
value: a
identifier: nb-cell-2-code
enumerator: 3
html_id: nb-cell-2-code
- type: output
id: 7Qrwdo-_oq5US1Du2KCLU
identifier: nb-cell-2-output
html_id: nb-cell-2-output
data:
- ename: NameError
evalue: name 'a' is not defined
output_type: error
traceback: \u001b[0;31m------------------------------------------------------...
hash: c
path: files/c.txt
jats: |-
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD with MathML3 v1.3 20210610//EN" "http://jats.nlm.nih.gov/publishing/1.3/JATS-archivearticle1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" dtd-version="1.3" xml:lang="en">
<front>
<article-meta/>
</front>
<body>
<sec id="nb-cell-0" sec-type="notebook-code">
<code id="nb-cell-0-code" language="python" executable="yes">print('abc')</code>
<sec sec-type="notebook-output" id="nb-cell-0-output">
<alternatives>
<media specific-use="stream" mimetype="text" mime-subtype="plain" xlink:href="files/a.txt"/>
</alternatives>
</sec>
</sec>
<sec id="nb-cell-1" sec-type="notebook-code">
<code id="nb-cell-1-code" language="python" executable="yes">'abc'</code>
<sec sec-type="notebook-output" id="nb-cell-1-output">
<alternatives>
<media specific-use="text" mimetype="text" mime-subtype="plain" xlink:href="files/b.txt"/>
<media specific-use="web" mimetype="text" mime-subtype="html" xlink:href="files/b.html"/>
</alternatives>
</sec>
</sec>
<sec id="nb-cell-2" sec-type="notebook-code">
<code id="nb-cell-2-code" language="python" executable="yes">a</code>
<sec sec-type="notebook-output" id="nb-cell-2-output">
<alternatives>
<media specific-use="error" mimetype="text" mime-subtype="plain" xlink:href="files/c.txt">
<caption>
<title>NameError</title>
<p>name 'a' is not defined</p>
</caption>
</media>
</alternatives>
</sec>
</sec>
</body>
</article>
Loading