Skip to content

Commit

Permalink
refactor and add documentation.
Browse files Browse the repository at this point in the history
  • Loading branch information
jbweston committed Oct 24, 2018
1 parent 99a6909 commit 9e9cf80
Showing 1 changed file with 190 additions and 112 deletions.
302 changes: 190 additions & 112 deletions jupyter_sphinx/execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,46 +29,28 @@
logger = logging.getLogger(__name__)


def blank_nb(kernel_name):
try:
spec = get_kernel_spec(kernel_name)
except NoSuchKernel as e:
raise ExtensionError('Unable to find kernel', orig_exc=e)
return nbformat.v4.new_notebook(metadata={
'kernelspec': {
'display_name': spec.display_name,
'language': spec.language,
'name': kernel_name,
}
})


def split_on(pred, it):
"""Split an iterator wherever a predicate is True."""

counter = 0

def count(x):
nonlocal counter
if pred(x):
counter += 1
return counter

# Return iterable of lists to ensure that we don't lose our
# place in the iterator
return (list(x) for _, x in groupby(it, count))


class Cell(docutils.nodes.container):
"""Container for input/output from Jupyter kernel"""
pass

class KernelNode(docutils.nodes.Element):
"""Dummy node for signaling a new kernel"""
pass

### Directives and their associated doctree nodes

class JupyterKernel(Directive):
"""Specify a new Jupyter Kernel.
Arguments
---------
kernel_name : str (optional)
The name of the kernel in which to execute future Jupyter cells, as
reported by executing 'jupyter kernelspec list' on the command line.
Options
-------
id : str
An identifier for *this kernel instance*. Used to name any output
files generated when executing the Jupyter cells (e.g. images
produced by cells, or a script containing the cell inputs).
Content
-------
None
"""

optional_arguments = 1
final_argument_whitespace = False
Expand All @@ -79,15 +61,53 @@ class JupyterKernel(Directive):
}

def run(self):
kernel_name = self.arguments[0] if self.arguments else ''
return [KernelNode(
return [JupyterKernelNode(
kernel_name=self.arguments[0] if self.arguments else '',
kernel_id=self.options.get('id', ''),
)]


class JupyterKernelNode(docutils.nodes.Element):
"""Inserted into doctree whenever a JupyterKernel directive is encountered.
Used as a marker to signal that the following JupyterCellNodes (until the
next, if any, JupyterKernelNode) should be executed in a separate kernel.
"""

def __init__(self, kernel_name, kernel_id):
super().__init__(
'',
kernel_name=kernel_name.strip(),
kernel_id=self.options.get('id', '').strip(),
)]
kernel_id=kernel_id.strip(),
)


class JupyterCell(Directive):
"""Define a code cell to be later executed in a Jupyter kernel.
The content of the directive is the code to execute. Code is not
executed when the directive is parsed, but later during a doctree
transformation.
Arguments
---------
filename : str (optional)
If provided, a path to a file containing code.
Options
-------
hide-code : bool
If provided, the code will not be displayed in the output.
hide-output : bool
If provided, the cell output will not be displayed in the output.
code-below : bool
If provided, the code will be shown below the cell output.
Content
-------
code : str
A code cell.
"""

required_arguments = 0
optional_arguments = 1
Expand Down Expand Up @@ -122,17 +142,98 @@ def run(self):
self.assert_has_content()
content = self.content

# Cell only contains the input for now; we will execute the cell
# and insert the output when the whole document has been parsed.
return [Cell('',
return [JupyterCellNode(content, self.options)]


class JupyterCellNode(docutils.nodes.container):
"""Inserted into doctree whenever a JupyterCell directive is encountered.
Holds the cell's source code in a literal block; the outputs produced by
executing the cell are attached later by the ExecuteJupyterCells transform.
"""

def __init__(self, source_lines, options):
return super().__init__(
'',
docutils.nodes.literal_block(
text='\n'.join(content),
text='\n'.join(source_lines),
),
hide_code=('hide-code' in self.options),
hide_output=('hide-output' in self.options),
code_below=('code-below' in self.options),
)]
hide_code=('hide-code' in options),
hide_output=('hide-output' in options),
code_below=('code-below' in options),
)


### Doctree transformations

class ExecuteJupyterCells(SphinxTransform):
    """Run the Jupyter code cells of a document and insert their outputs.

    The doctree is scanned for JupyterKernelNode and JupyterCellNode
    instances.  Every JupyterKernelNode opens a new group of cells; each
    group is executed as one notebook via ``execute_cells`` and the
    resulting outputs are attached to the corresponding cell nodes.
    """

    default_priority = 180  # An early transform, idk

    def apply(self):
        document = self.document
        # Directory part of the docname (relative to the source dir) and
        # the bare document name.
        rel_dir = os.path.dirname(self.env.docname)
        docname = os.path.basename(self.env.docname)
        fallback_kernel = self.config.jupyter_execute_default_kernel
        fallback_names = default_notebook_names(docname)

        # Documents without any code cell need no processing at all.
        if not document.traverse(JupyterCellNode):
            return

        logger.info('executing {}'.format(docname))
        output_dir = os.path.join(output_directory(self.env), rel_dir)

        def is_kernel_marker(node):
            return isinstance(node, JupyterKernelNode)

        def is_jupyter_node(node):
            return isinstance(node, (JupyterCellNode, JupyterKernelNode))

        # One group per notebook: a group starts at every kernel marker.
        groups = split_on(
            is_kernel_marker,
            document.traverse(is_jupyter_node),
        )

        for group in groups:
            head = group[0]
            if isinstance(head, JupyterKernelNode):
                # The marker itself is not a cell; it only configures
                # the notebook that the remaining nodes belong to.
                cells = group[1:]
                kernel_name = head['kernel_name'] or fallback_kernel
                file_name = head['kernel_id'] or next(fallback_names)
            else:
                # Cells that appear before any kernel marker run in the
                # default kernel under a default notebook name.
                cells = group
                kernel_name = fallback_kernel
                file_name = next(fallback_names)

            code_cells = [
                nbformat.v4.new_code_cell(cell.astext()) for cell in cells
            ]
            notebook = execute_cells(
                kernel_name,
                code_cells,
                self.config.jupyter_execute_kwargs,
            )

            # The language is only known once the notebook has run, so the
            # code blocks get their highlighting language here.
            for cell in cells:
                literal = cell.children[0]
                language_info = notebook.metadata.language_info
                literal.attributes['language'] = language_info.pygments_lexer

            # Write certain cell outputs (e.g. images) to separate files;
            # this also records the output file paths in the metadata of
            # the associated cells of 'notebook'.
            write_notebook_output(notebook, output_dir, file_name)

            # Attach doctree nodes for the outputs to each cell node.
            for cell_node, executed in zip(cells, notebook.cells):
                attach_outputs(
                    cell_output_to_nodes(
                        executed,
                        self.config.jupyter_execute_data_priority,
                        sphinx_abs_dir(self.env),
                    ),
                    cell_node,
                )


### Roles

def jupyter_download_role(name, rawtext, text, lineno, inliner):
_, filetype = name.split(':')
Expand All @@ -147,11 +248,43 @@ def jupyter_download_role(name, rawtext, text, lineno, inliner):
return [node], []


### Utilities

def blank_nb(kernel_name):
    """Return an empty v4 notebook whose metadata names the given kernel.

    Parameters
    ----------
    kernel_name : str
        Kernel name, looked up with ``get_kernel_spec``.

    Returns
    -------
    A notebook created by ``nbformat.v4.new_notebook`` whose ``kernelspec``
    metadata holds the kernel's display name, language and name.

    Raises
    ------
    ExtensionError
        If ``get_kernel_spec`` reports that the kernel does not exist.
    """
    try:
        spec = get_kernel_spec(kernel_name)
    except NoSuchKernel as err:
        # Chain explicitly so the traceback shows the missing kernel as the
        # direct cause instead of an error raised while handling another.
        raise ExtensionError('Unable to find kernel', orig_exc=err) from err

    kernelspec = {
        'display_name': spec.display_name,
        'language': spec.language,
        'name': kernel_name,
    }
    return nbformat.v4.new_notebook(metadata={'kernelspec': kernelspec})


def split_on(pred, it):
    """Split an iterable into lists, starting a new list at each match.

    Every element for which ``pred`` is true begins a new group; elements
    that precede the first match form a group of their own.  The result
    is a generator that yields the groups as lists, in order.
    """
    # Running number of matches seen so far; kept in a list so that the
    # key function below can update it in place.
    matches_seen = [0]

    def group_index(item):
        matches_seen[0] += bool(pred(item))
        return matches_seen[0]

    # Each group is materialised as a list right away, because advancing
    # groupby invalidates the previous group's iterator.
    return (list(members) for _key, members in groupby(it, group_index))


def cell_output_to_nodes(cell, data_priority, dir):
"""Convert a jupyter cell with outputs and filenames to doctree nodes.
Parameters
==========
----------
cell : jupyter cell
data_priority : list of mime types
Which media types to prioritize.
Expand Down Expand Up @@ -304,63 +437,6 @@ def sphinx_abs_dir(env):
)


class ExecuteJupyterCells(SphinxTransform):
default_priority = 180 # An early transform, idk

def apply(self):
doctree = self.document
doc_relpath = os.path.dirname(self.env.docname) # relative to src dir
docname = os.path.basename(self.env.docname)
default_kernel = self.config.jupyter_execute_default_kernel
default_names = default_notebook_names(docname)

# Check if we have anything to execute.
if not doctree.traverse(Cell):
return

logger.info('executing {}'.format(docname))
output_dir = os.path.join(output_directory(self.env), doc_relpath)

# Start new notebook whenever a KernelNode is encountered
nodes_by_notebook = split_on(
lambda n: isinstance(n, KernelNode),
doctree.traverse(lambda n: isinstance(n, (Cell, KernelNode)))
)

for first, *nodes in nodes_by_notebook:
if isinstance(first, KernelNode):
kernel_name = first['kernel_name'] or default_kernel
file_name = first['kernel_id'] or next(default_names)
else:
nodes = (first, *nodes)
kernel_name = default_kernel
file_name = next(default_names)

notebook = execute_cells(
kernel_name,
[nbformat.v4.new_code_cell(node.astext()) for node in nodes],
self.config.jupyter_execute_kwargs,
)

for node in nodes:
source = node.children[0]
lexer = notebook.metadata.language_info.pygments_lexer
source.attributes['language'] = lexer

# Modifies 'notebook' in-place, adding metadata specifying the
# filenames of the saved outputs.
write_notebook_output(notebook, output_dir, file_name)
# Add doctree nodes for cell output; images reference the filenames
# we just wrote to; sphinx copies these when writing outputs.
for node, cell in zip(nodes, notebook.cells):
output_nodes = cell_output_to_nodes(
cell,
self.config.jupyter_execute_data_priority,
sphinx_abs_dir(self.env)
)
attach_outputs(output_nodes, node)


def setup(app):
# Configuration
app.add_config_value(
Expand All @@ -386,13 +462,13 @@ def setup(app):
'env',
)

# KernelNode is just a doctree marker for the ExecuteJupyterCells
# transform, so we don't actually render it.
# JupyterKernelNode is just a doctree marker for the
# ExecuteJupyterCells transform, so we don't actually render it.
def skip(self, node):
raise docutils.nodes.SkipNode

app.add_node(
KernelNode,
JupyterKernelNode,
html=(skip, None),
latex=(skip, None),
textinfo=(skip, None),
Expand All @@ -401,13 +477,15 @@ def skip(self, node):
)


# JupyterCellNode is a container that holds the input and
# any output, so we render it as a container.
render_container = (
lambda self, node: self.visit_container(node),
lambda self, node: self.depart_container(node),
)

app.add_node(
Cell,
JupyterCellNode,
html=render_container,
latex=render_container,
textinfo=render_container,
Expand Down

0 comments on commit 9e9cf80

Please sign in to comment.