Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[8.x] [intersphinx] support for arbitrary title names #11932

Closed
89 changes: 86 additions & 3 deletions sphinx/util/inventory.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ def load(
return cls.load_v1(reader, uri, joinfunc)
elif line == '# Sphinx inventory version 2':
return cls.load_v2(reader, uri, joinfunc)
elif line == '# Sphinx inventory version 3':
return cls.load_v3(reader, uri, joinfunc)
else:
raise ValueError('invalid inventory header: %s' % line)

Expand Down Expand Up @@ -154,6 +156,78 @@ def load_v2(
invdata.setdefault(type, {})[name] = inv_item
return invdata

@classmethod
def load_v3(
    cls: type[InventoryFile],
    stream: InventoryFileReader,
    uri: str,
    join: Callable[[str, str], str],
) -> Inventory:
    """Load a version 3 inventory from *stream*.

    The reader is expected to be positioned right after the
    ``# Sphinx inventory version 3`` line.  The next two lines carry the
    project name and version, the third one must announce zlib compression.
    Every compressed line then has the form::

        {priority}[:{namesize}] {name} {domain}:{objtype} {location} {dispname}

    The optional ``:{namesize}`` gives the exact length of ``{name}`` so
    that names containing whitespace can be split off unambiguously.

    Lines that do not follow this layout are skipped.

    :param stream: reader providing ``readline()`` and
        ``read_compressed_lines()``.
    :param uri: base URI every entry location is joined onto.
    :param join: function combining *uri* with an entry location.
    :return: mapping of ``{reftype: {name: (project, version, location,
        dispname)}}``.
    :raises ValueError: if the header does not announce zlib compression.
    """
    invdata: Inventory = {}
    # drop the 11-character '# Project: ' / '# Version: ' prefixes
    projname = stream.readline().rstrip()[11:]
    version = stream.readline().rstrip()[11:]
    line = stream.readline()
    if 'zlib' not in line:
        raise ValueError('invalid inventory header (not compressed): %s' % line)

    # priority, optionally followed by ':<length of the name>'
    data_before_name = re.compile(r'^(-?\d+)(:\d+)?\s', flags=re.VERBOSE)
    # pattern when the name does not have spaces
    name_pattern = re.compile(r'^(.+?)\s+\S+\s+?\S*\s+.*', flags=re.VERBOSE)
    # pattern for the string after the name
    data_after_name = re.compile(
        r'^(?P<reftype>\S+)\s+(?P<location>\S*)\s+(?P<dispname>.*)',
        flags=re.VERBOSE,
    )

    for line in stream.read_compressed_lines():
        line = line.rstrip()

        if (before_name := data_before_name.match(line)) is None:
            continue

        # currently, we do not use the priority, but maybe in the future
        _, s_namesize = before_name.groups(None)

        # remove what was just matched
        line = line[before_name.end():]

        if s_namesize is None:
            if (m := name_pattern.match(line)) is None:
                continue
            name = m.group(1)
            namesize = len(name)
        else:
            namesize = int(s_namesize[1:])  # remove leading ':'
            name = line[:namesize]

        # The name must be followed by whitespace.  This also rejects a
        # declared size that runs past the end of the line (the remainder
        # is then empty) without relying on ``assert``, which is stripped
        # under ``python -O`` and must not be used to validate input data.
        remainder = line[namesize:]
        if not name or not remainder[:1].isspace():
            continue

        # remove the separator(s) between the name and the ref-type
        line = remainder.lstrip()

        if (data := data_after_name.match(line)) is None:
            continue

        reftype, location, dispname = data.groups()

        if ':' not in reftype:
            # wrong type value. type should be in the form of "{domain}:{objtype}"
            #
            # Note: To avoid the regex DoS, this is implemented in python (refs: #8175)
            continue
        if reftype == 'py:module' and name in invdata.get(reftype, ()):
            # due to a bug in 1.1 and below,
            # two inventory entries are created
            # for Python modules, and the first
            # one is correct
            continue
        if location.endswith('$'):
            # a trailing '$' is shorthand for "append the entry name"
            location = location[:-1] + name
        location = join(uri, location)
        inv_item: InventoryItem = projname, version, location, dispname
        invdata.setdefault(reftype, {})[name] = inv_item
    return invdata

@classmethod
def dump(
cls: type[InventoryFile], filename: str, env: BuildEnvironment, builder: Builder,
Expand All @@ -163,7 +237,7 @@ def escape(string: str) -> str:

with open(os.path.join(filename), 'wb') as f:
# header
f.write(('# Sphinx inventory version 2\n'
f.write(('# Sphinx inventory version 3\n'
'# Project: %s\n'
'# Version: %s\n'
'# The remainder of this file is compressed using zlib.\n' %
Expand All @@ -183,7 +257,16 @@ def escape(string: str) -> str:
uri += '#' + anchor
if dispname == name:
dispname = '-'
entry = ('%s %s:%s %s %s %s\n' %
(name, domainname, typ, prio, uri, dispname))

# For names with spaces, we need to know exactly where
# the ref-type string starts. Technically, we should not
# have ':' inside domain or role names, but extensions
# may have some weird role names and they could handle
# them internally to be docutils compatible. As such,
# we encode the length of the name after the priority.
slen = f':{len(name)}' if ' ' in name else ''
entry = '%s%s %s %s:%s %s %s\n' % (
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One alternative I had in mind is to mark the end of the name with a special character (e.g., \x00). However, this means we assume that this character is never used in a name. The inventory would not be much bigger, but during the writing phase we would need to check that the name does not contain such a character and raise an exception if it does.

prio, slen, name, domainname, typ, uri, dispname,
)
f.write(compressor.compress(entry.encode()))
f.write(compressor.flush())
4 changes: 4 additions & 0 deletions tests/roots/test-ext-intersphinx-ws/conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Sphinx configuration for the "whitespace in target names" test root.
extensions = [
    'sphinx.ext.intersphinx',
    'sphinx.ext.autosectionlabel',
]

# Section labels are referenced with the document name as a prefix,
# e.g. ``index:1 OK``.
autosectionlabel_prefix_document = True
autosectionlabel_maxdepth = 0

# No external inventories are mapped here.
intersphinx_mapping = {}
47 changes: 47 additions & 0 deletions tests/roots/test-ext-intersphinx-ws/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
1 OK
----
:ref:`index:1 OK`

OK 1
----
:ref:`index:OK 1`

OK 1 OK
-------
:ref:`index:OK 1 OK`

123 OK
------
:ref:`index:123 OK`

1 2 OK
------
:ref:`index:1 2 OK`

1 2 3 OK
--------
:ref:`index:1 2 3 OK`

OK OK 1
-------
:ref:`index:OK OK 1`

OK OK 2 OK OK
-------------
:ref:`index:OK OK 2 OK OK`

OK 1 2 OK
---------
:ref:`index:OK 1 2 OK`

OK 1 OK 2
---------
:ref:`index:OK 1 OK 2`

OK 1 2 3
--------
:ref:`index:OK 1 2 3`

1 OK 1
------
:ref:`index:1 OK 1`
29 changes: 29 additions & 0 deletions tests/test_extensions/test_ext_intersphinx.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Test the intersphinx extension."""

import http.server
import posixpath
from unittest import mock

import pytest
Expand All @@ -19,6 +20,7 @@
)
from sphinx.ext.intersphinx import setup as intersphinx_setup
from sphinx.util.console import strip_colors
from sphinx.util.inventory import InventoryFile

from tests.test_util.test_util_inventory import inventory_v2, inventory_v2_not_having_version
from tests.utils import http_server
Expand Down Expand Up @@ -581,3 +583,30 @@ def test_intersphinx_role(app, warning):

# explicit title
assert html.format('index.html#foons') in content


@pytest.mark.sphinx('html', testroot='ext-intersphinx-ws')
def test_intersphinx_whitespace_targets(app):
    """Check that labels containing whitespace survive the objects.inv round trip."""
    app.build()

    inventory_path = app.outdir / 'objects.inv'
    with open(inventory_path, 'rb') as inventory_file:
        inventory = InventoryFile.load(inventory_file, '', posixpath.join)

    def entry(location, title):
        # every item of this project shares the same name and (empty) version
        return ('Python', '', location, title)

    expected_labels = {
        'genindex': entry('genindex.html', 'Index'),
        'index:1 2 3 ok': entry('index.html#id3', '1 2 3 OK'),
        'index:1 2 ok': entry('index.html#id2', '1 2 OK'),
        'index:1 ok': entry('index.html#ok', '1 OK'),
        'index:1 ok 1': entry('index.html#id4', '1 OK 1'),
        'index:123 ok': entry('index.html#id1', '123 OK'),
        'index:ok 1': entry('index.html#ok-1', 'OK 1'),
        'index:ok 1 2 3': entry('index.html#ok-1-2-3', 'OK 1 2 3'),
        'index:ok 1 2 ok': entry('index.html#ok-1-2-ok', 'OK 1 2 OK'),
        'index:ok 1 ok': entry('index.html#ok-1-ok', 'OK 1 OK'),
        'index:ok 1 ok 2': entry('index.html#ok-1-ok-2', 'OK 1 OK 2'),
        'index:ok ok 1': entry('index.html#ok-ok-1', 'OK OK 1'),
        'index:ok ok 2 ok ok': entry('index.html#ok-ok-2-ok-ok', 'OK OK 2 OK OK'),
        'modindex': entry('py-modindex.html', 'Module Index'),
        'py-modindex': entry('py-modindex.html', 'Python Module Index'),
        'search': entry('search.html', 'Search Page'),
    }
    assert inventory['std:label'] == expected_labels