sphinx-doc · picnixz · Feb 3, 2024 · Feb 3, 2024 · Feb 3, 2024 · Feb 3, 2024
diff --git a/sphinx/util/inventory.py b/sphinx/util/inventory.py
@@ -89,6 +89,8 @@ def load(
             return cls.load_v1(reader, uri, joinfunc)
         elif line == '# Sphinx inventory version 2':
             return cls.load_v2(reader, uri, joinfunc)
+        elif line == '# Sphinx inventory version 3':
+            return cls.load_v3(reader, uri, joinfunc)
         else:
             raise ValueError('invalid inventory header: %s' % line)
 
@@ -154,6 +156,78 @@ def load_v2(
             invdata.setdefault(type, {})[name] = inv_item
         return invdata
 
+    @classmethod
+    def load_v3(
+        cls: type[InventoryFile],
+        stream: InventoryFileReader,
+        uri: str,
+        join: Callable[[str, str], str],
+    ) -> Inventory:
+        """Load a version 3 inventory; the version header line is already read.
+
+        Entry lines look like ``PRIO[:LEN] NAME DOMAIN:TYPE LOCATION DISPNAME``
+        where the optional ``LEN`` is ``len(NAME)`` and is what allows *NAME*
+        to contain spaces. Lines that do not fit this shape are skipped.
+
+        :returns: ``{reftype: {name: (project, version, location, dispname)}}``
+        :raises ValueError: if the header does not mention zlib compression
+        """
+        invdata: Inventory = {}
+        projname = stream.readline().rstrip()[11:]  # strip '# Project: '
+        version = stream.readline().rstrip()[11:]  # strip '# Version: '
+        line = stream.readline()
+        if 'zlib' not in line:
+            raise ValueError('invalid inventory header (not compressed): %s' % line)
+
+        # priority, optionally followed by ':' and the length of the name
+        data_before_name = re.compile(r'^(-?\d+)(:\d+)?\s')
+        # pattern when the name does not have spaces (no length was recorded)
+        name_pattern = re.compile(r'^(.+?)\s+\S+\s+?\S*\s+.*')
+        # pattern for the string after the name
+        data_after_name = re.compile(
+            r'^(?P<reftype>\S+)\s+(?P<location>\S*)\s+(?P<dispname>.*)',
+        )
+
+        for line in stream.read_compressed_lines():
+            line = line.rstrip()
+            if (before_name := data_before_name.match(line)) is None:
+                continue
+            # group 1 is the priority: parsed but currently unused
+            s_namesize = before_name.group(2)
+            line = line[before_name.end():]  # remove what was just matched
+
+            if s_namesize is None:
+                if (m := name_pattern.match(line)) is None:
+                    continue
+                name = m.group(1)
+            else:
+                namesize = int(s_namesize[1:])  # remove leading ':'
+                name = line[:namesize]
+                if len(name) != namesize:
+                    # the declared length overruns the line: skip the entry
+                    # like any other malformed line (an assert vanishes with -O)
+                    continue
+            # remove the name and every whitespace separating it from the type
+            line = line[len(name):].lstrip()
+
+            if (data := data_after_name.match(line)) is None:
+                continue
+            reftype, location, dispname = data.groups()
+            if ':' not in reftype:
+                # wrong type value: it should be "{domain}:{objtype}". Checked in
+                # Python rather than in the regex to avoid regex DoS (refs: #8175)
+                continue
+            if reftype == 'py:module' and reftype in invdata and name in invdata[reftype]:
+                # due to a bug in 1.1 and below, two inventory entries are
+                # created for Python modules, and the first one is correct
+                continue
+            if location.endswith('$'):
+                location = location[:-1] + name
+            location = join(uri, location)
+            inv_item: InventoryItem = projname, version, location, dispname
+            invdata.setdefault(reftype, {})[name] = inv_item
+        return invdata
+
     @classmethod
     def dump(
         cls: type[InventoryFile], filename: str, env: BuildEnvironment, builder: Builder,
@@ -163,7 +237,7 @@ def escape(string: str) -> str:
 
         with open(os.path.join(filename), 'wb') as f:
             # header
-            f.write(('# Sphinx inventory version 2\n'
+            f.write(('# Sphinx inventory version 3\n'
                      '# Project: %s\n'
                      '# Version: %s\n'
                      '# The remainder of this file is compressed using zlib.\n' %
@@ -183,7 +257,16 @@ def escape(string: str) -> str:
                         uri += '#' + anchor
                     if dispname == name:
                         dispname = '-'
-                    entry = ('%s %s:%s %s %s %s\n' %
-                             (name, domainname, typ, prio, uri, dispname))
+
+                    # For names with spaces, we need to know exactly where
+                    # the ref-type string starts. Technically, we should not
+                    # have ':' inside domain or role names, but extensions
+                    # may have some weird role names and they could handle
+                    # them internally to be docutils compatible. As such,
+                    # we encode the length of the name after the priority.
+                    slen = f':{len(name)}' if ' ' in name else ''
+                    entry = '%s%s %s %s:%s %s %s\n' % (
+                        prio, slen, name, domainname, typ, uri, dispname,
+                    )
                     f.write(compressor.compress(entry.encode()))
             f.write(compressor.flush())
diff --git a/tests/roots/test-ext-intersphinx-ws/conf.py b/tests/roots/test-ext-intersphinx-ws/conf.py
@@ -0,0 +1,4 @@
+extensions = ['sphinx.ext.intersphinx', 'sphinx.ext.autosectionlabel']
+autosectionlabel_prefix_document = True  # presumably why labels read "index:<title>" in the test
+autosectionlabel_maxdepth = 0
+intersphinx_mapping = {}  # empty: no external inventories are mapped in this test root
diff --git a/tests/roots/test-ext-intersphinx-ws/index.rst b/tests/roots/test-ext-intersphinx-ws/index.rst
@@ -0,0 +1,47 @@
+1 OK
+----
+:ref:`index:1 OK`
+
+OK 1
+----
+:ref:`index:OK 1`
+
+OK 1 OK
+-------
+:ref:`index:OK 1 OK`
+
+123 OK
+------
+:ref:`index:123 OK`
+
+1 2 OK
+------
+:ref:`index:1 2 OK`
+
+1 2 3 OK
+--------
+:ref:`index:1 2 3 OK`
+
+OK OK 1
+-------
+:ref:`index:OK OK 1`
+
+OK OK 2 OK OK
+-------------
+:ref:`index:OK OK 2 OK OK`
+
+OK 1 2 OK
+---------
+:ref:`index:OK 1 2 OK`
+
+OK 1 OK 2
+---------
+:ref:`index:OK 1 OK 2`
+
+OK 1 2 3
+--------
+:ref:`index:OK 1 2 3`
+
+1 OK 1
+------
+:ref:`index:1 OK 1`
diff --git a/tests/test_extensions/test_ext_intersphinx.py b/tests/test_extensions/test_ext_intersphinx.py
@@ -1,6 +1,7 @@
 """Test the intersphinx extension."""
 
 import http.server
+import posixpath
 from unittest import mock
 
 import pytest
@@ -19,6 +20,7 @@
 )
 from sphinx.ext.intersphinx import setup as intersphinx_setup
 from sphinx.util.console import strip_colors
+from sphinx.util.inventory import InventoryFile
 
 from tests.test_util.test_util_inventory import inventory_v2, inventory_v2_not_having_version
 from tests.utils import http_server
@@ -581,3 +583,30 @@ def test_intersphinx_role(app, warning):
 
     # explicit title
     assert html.format('index.html#foons') in content
+
+
+@pytest.mark.sphinx('html', testroot='ext-intersphinx-ws')
+def test_intersphinx_whitespace_targets(app):
+    """Section labels containing spaces must survive an objects.inv round trip."""
+    expected_labels = {
+        'genindex': ('Python', '', 'genindex.html', 'Index'),
+        'index:1 2 3 ok': ('Python', '', 'index.html#id3', '1 2 3 OK'),
+        'index:1 2 ok': ('Python', '', 'index.html#id2', '1 2 OK'),
+        'index:1 ok': ('Python', '', 'index.html#ok', '1 OK'),
+        'index:1 ok 1': ('Python', '', 'index.html#id4', '1 OK 1'),
+        'index:123 ok': ('Python', '', 'index.html#id1', '123 OK'),
+        'index:ok 1': ('Python', '', 'index.html#ok-1', 'OK 1'),
+        'index:ok 1 2 3': ('Python', '', 'index.html#ok-1-2-3', 'OK 1 2 3'),
+        'index:ok 1 2 ok': ('Python', '', 'index.html#ok-1-2-ok', 'OK 1 2 OK'),
+        'index:ok 1 ok': ('Python', '', 'index.html#ok-1-ok', 'OK 1 OK'),
+        'index:ok 1 ok 2': ('Python', '', 'index.html#ok-1-ok-2', 'OK 1 OK 2'),
+        'index:ok ok 1': ('Python', '', 'index.html#ok-ok-1', 'OK OK 1'),
+        'index:ok ok 2 ok ok': ('Python', '', 'index.html#ok-ok-2-ok-ok', 'OK OK 2 OK OK'),
+        'modindex': ('Python', '', 'py-modindex.html', 'Module Index'),
+        'py-modindex': ('Python', '', 'py-modindex.html', 'Python Module Index'),
+        'search': ('Python', '', 'search.html', 'Search Page'),
+    }
+    app.build()
+    with open(app.outdir / 'objects.inv', 'rb') as inventory:
+        labels = InventoryFile.load(inventory, '', posixpath.join)['std:label']
+    assert labels == expected_labels