From 771e73436777637d15a23fb60d12fec3f6dd87cd Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Wed, 8 Nov 2023 19:13:55 -0600 Subject: [PATCH 1/2] Deduplicate dependencies for HTMLTextDocument --- CHANGELOG.md | 3 +++ htmltools/_core.py | 8 +++++++- tests/test_html_document.py | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f5ec027..4258275 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,9 +9,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [UNRELEASED] * Changed the type annotation of `_add_ws` from `bool` to `TagAttrValue`. This makes it easier to write functions which call `Tag` functions and pass along `**kwargs`. (#67) + * Changed the type annotation of `collapse_` from `str` to `str | float | None`. This makes it easier to write calls to `css()` pass along `**kwargs`. (#68) + * Enhanced the type definition of `TagAttrs` to include `TagAttrDict`, the type of a `Tag`'s `attrs` property. (#55) +* For `HTMLTextDocument` objects, deduplicate HTML dependencies. (#72) ## [0.4.1] 2023-10-30 diff --git a/htmltools/_core.py b/htmltools/_core.py index dba0b42..7469b1c 100644 --- a/htmltools/_core.py +++ b/htmltools/_core.py @@ -1157,7 +1157,13 @@ def _static_extract_serialized_html_deps( # HTMLdependency.get_tag_representation() pattern = r'' dep_strs = re.findall(pattern, html) - # html = re.sub(pattern, "", html) + # Deduplicate dependencies. htmltools normally would dedupe dependencies, but + # with HTMLTextDocuments, the input HTML would usually have been generated by + # something else (like Quarto) and may not have the dependencies deduped. + dep_strs = list(set(dep_strs)) + + # Remove the serialized HTML dependencies from the HTML string + html = re.sub(pattern, "", html) deps: list[HTMLDependency] = [] for dep_str in dep_strs: diff --git a/tests/test_html_document.py b/tests/test_html_document.py index 48f8458..c3def7a 100644 --- a/tests/test_html_document.py +++ b/tests/test_html_document.py @@ -1,4 +1,5 @@ import os +import re import textwrap from tempfile import TemporaryDirectory from typing import Union @@ -291,14 +292,50 @@ def test_json_roundtrip(): div("hello world", testdep), # Also make sure it would work even with indents ht.HTML(testdep2.serialize_to_script_json(indent=2)), + # Add another copy of testdep, explicitly serialized to script json. + # Normally htmltools will dedupe dependencies when .render() is called, + # but we do this here because when these deps are embedded in a Quarto + # document, Quarto can add each dep independent of the others and + # therefore have duplicates. Since we're using .render, to get + # duplicates, we need to force the duplication. + # + div( + "hello again", + ht.HTML(testdep.serialize_to_script_json()), + ), ] ) + + # Get a string representation which hasn't been passed through + # HTMLTextDocument(). x_str = str(x) + + # Make sure that we successfully forced testdep to show up twice in the HTML, + # before we pass it to HTMLTextDocument() and call .render(). + assert x_str.count('"name": "testdep"') == 2 + + # Make sure that there are three of these HTML dependency script tags. + assert ( + x_str.count('