Skip to content

Commit

Permalink
rustdoc: improve rustdoc HTML suggestions handling of nested generics
Browse files Browse the repository at this point in the history
Based on some poor suggestions produced when stablizing this lint and running
it on `manformed-generics.rs`
  • Loading branch information
notriddle committed Sep 12, 2022
1 parent 98e1f04 commit 84ca399
Show file tree
Hide file tree
Showing 6 changed files with 330 additions and 13 deletions.
83 changes: 80 additions & 3 deletions src/librustdoc/passes/html_tags.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,34 @@ fn extract_path_backwards(text: &str, end_pos: usize) -> Option<usize> {
if current_pos == end_pos { None } else { Some(current_pos) }
}

fn extract_path_forward(text: &str, start_pos: usize) -> Option<usize> {
use rustc_lexer::{is_id_continue, is_id_start};
let mut current_pos = start_pos;
loop {
if current_pos < text.len() && text[current_pos..].starts_with("::") {
current_pos += 2;
} else {
break;
}
let mut chars = text[current_pos..].chars();
if let Some(c) = chars.next() {
if is_id_start(c) {
current_pos += c.len_utf8();
} else {
break;
}
}
while let Some(c) = chars.next() {
if is_id_continue(c) {
current_pos += c.len_utf8();
} else {
break;
}
}
}
if current_pos == start_pos { None } else { Some(current_pos) }
}

fn is_valid_for_html_tag_name(c: char, is_empty: bool) -> bool {
// https://spec.commonmark.org/0.30/#raw-html
//
Expand Down Expand Up @@ -218,19 +246,68 @@ impl<'a, 'tcx> DocVisitor for InvalidHtmlTagsLinter<'a, 'tcx> {
// If a tag looks like `<this>`, it might actually be a generic.
// We don't try to detect stuff `<like, this>` because that's not valid HTML,
// and we don't try to detect stuff `<like this>` because that's not valid Rust.
if let Some(Some(generics_start)) = (is_open_tag
&& dox[..range.end].ends_with('>'))
let mut generics_end = range.end;
if let Some(Some(mut generics_start)) = (is_open_tag
&& dox[..generics_end].ends_with('>'))
.then(|| extract_path_backwards(&dox, range.start))
{
while generics_start != 0
&& generics_end < dox.len()
&& dox.as_bytes()[generics_start - 1] == b'<'
&& dox.as_bytes()[generics_end] == b'>'
{
generics_end += 1;
generics_start -= 1;
if let Some(new_start) = extract_path_backwards(&dox, generics_start) {
generics_start = new_start;
}
if let Some(new_end) = extract_path_forward(&dox, generics_end) {
generics_end = new_end;
}
}
if let Some(new_end) = extract_path_forward(&dox, generics_end) {
generics_end = new_end;
}
let generics_sp = match super::source_span_for_markdown_range(
tcx,
&dox,
&(generics_start..range.end),
&(generics_start..generics_end),
&item.attrs,
) {
Some(sp) => sp,
None => item.attr_span(tcx),
};
// Sometimes, we only extract part of a path. For example, consider this:
//
// <[u32] as IntoIter<u32>>::Item
// ^^^^^ unclosed HTML tag `u32`
//
// We don't have any code for parsing fully-qualified trait paths.
// In theory, we could add it, but doing it correctly would require
// parsing the entire path grammar, which is problematic because of
// overlap between the path grammar and Markdown.
//
// The example above shows that ambiguity. Is `[u32]` intended to be an
// intra-doc link to the u32 primitive, or is it intended to be a slice?
//
// If the below conditional were removed, we would suggest this, which is
// not what the user probably wants.
//
// <[u32] as `IntoIter<u32>`>::Item
//
// We know that the user actually wants to wrap the whole thing in a code
// block, but the only reason we know that is because `u32` does not, in
// fact, implement IntoIter. If the example looks like this:
//
// <[Vec<i32>] as IntoIter<i32>::Item
//
// The ideal fix would be significantly different.
if (generics_start > 0 && dox.as_bytes()[generics_start - 1] == b'<')
|| (generics_end < dox.len() && dox.as_bytes()[generics_end] == b'>')
{
diag.emit();
return;
}
// multipart form is chosen here because ``Vec<i32>`` would be confusing.
diag.multipart_suggestion(
"try marking as source code",
Expand Down
42 changes: 42 additions & 0 deletions src/test/rustdoc-ui/suggestions/html-as-generics-no-suggestions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,48 @@ pub struct ConstGeneric;
// HTML tags cannot contain commas, so no error.
pub struct MultipleGenerics;

/// This <[u32] as Iterator<Item>> thing!
//~^ERROR unclosed HTML tag `Item`
// Some forms of fully-qualified path are simultaneously valid HTML tags
// with attributes. They produce an error, but no suggestion, because figuring
// out if this is valid would require parsing the entire path grammar.
//
// The important part is that we don't produce any *wrong* suggestions.
// While several other examples below are added to make sure we don't
// produce suggestions when given complex paths, this example is the actual
// reason behind not just using the real path parser. It's ambiguous: there's
// no way to locally reason out whether that `[u32]` is intended to be a slice
// or an intra-doc link.
pub struct FullyQualifiedPathsDoNotCount;

/// This <Vec as IntoIter>::Iter thing!
//~^ERROR unclosed HTML tag `Vec`
// Some forms of fully-qualified path are simultaneously valid HTML tags
// with attributes. They produce an error, but no suggestion, because figuring
// out if this is valid would require parsing the entire path grammar.
pub struct FullyQualifiedPathsDoNotCount1;

/// This Vec<Vec as IntoIter>::Iter thing!
//~^ERROR unclosed HTML tag `Vec`
// Some forms of fully-qualified path are simultaneously valid HTML tags
// with attributes. They produce an error, but no suggestion, because figuring
// out if this is valid would require parsing the entire path grammar.
pub struct FullyQualifiedPathsDoNotCount2;

/// This Vec<Vec as IntoIter> thing!
//~^ERROR unclosed HTML tag `Vec`
// Some forms of fully-qualified path are simultaneously valid HTML tags
// with attributes. They produce an error, but no suggestion, because figuring
// out if this is valid would require parsing the entire path grammar.
pub struct FullyQualifiedPathsDoNotCount3;

/// This Vec<Vec<i32> as IntoIter> thing!
//~^ERROR unclosed HTML tag `i32`
// Some forms of fully-qualified path are simultaneously valid HTML tags
// with attributes. They produce an error, but no suggestion, because figuring
// out if this is valid would require parsing the entire path grammar.
pub struct FullyQualifiedPathsDoNotCount4;

/// This Vec<i32 class="test"> thing!
//~^ERROR unclosed HTML tag `i32`
// HTML attributes shouldn't be treated as Rust syntax, so no suggestions.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,38 +1,68 @@
error: unclosed HTML tag `i32`
--> $DIR/html-as-generics-no-suggestions.rs:11:13
error: unclosed HTML tag `Item`
--> $DIR/html-as-generics-no-suggestions.rs:11:28
|
LL | /// This Vec<i32 class="test"> thing!
| ^^^^
LL | /// This <[u32] as Iterator<Item>> thing!
| ^^^^^^
|
note: the lint level is defined here
--> $DIR/html-as-generics-no-suggestions.rs:1:9
|
LL | #![deny(rustdoc::invalid_html_tags)]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^

error: unclosed HTML tag `Vec`
--> $DIR/html-as-generics-no-suggestions.rs:25:10
|
LL | /// This <Vec as IntoIter>::Iter thing!
| ^^^^

error: unclosed HTML tag `Vec`
--> $DIR/html-as-generics-no-suggestions.rs:32:13
|
LL | /// This Vec<Vec as IntoIter>::Iter thing!
| ^^^^

error: unclosed HTML tag `Vec`
--> $DIR/html-as-generics-no-suggestions.rs:39:13
|
LL | /// This Vec<Vec as IntoIter> thing!
| ^^^^

error: unclosed HTML tag `i32`
--> $DIR/html-as-generics-no-suggestions.rs:46:17
|
LL | /// This Vec<Vec<i32> as IntoIter> thing!
| ^^^^^

error: unclosed HTML tag `i32`
--> $DIR/html-as-generics-no-suggestions.rs:53:13
|
LL | /// This Vec<i32 class="test"> thing!
| ^^^^

error: unopened HTML tag `i32`
--> $DIR/html-as-generics-no-suggestions.rs:20:13
--> $DIR/html-as-generics-no-suggestions.rs:62:13
|
LL | /// This Vec</i32> thing!
| ^^^^^^

error: unclosed HTML tag `i32`
--> $DIR/html-as-generics-no-suggestions.rs:25:13
--> $DIR/html-as-generics-no-suggestions.rs:67:13
|
LL | /// This 123<i32> thing!
| ^^^^^

error: unclosed HTML tag `i32`
--> $DIR/html-as-generics-no-suggestions.rs:30:14
--> $DIR/html-as-generics-no-suggestions.rs:72:14
|
LL | /// This Vec:<i32> thing!
| ^^^^^

error: unclosed HTML tag `i32`
--> $DIR/html-as-generics-no-suggestions.rs:35:39
--> $DIR/html-as-generics-no-suggestions.rs:77:39
|
LL | /// This [link](https://rust-lang.org)<i32> thing!
| ^^^^^

error: aborting due to 5 previous errors
error: aborting due to 10 previous errors

40 changes: 40 additions & 0 deletions src/test/rustdoc-ui/suggestions/html-as-generics.fixed
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,43 @@ pub struct BareTurbofish;
//~^ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct Nested;

/// Nested generics `Vec<Vec<u32>>`
//~^ ERROR unclosed HTML tag `u32`
//~|HELP try marking as source
pub struct NestedGenerics;

/// Generics with path `Vec<i32>::Iter`
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct GenericsWithPath;

/// Generics with path `<Vec<i32>>::Iter`
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPath;

/// Generics with path `Vec<Vec<i32>>::Iter`
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPath2;

/// Generics with bump `<Vec<i32>>`s
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithBump;

/// Generics with bump `Vec<Vec<i32>>`s
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithBump2;

/// Generics with punct `<Vec<i32>>`!
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPunct;

/// Generics with punct `Vec<Vec<i32>>`!
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPunct2;
40 changes: 40 additions & 0 deletions src/test/rustdoc-ui/suggestions/html-as-generics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,43 @@ pub struct BareTurbofish;
//~^ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct Nested;

/// Nested generics Vec<Vec<u32>>
//~^ ERROR unclosed HTML tag `u32`
//~|HELP try marking as source
pub struct NestedGenerics;

/// Generics with path Vec<i32>::Iter
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct GenericsWithPath;

/// Generics with path <Vec<i32>>::Iter
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPath;

/// Generics with path Vec<Vec<i32>>::Iter
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPath2;

/// Generics with bump <Vec<i32>>s
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithBump;

/// Generics with bump Vec<Vec<i32>>s
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithBump2;

/// Generics with punct <Vec<i32>>!
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPunct;

/// Generics with punct Vec<Vec<i32>>!
//~^ ERROR unclosed HTML tag `i32`
//~|HELP try marking as source
pub struct NestedGenericsWithPunct2;
Loading

0 comments on commit 84ca399

Please sign in to comment.