From 5fd366439852bc20cf7eec4d466c50e4c80257eb Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Thu, 15 Sep 2022 14:01:18 +0200 Subject: [PATCH 1/3] feat(rome-formatter): Comments Map This PR implements a multimap for storing the leading, dangling, and trailing comments for nodes. An implementation using a general-purpose implementation would use a multimaps for the leading, trailing, and dangling comments and each multimap would allocate a `Vec` for every node with a comment. This purpose-built multimap uses a shared `Vec` to store all comments where this is possible (99.99% of comments) and only allocates node specific `Vec`s if necessary. The idea behind the implementation is that comments should maintain the same order as in the source document. That means: * All comments for a node are inserted at "the same" time (one at a time, but all together) * It first inserts the leading, then dangling, and finally, a node's trailing comments. --- Cargo.lock | 1 + crates/rome_formatter/Cargo.toml | 1 + crates/rome_formatter/src/comments.rs | 2 + crates/rome_formatter/src/comments/map.rs | 802 ++++++++++++++++++++++ 4 files changed, 806 insertions(+) create mode 100644 crates/rome_formatter/src/comments/map.rs diff --git a/Cargo.lock b/Cargo.lock index 548c1814c7e5..31dae1023f2f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1569,6 +1569,7 @@ name = "rome_formatter" version = "0.0.0" dependencies = [ "cfg-if", + "countme", "indexmap", "rome_rowan", "rustc-hash", diff --git a/crates/rome_formatter/Cargo.toml b/crates/rome_formatter/Cargo.toml index c288a1f60358..fde1f7301126 100644 --- a/crates/rome_formatter/Cargo.toml +++ b/crates/rome_formatter/Cargo.toml @@ -16,6 +16,7 @@ cfg-if = "1.0.0" indexmap = "1.8.2" schemars = { version = "0.8.10", optional = true } rustc-hash = "1.1.0" +countme = "3.0.1" [features] serde = ["dep:serde", "schemars", "rome_rowan/serde"] diff --git a/crates/rome_formatter/src/comments.rs b/crates/rome_formatter/src/comments.rs index d6c34cad7425..4c53a59b443a 100644 --- a/crates/rome_formatter/src/comments.rs +++ b/crates/rome_formatter/src/comments.rs @@ -1,3 +1,5 @@ +mod map; + use rome_rowan::{ Direction, Language, SyntaxElement, SyntaxKind, SyntaxNode, SyntaxTriviaPieceComments, WalkEvent, diff --git a/crates/rome_formatter/src/comments/map.rs b/crates/rome_formatter/src/comments/map.rs new file mode 100644 index 000000000000..90d08949bfd4 --- /dev/null +++ b/crates/rome_formatter/src/comments/map.rs @@ -0,0 +1,802 @@ +use countme::Count; +use rustc_hash::FxHashMap; +use std::fmt::{Debug, Formatter}; +use std::iter::FusedIterator; +use std::num::NonZeroU32; +use std::ops::Range; + +/// An optimized multi-map implementation for storing leading, dangling, and trailing parts for a key. +/// +/// The implementation allows to store leading, dangling, and trailing parts for every key. +/// A general purpose multimap implementation would allocate a new [Vec] for the leading, dangling, and trailing +/// parts for every key. +/// +/// This implementation avoids this based on the assumptions that, for the vast majority of insertions: +/// * The leading, dangling, and trailing parts for a key are inserted in this order. +/// * The parts are inserted per key. For example, the parts for key one are inserted before the parts for key 2, and so on. +/// +/// Parts that are inserted in compilation with these assumption are stored in a single [Vec] that is +/// shared between all keys to significantly reduce the number of allocated [Vec]s. +/// +/// The implementation does support insertions that don't comply with the above mentioned rules but +/// these come at a performance penalty: +/// * It requires allocating three [Vec], one for the leading, dangling, and trailing parts. +/// * Already inserted parts must be copied over (by cloning) into these newly allocated [Vec]s. +/// * Resolving the slices for every part requires an extra level of indirection. +/// +/// ## Panics +/// Panics when storing the `u32::MAX` part. +pub(super) struct CommentsMap { + /// Lookup table to retrieve the entry for a key. + index: FxHashMap, + + /// Flat array storing all the values that have been inserted in order. + parts: Vec, + + /// Vector of vectors. Stores a triple of leading, dangling, and trailing [Vec] for every out of order entry. + /// + /// The length of this vec is guaranteed to always be a multiple of 3 where the leading parts are + /// at offset 0, the dangling at offset 1, and the trailing parts at offset 2. + out_of_order: Vec>, +} + +impl CommentsMap { + pub fn new() -> Self { + Self { + index: FxHashMap::default(), + parts: Vec::new(), + out_of_order: Vec::new(), + } + } + + /// Pushes a leading part for `key`. + pub fn push_leading(&mut self, key: K, part: V) + where + V: Clone, + { + match self.index.get_mut(&key) { + None => { + let start = self.parts.len(); + self.parts.push(part); + + self.index.insert( + key, + Entry::InOrder(InOrderEntry::leading(start..self.parts.len())), + ); + } + + // Has only leading comments and no elements have been pushed since + Some(Entry::InOrder(entry)) + if entry.trailing_start.is_none() && self.parts.len() == entry.range().end => + { + self.parts.push(part); + entry.increment_leading_range(); + } + + Some(Entry::OutOfOrder(entry)) => { + let leading = &mut self.out_of_order[entry.leading_index()]; + leading.push(part); + } + + Some(entry) => { + let out_of_order = + Self::entry_to_out_of_order(entry, &self.parts, &mut self.out_of_order); + self.out_of_order[out_of_order.leading_index()].push(part); + } + } + } + + /// Pushes a dangling part for `key` + pub fn push_dangling(&mut self, key: K, part: V) + where + V: Clone, + { + match self.index.get_mut(&key) { + None => { + let start = self.parts.len(); + self.parts.push(part); + + self.index.insert( + key, + Entry::InOrder(InOrderEntry::dangling(start..self.parts.len())), + ); + } + + // Has leading and dangling comments and its comments are at the end of values + Some(Entry::InOrder(entry)) + if entry.trailing_end.is_none() && self.parts.len() == entry.range().end => + { + self.parts.push(part); + entry.increment_dangling_range(); + } + + Some(Entry::OutOfOrder(entry)) => { + let dangling = &mut self.out_of_order[entry.dangling_index()]; + dangling.push(part); + } + + Some(entry) => { + let out_of_order = + Self::entry_to_out_of_order(entry, &self.parts, &mut self.out_of_order); + self.out_of_order[out_of_order.dangling_index()].push(part); + } + } + } + + /// Pushes a trailing part for `key`. + pub fn push_trailing(&mut self, key: K, part: V) + where + V: Clone, + { + match self.index.get_mut(&key) { + None => { + let start = self.parts.len(); + self.parts.push(part); + + self.index.insert( + key, + Entry::InOrder(InOrderEntry::trailing(start..self.parts.len())), + ); + } + + // Its comments are at the end + Some(Entry::InOrder(entry)) if entry.range().end == self.parts.len() => { + self.parts.push(part); + entry.increment_trailing_range(); + } + + Some(Entry::OutOfOrder(entry)) => { + let trailing = &mut self.out_of_order[entry.trailing_index()]; + trailing.push(part); + } + + Some(entry) => { + let out_of_order = + Self::entry_to_out_of_order(entry, &self.parts, &mut self.out_of_order); + self.out_of_order[out_of_order.trailing_index()].push(part); + } + } + } + + #[cold] + fn entry_to_out_of_order<'a>( + entry: &'a mut Entry, + values: &[V], + out_of_order: &mut Vec>, + ) -> &'a mut OutOfOrderEntry + where + V: Clone, + { + match entry { + Entry::InOrder(in_order) => { + let index = out_of_order.len(); + + out_of_order.push(values[in_order.leading_range()].to_vec()); + out_of_order.push(values[in_order.dangling_range()].to_vec()); + out_of_order.push(values[in_order.trailing_range()].to_vec()); + + *entry = Entry::OutOfOrder(OutOfOrderEntry { + leading_index: index, + _count: Count::new(), + }); + + match entry { + Entry::InOrder(_) => unreachable!(), + Entry::OutOfOrder(out_of_order) => out_of_order, + } + } + Entry::OutOfOrder(entry) => entry, + } + } + + /// Retrieves all leading parts of `key` + pub fn leading(&self, key: &K) -> &[V] { + match self.index.get(key) { + None => &[], + Some(Entry::InOrder(in_order)) => &self.parts[in_order.leading_range()], + Some(Entry::OutOfOrder(entry)) => &self.out_of_order[entry.leading_index()], + } + } + + /// Retrieves all dangling parts of `key`. + pub fn dangling(&self, key: &K) -> &[V] { + match self.index.get(key) { + None => &[], + Some(Entry::InOrder(in_order)) => &self.parts[in_order.dangling_range()], + Some(Entry::OutOfOrder(entry)) => &self.out_of_order[entry.dangling_index()], + } + } + + /// Retrieves all trailing parts of `key`. + pub fn trailing(&self, key: &K) -> &[V] { + match self.index.get(key) { + None => &[], + Some(Entry::InOrder(in_order)) => &self.parts[in_order.trailing_range()], + Some(Entry::OutOfOrder(entry)) => &self.out_of_order[entry.trailing_index()], + } + } + + /// Returns `true` if `key` has any leading, dangling, or trailing part. + pub fn has(&self, key: &K) -> bool { + self.index.get(key).is_some() + } + + /// Returns an iterator over all leading, dangling, and trailing parts of `key`. + pub fn parts(&self, key: &K) -> ValuesIterator { + match self.index.get(key) { + None => ValuesIterator::Slice([].iter()), + Some(Entry::OutOfOrder(entry)) => ValuesIterator::Leading { + leading: self.out_of_order[entry.leading_index()].iter(), + dangling: &self.out_of_order[entry.dangling_index()], + trailing: &self.out_of_order[entry.trailing_index()], + }, + Some(Entry::InOrder(entry)) => ValuesIterator::Slice(self.parts[entry.range()].iter()), + } + } +} + +impl Default for CommentsMap { + fn default() -> Self { + Self::new() + } +} + +impl std::fmt::Debug for CommentsMap +where + K: std::fmt::Debug, + V: std::fmt::Debug, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut builder = f.debug_map(); + + for (key, entry) in &self.index { + builder.entry(&key, &DebugEntry { entry, map: self }); + } + + builder.finish() + } +} + +/// Iterator to iterate over all leading, dangling, and trailing parts of a key. +pub(super) enum ValuesIterator<'a, V> { + /// The slice into the [CommentsMap::parts] [Vec] if this is an in-order entry or the trailing parts + /// of an out-of-order entry. + Slice(std::slice::Iter<'a, V>), + + /// Iterator over the leading parts of an out-of-order entry. Returns the dangling parts, and then the + /// trailing parts once the leading iterator is fully consumed. + Leading { + leading: std::slice::Iter<'a, V>, + dangling: &'a [V], + trailing: &'a [V], + }, + + /// Iterator over the dangling parts of an out-of-order entry. Returns the trailing parts + /// once the leading iterator is fully consumed. + Dangling { + dangling: std::slice::Iter<'a, V>, + trailing: &'a [V], + }, +} + +impl<'a, V> Iterator for ValuesIterator<'a, V> { + type Item = &'a V; + + fn next(&mut self) -> Option { + match self { + ValuesIterator::Slice(inner) => inner.next(), + + ValuesIterator::Leading { + leading, + dangling, + trailing, + } => match leading.next() { + Some(next) => Some(next), + None if !dangling.is_empty() => { + let mut dangling_iterator = dangling.iter(); + let next = dangling_iterator.next().unwrap(); + *self = ValuesIterator::Dangling { + dangling: dangling_iterator, + trailing, + }; + Some(next) + } + None => { + let mut trailing_iterator = trailing.iter(); + let next = trailing_iterator.next(); + *self = ValuesIterator::Slice(trailing_iterator); + next + } + }, + + ValuesIterator::Dangling { dangling, trailing } => match dangling.next() { + Some(next) => Some(next), + None => { + let mut trailing_iterator = trailing.iter(); + let next = trailing_iterator.next(); + *self = ValuesIterator::Slice(trailing_iterator); + next + } + }, + } + } + + fn size_hint(&self) -> (usize, Option) { + match self { + ValuesIterator::Slice(slice) => slice.size_hint(), + ValuesIterator::Leading { + leading, + dangling, + trailing, + } => { + let len = leading.len() + dangling.len() + trailing.len(); + + (len, Some(len)) + } + ValuesIterator::Dangling { dangling, trailing } => { + let len = dangling.len() + trailing.len(); + (len, Some(len)) + } + } + } + + fn last(self) -> Option + where + Self: Sized, + { + match self { + ValuesIterator::Slice(slice) => slice.last(), + ValuesIterator::Leading { + leading, + dangling, + trailing, + } => trailing + .last() + .or_else(|| dangling.last()) + .or_else(|| leading.last()), + ValuesIterator::Dangling { dangling, trailing } => { + trailing.last().or_else(|| dangling.last()) + } + } + } +} + +impl ExactSizeIterator for ValuesIterator<'_, V> {} + +impl FusedIterator for ValuesIterator<'_, V> {} + +#[derive(Debug)] +enum Entry { + InOrder(InOrderEntry), + OutOfOrder(OutOfOrderEntry), +} + +struct DebugEntry<'a, K, V> { + entry: &'a Entry, + map: &'a CommentsMap, +} + +impl Debug for DebugEntry<'_, K, V> +where + K: Debug, + V: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let leading = match self.entry { + Entry::OutOfOrder(entry) => self.map.out_of_order[entry.leading_index()].as_slice(), + Entry::InOrder(entry) => &self.map.parts[entry.leading_range()], + }; + + let dangling = match self.entry { + Entry::OutOfOrder(entry) => self.map.out_of_order[entry.dangling_index()].as_slice(), + Entry::InOrder(entry) => &self.map.parts[entry.dangling_range()], + }; + + let trailing = match self.entry { + Entry::OutOfOrder(entry) => self.map.out_of_order[entry.trailing_index()].as_slice(), + Entry::InOrder(entry) => &self.map.parts[entry.trailing_range()], + }; + + let mut list = f.debug_list(); + + list.entries(leading.iter().map(DebugValue::Leading)); + list.entries(dangling.iter().map(DebugValue::Dangling)); + list.entries(trailing.iter().map(DebugValue::Trailing)); + + list.finish() + } +} + +enum DebugValue<'a, V> { + Leading(&'a V), + Dangling(&'a V), + Trailing(&'a V), +} + +impl Debug for DebugValue<'_, V> +where + V: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + DebugValue::Leading(leading) => f.debug_tuple("Leading").field(leading).finish(), + DebugValue::Dangling(dangling) => f.debug_tuple("Dangling").field(dangling).finish(), + DebugValue::Trailing(trailing) => f.debug_tuple("Trailing").field(trailing).finish(), + } + } +} + +#[derive(Debug)] +struct InOrderEntry { + /// Index into the [CommentsMap::parts] vector where the leading parts of this entry start + leading_start: PartIndex, + + /// Index into the [CommentsMap::parts] vector where the dangling parts (and, thus, the leading parts end) start. + dangling_start: PartIndex, + + /// Index into the [CommentsMap::parts] vector where the trailing parts (and, thus, the dangling parts end) of this entry start + trailing_start: Option, + + /// Index into the [CommentsMap::parts] vector where the trailing parts of this entry end + trailing_end: Option, + + _count: Count, +} + +impl InOrderEntry { + fn leading(range: Range) -> Self { + InOrderEntry { + leading_start: PartIndex::from_len(range.start), + dangling_start: PartIndex::from_len(range.end), + trailing_start: None, + trailing_end: None, + _count: Count::new(), + } + } + + fn dangling(range: Range) -> Self { + let start = PartIndex::from_len(range.start); + InOrderEntry { + leading_start: start, + dangling_start: start, + trailing_start: Some(PartIndex::from_len(range.end)), + trailing_end: None, + _count: Count::new(), + } + } + + fn trailing(range: Range) -> Self { + let start = PartIndex::from_len(range.start); + InOrderEntry { + leading_start: start, + dangling_start: start, + trailing_start: Some(start), + trailing_end: Some(PartIndex::from_len(range.end)), + _count: Count::new(), + } + } + + fn increment_leading_range(&mut self) { + assert!( + self.trailing_start.is_none(), + "Can't extend the leading range for an in order entry with dangling comments." + ); + + self.dangling_start.increment(); + } + + fn increment_dangling_range(&mut self) { + assert!( + self.trailing_end.is_none(), + "Can't extend the dangling range for an in order entry with trailing comments." + ); + + match &mut self.trailing_start { + Some(start) => start.increment(), + None => self.trailing_start = Some(self.dangling_start.incremented()), + } + } + + fn increment_trailing_range(&mut self) { + match (self.trailing_start, &mut self.trailing_end) { + // Already has some trailing comments + (Some(_), Some(end)) => end.increment(), + // Has dangling comments only + (Some(start), None) => self.trailing_end = Some(start.incremented()), + // Has leading comments only + (None, None) => { + self.trailing_start = Some(self.dangling_start); + self.trailing_end = Some(self.dangling_start.incremented()) + } + (None, Some(_)) => { + unreachable!() + } + } + } + + fn leading_range(&self) -> Range { + self.leading_start.value()..self.dangling_start.value() + } + + fn dangling_range(&self) -> Range { + match self.trailing_start { + None => self.dangling_start.value()..self.dangling_start.value(), + Some(trailing_start) => self.dangling_start.value()..trailing_start.value(), + } + } + + fn trailing_range(&self) -> Range { + match (self.trailing_start, self.trailing_end) { + (Some(trailing_start), Some(trailing_end)) => { + trailing_start.value()..trailing_end.value() + } + // Only dangling comments + (Some(trailing_start), None) => trailing_start.value()..trailing_start.value(), + (None, Some(_)) => { + panic!("Trailing end shouldn't be set if trailing start is none"); + } + (None, None) => self.dangling_start.value()..self.dangling_start.value(), + } + } + + fn range(&self) -> Range { + self.leading_start.value() + ..self + .trailing_end + .or(self.trailing_start) + .unwrap_or(self.dangling_start) + .value() + } +} + +#[derive(Debug)] +struct OutOfOrderEntry { + /// Index into the [CommentsMap::out_of_order] vector at which offset the leaading vec is stored. + leading_index: usize, + _count: Count, +} + +impl OutOfOrderEntry { + const fn leading_index(&self) -> usize { + self.leading_index + } + + const fn dangling_index(&self) -> usize { + self.leading_index + 1 + } + + const fn trailing_index(&self) -> usize { + self.leading_index + 2 + } +} + +/// Index into the [CommentsMap::parts] vector. +/// +/// Stores the index as a [NonZeroU32], starting at 1 instead of 0 so that +/// `size_of::() == size_of::>()`. +/// +/// This means, that only `u32 - 1` parts can be stored. This should be sufficient for storing comments +/// because: Comments have length of two or more bytes because they consist of a start and end character sequence (`#` + new line, `/*` and `*/`). +/// Thus, a document with length `u32` can have at most `u32::MAX / 2` comment-parts. +#[repr(transparent)] +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +struct PartIndex(NonZeroU32); + +impl PartIndex { + fn from_len(value: usize) -> Self { + Self(NonZeroU32::try_from(value as u32 + 1).unwrap()) + } + + fn value(&self) -> usize { + (u32::from(self.0) - 1) as usize + } + + fn increment(&mut self) { + *self = self.incremented(); + } + + fn incremented(&self) -> PartIndex { + PartIndex(NonZeroU32::new(self.0.get() + 1).unwrap()) + } +} + +#[cfg(test)] +mod tests { + use crate::comments::map::CommentsMap; + + static EMPTY: [i32; 0] = []; + + #[test] + fn leading_dangling_trailing() { + let mut map = CommentsMap::new(); + + map.push_leading("a", 1); + map.push_dangling("a", 2); + map.push_dangling("a", 3); + map.push_trailing("a", 4); + + assert_eq!(map.parts, vec![1, 2, 3, 4]); + + assert_eq!(map.leading(&"a"), &[1]); + assert_eq!(map.dangling(&"a"), &[2, 3]); + assert_eq!(map.trailing(&"a"), &[4]); + + assert!(map.has(&"a")); + + assert_eq!( + map.parts(&"a").copied().collect::>(), + vec![1, 2, 3, 4] + ); + } + + #[test] + fn dangling_trailing() { + let mut map = CommentsMap::new(); + + map.push_dangling("a", 1); + map.push_dangling("a", 2); + map.push_trailing("a", 3); + + assert_eq!(map.parts, vec![1, 2, 3]); + + assert_eq!(map.leading(&"a"), &EMPTY); + assert_eq!(map.dangling(&"a"), &[1, 2]); + assert_eq!(map.trailing(&"a"), &[3]); + + assert!(map.has(&"a")); + + assert_eq!(map.parts(&"a").copied().collect::>(), vec![1, 2, 3]); + } + + #[test] + fn trailing() { + let mut map = CommentsMap::new(); + + map.push_trailing("a", 1); + map.push_trailing("a", 2); + + assert_eq!(map.parts, vec![1, 2]); + + assert_eq!(map.leading(&"a"), &EMPTY); + assert_eq!(map.dangling(&"a"), &EMPTY); + assert_eq!(map.trailing(&"a"), &[1, 2]); + + assert!(map.has(&"a")); + + assert_eq!(map.parts(&"a").copied().collect::>(), vec![1, 2]); + } + + #[test] + fn empty() { + let map = CommentsMap::<&str, i32>::default(); + + assert_eq!(map.parts, Vec::::new()); + + assert_eq!(map.leading(&"a"), &EMPTY); + assert_eq!(map.dangling(&"a"), &EMPTY); + assert_eq!(map.trailing(&"a"), &EMPTY); + + assert!(!map.has(&"a")); + + assert_eq!( + map.parts(&"a").copied().collect::>(), + Vec::::new() + ); + } + + #[test] + fn multiple_keys() { + let mut map = CommentsMap::new(); + + map.push_leading("a", 1); + map.push_dangling("b", 2); + map.push_trailing("c", 3); + map.push_leading("d", 4); + map.push_dangling("d", 5); + map.push_trailing("d", 6); + + assert_eq!(map.parts, &[1, 2, 3, 4, 5, 6]); + + assert_eq!(map.leading(&"a"), &[1]); + assert_eq!(map.dangling(&"a"), &EMPTY); + assert_eq!(map.trailing(&"a"), &EMPTY); + assert_eq!(map.parts(&"a").copied().collect::>(), vec![1]); + + assert_eq!(map.leading(&"b"), &EMPTY); + assert_eq!(map.dangling(&"b"), &[2]); + assert_eq!(map.trailing(&"b"), &EMPTY); + assert_eq!(map.parts(&"b").copied().collect::>(), vec![2]); + + assert_eq!(map.leading(&"c"), &EMPTY); + assert_eq!(map.dangling(&"c"), &EMPTY); + assert_eq!(map.trailing(&"c"), &[3]); + assert_eq!(map.parts(&"c").copied().collect::>(), vec![3]); + + assert_eq!(map.leading(&"d"), &[4]); + assert_eq!(map.dangling(&"d"), &[5]); + assert_eq!(map.trailing(&"d"), &[6]); + assert_eq!(map.parts(&"d").copied().collect::>(), vec![4, 5, 6]); + } + + #[test] + fn dangling_leading() { + let mut map = CommentsMap::new(); + + map.push_dangling("a", 1); + map.push_leading("a", 2); + map.push_dangling("a", 3); + map.push_trailing("a", 4); + + assert_eq!(map.leading(&"a"), [2]); + assert_eq!(map.dangling(&"a"), [1, 3]); + assert_eq!(map.trailing(&"a"), [4]); + + assert_eq!( + map.parts(&"a").copied().collect::>(), + vec![2, 1, 3, 4] + ); + + assert!(map.has(&"a")); + } + + #[test] + fn trailing_leading() { + let mut map = CommentsMap::new(); + + map.push_trailing("a", 1); + map.push_leading("a", 2); + map.push_dangling("a", 3); + map.push_trailing("a", 4); + + assert_eq!(map.leading(&"a"), [2]); + assert_eq!(map.dangling(&"a"), [3]); + assert_eq!(map.trailing(&"a"), [1, 4]); + + assert_eq!( + map.parts(&"a").copied().collect::>(), + vec![2, 3, 1, 4] + ); + + assert!(map.has(&"a")); + } + + #[test] + fn trailing_dangling() { + let mut map = CommentsMap::new(); + + map.push_trailing("a", 1); + map.push_dangling("a", 2); + map.push_trailing("a", 3); + + assert_eq!(map.leading(&"a"), &EMPTY); + assert_eq!(map.dangling(&"a"), &[2]); + assert_eq!(map.trailing(&"a"), &[1, 3]); + + assert_eq!(map.parts(&"a").copied().collect::>(), vec![2, 1, 3]); + + assert!(map.has(&"a")); + } + + #[test] + fn keys_out_of_order() { + let mut map = CommentsMap::new(); + + map.push_leading("a", 1); + map.push_dangling("b", 2); + map.push_leading("a", 3); + + map.push_trailing("c", 4); + map.push_dangling("b", 5); + + map.push_leading("d", 6); + map.push_trailing("c", 7); + + assert_eq!(map.leading(&"a"), &[1, 3]); + assert_eq!(map.dangling(&"b"), &[2, 5]); + assert_eq!(map.trailing(&"c"), &[4, 7]); + + assert!(map.has(&"a")); + assert!(map.has(&"b")); + assert!(map.has(&"c")); + } +} From 03e2cbe3322fe64c2677e098d71cf909e751c8a0 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Thu, 15 Sep 2022 17:13:37 +0200 Subject: [PATCH 2/3] Documentation --- crates/rome_formatter/src/comments/map.rs | 54 +++++++++++++++-------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/crates/rome_formatter/src/comments/map.rs b/crates/rome_formatter/src/comments/map.rs index 90d08949bfd4..bf440ba48a2c 100644 --- a/crates/rome_formatter/src/comments/map.rs +++ b/crates/rome_formatter/src/comments/map.rs @@ -7,25 +7,41 @@ use std::ops::Range; /// An optimized multi-map implementation for storing leading, dangling, and trailing parts for a key. /// -/// The implementation allows to store leading, dangling, and trailing parts for every key. -/// A general purpose multimap implementation would allocate a new [Vec] for the leading, dangling, and trailing -/// parts for every key. +/// A naive implementation using three multimaps, one to store the leading, dangling, and trailing parts, +/// requires between `keys < allocations < keys * 3` vec allocations. /// -/// This implementation avoids this based on the assumptions that, for the vast majority of insertions: -/// * The leading, dangling, and trailing parts for a key are inserted in this order. -/// * The parts are inserted per key. For example, the parts for key one are inserted before the parts for key 2, and so on. +/// This map implementation optimises for the use case where: +/// * Parts belonging to the same key are inserted together. For example, all parts for the key `a` are inserted +/// before inserting any parts for the key `b`. +/// * The parts per key are inserted in the following order: leading, dangling, and then trailing parts. /// -/// Parts that are inserted in compilation with these assumption are stored in a single [Vec] that is -/// shared between all keys to significantly reduce the number of allocated [Vec]s. -/// -/// The implementation does support insertions that don't comply with the above mentioned rules but -/// these come at a performance penalty: -/// * It requires allocating three [Vec], one for the leading, dangling, and trailing parts. -/// * Already inserted parts must be copied over (by cloning) into these newly allocated [Vec]s. +/// Parts inserted in the above mentioned order are stored in a `Vec` shared by all keys to reduce the number +/// of allocations and increased cache locality. The implementation falls back to +/// storing the leading, dangling, and trailing parts of a key in dedicated `Vec`s if the parts +/// aren't inserted in the above described order. However, this comes with a slight performance penalty due to: +/// * Requiring up to three [Vec] allocations, one for the leading, dangling, and trailing parts. +/// * Requires copying already inserted parts for that key (by cloning) into the newly allocated [Vec]s. /// * Resolving the slices for every part requires an extra level of indirection. /// -/// ## Panics -/// Panics when storing the `u32::MAX` part. +/// ## Limitations +/// +/// The map supports storing up to `u32::MAX - 1` parts. Inserting the `u32::MAX`nth part panics. +/// +/// ## Comments +/// +/// Storing the leading, dangling, and trailing comments is an exemplary use case for this map implementation because +/// it is generally desired to keep the comments in the same order as in the source document. This translates to +/// inserting the comments per node and for every node in leading, dangling, trailing order (same order as this map optimises for). +/// +/// Running Rome formatter on real world use cases showed that more than 99.99% of comments get inserted in +/// the described order. +/// +/// The size limitation isn't a concern for comments because Rome supports source documents with a size up to 4GB (`u32::MAX`) +/// and every comment has at least a size of 2 bytes: +/// * 1 byte for the start sequence, e.g. `#` +/// * 1 byte for the end sequence, e.g. `\n` +/// +/// Meaning, the upper bound for comments parts in a document are `u32::MAX / 2`. pub(super) struct CommentsMap { /// Lookup table to retrieve the entry for a key. index: FxHashMap, @@ -33,10 +49,12 @@ pub(super) struct CommentsMap { /// Flat array storing all the values that have been inserted in order. parts: Vec, - /// Vector of vectors. Stores a triple of leading, dangling, and trailing [Vec] for every out of order entry. + /// Vector containing the leading, dangling, and trailing vectors for out of order entries. /// - /// The length of this vec is guaranteed to always be a multiple of 3 where the leading parts are - /// at offset 0, the dangling at offset 1, and the trailing parts at offset 2. + /// The length of `out_of_order` is a multiple of 3 where: + /// * `index % 3 == 0`: Leading parts + /// * `index % 3 == 1`: Dangling parts + /// * `index % 3 == 2`: Trailing parts out_of_order: Vec>, } From 9e0972bf71210807e765cd049a3ce9d3164f0d10 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Thu, 15 Sep 2022 17:35:12 +0200 Subject: [PATCH 3/3] Rename `ValuesIterator` to `PartsIterator` --- crates/rome_formatter/src/comments/map.rs | 52 +++++++++++------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/crates/rome_formatter/src/comments/map.rs b/crates/rome_formatter/src/comments/map.rs index bf440ba48a2c..a60e2d32ad53 100644 --- a/crates/rome_formatter/src/comments/map.rs +++ b/crates/rome_formatter/src/comments/map.rs @@ -46,7 +46,7 @@ pub(super) struct CommentsMap { /// Lookup table to retrieve the entry for a key. index: FxHashMap, - /// Flat array storing all the values that have been inserted in order. + /// Flat array storing all the parts that have been inserted in order. parts: Vec, /// Vector containing the leading, dangling, and trailing vectors for out of order entries. @@ -120,7 +120,7 @@ impl CommentsMap { ); } - // Has leading and dangling comments and its comments are at the end of values + // Has leading and dangling comments and its comments are at the end of parts Some(Entry::InOrder(entry)) if entry.trailing_end.is_none() && self.parts.len() == entry.range().end => { @@ -179,7 +179,7 @@ impl CommentsMap { #[cold] fn entry_to_out_of_order<'a>( entry: &'a mut Entry, - values: &[V], + parts: &[V], out_of_order: &mut Vec>, ) -> &'a mut OutOfOrderEntry where @@ -189,9 +189,9 @@ impl CommentsMap { Entry::InOrder(in_order) => { let index = out_of_order.len(); - out_of_order.push(values[in_order.leading_range()].to_vec()); - out_of_order.push(values[in_order.dangling_range()].to_vec()); - out_of_order.push(values[in_order.trailing_range()].to_vec()); + out_of_order.push(parts[in_order.leading_range()].to_vec()); + out_of_order.push(parts[in_order.dangling_range()].to_vec()); + out_of_order.push(parts[in_order.trailing_range()].to_vec()); *entry = Entry::OutOfOrder(OutOfOrderEntry { leading_index: index, @@ -240,15 +240,15 @@ impl CommentsMap { } /// Returns an iterator over all leading, dangling, and trailing parts of `key`. - pub fn parts(&self, key: &K) -> ValuesIterator { + pub fn parts(&self, key: &K) -> PartsIterator { match self.index.get(key) { - None => ValuesIterator::Slice([].iter()), - Some(Entry::OutOfOrder(entry)) => ValuesIterator::Leading { + None => PartsIterator::Slice([].iter()), + Some(Entry::OutOfOrder(entry)) => PartsIterator::Leading { leading: self.out_of_order[entry.leading_index()].iter(), dangling: &self.out_of_order[entry.dangling_index()], trailing: &self.out_of_order[entry.trailing_index()], }, - Some(Entry::InOrder(entry)) => ValuesIterator::Slice(self.parts[entry.range()].iter()), + Some(Entry::InOrder(entry)) => PartsIterator::Slice(self.parts[entry.range()].iter()), } } } @@ -276,7 +276,7 @@ where } /// Iterator to iterate over all leading, dangling, and trailing parts of a key. -pub(super) enum ValuesIterator<'a, V> { +pub(super) enum PartsIterator<'a, V> { /// The slice into the [CommentsMap::parts] [Vec] if this is an in-order entry or the trailing parts /// of an out-of-order entry. Slice(std::slice::Iter<'a, V>), @@ -297,14 +297,14 @@ pub(super) enum ValuesIterator<'a, V> { }, } -impl<'a, V> Iterator for ValuesIterator<'a, V> { +impl<'a, V> Iterator for PartsIterator<'a, V> { type Item = &'a V; fn next(&mut self) -> Option { match self { - ValuesIterator::Slice(inner) => inner.next(), + PartsIterator::Slice(inner) => inner.next(), - ValuesIterator::Leading { + PartsIterator::Leading { leading, dangling, trailing, @@ -313,7 +313,7 @@ impl<'a, V> Iterator for ValuesIterator<'a, V> { None if !dangling.is_empty() => { let mut dangling_iterator = dangling.iter(); let next = dangling_iterator.next().unwrap(); - *self = ValuesIterator::Dangling { + *self = PartsIterator::Dangling { dangling: dangling_iterator, trailing, }; @@ -322,17 +322,17 @@ impl<'a, V> Iterator for ValuesIterator<'a, V> { None => { let mut trailing_iterator = trailing.iter(); let next = trailing_iterator.next(); - *self = ValuesIterator::Slice(trailing_iterator); + *self = PartsIterator::Slice(trailing_iterator); next } }, - ValuesIterator::Dangling { dangling, trailing } => match dangling.next() { + PartsIterator::Dangling { dangling, trailing } => match dangling.next() { Some(next) => Some(next), None => { let mut trailing_iterator = trailing.iter(); let next = trailing_iterator.next(); - *self = ValuesIterator::Slice(trailing_iterator); + *self = PartsIterator::Slice(trailing_iterator); next } }, @@ -341,8 +341,8 @@ impl<'a, V> Iterator for ValuesIterator<'a, V> { fn size_hint(&self) -> (usize, Option) { match self { - ValuesIterator::Slice(slice) => slice.size_hint(), - ValuesIterator::Leading { + PartsIterator::Slice(slice) => slice.size_hint(), + PartsIterator::Leading { leading, dangling, trailing, @@ -351,7 +351,7 @@ impl<'a, V> Iterator for ValuesIterator<'a, V> { (len, Some(len)) } - ValuesIterator::Dangling { dangling, trailing } => { + PartsIterator::Dangling { dangling, trailing } => { let len = dangling.len() + trailing.len(); (len, Some(len)) } @@ -363,8 +363,8 @@ impl<'a, V> Iterator for ValuesIterator<'a, V> { Self: Sized, { match self { - ValuesIterator::Slice(slice) => slice.last(), - ValuesIterator::Leading { + PartsIterator::Slice(slice) => slice.last(), + PartsIterator::Leading { leading, dangling, trailing, @@ -372,16 +372,16 @@ impl<'a, V> Iterator for ValuesIterator<'a, V> { .last() .or_else(|| dangling.last()) .or_else(|| leading.last()), - ValuesIterator::Dangling { dangling, trailing } => { + PartsIterator::Dangling { dangling, trailing } => { trailing.last().or_else(|| dangling.last()) } } } } -impl ExactSizeIterator for ValuesIterator<'_, V> {} +impl ExactSizeIterator for PartsIterator<'_, V> {} -impl FusedIterator for ValuesIterator<'_, V> {} +impl FusedIterator for PartsIterator<'_, V> {} #[derive(Debug)] enum Entry {