From d15d5937080b752d45c1f529ef26c4f8a06b116d Mon Sep 17 00:00:00 2001 From: Wilfred Hughes Date: Sun, 28 Apr 2024 16:16:47 -0700 Subject: [PATCH] Move to smallvec for seen vertices This is a surprisingly large perf win. On my Thinkpad: typing_before/after.ml: before: 3.038B instructions after: 2.870B instructions slow_before/after.rs: before: 2.381B instructions after: 1.260B instructions (!) --- CHANGELOG.md | 5 +++++ Cargo.lock | 1 + Cargo.toml | 1 + src/diff/graph.rs | 9 +++++---- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 14ca03d411..17bbafbe77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,11 @@ changes. Difftastic now has a man page, see the `difft.1` file. +### Performance + +Fixed a memory leak and substantially improved performance in some +cases (up to 2x in testing). + ## 0.57 (released 1st April 2024) ### Parsing diff --git a/Cargo.lock b/Cargo.lock index 4b6931a9aa..79864061d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -251,6 +251,7 @@ dependencies = [ "rustc-hash", "serde", "serde_json", + "smallvec", "strsim", "strum", "tree-sitter", diff --git a/Cargo.toml b/Cargo.toml index 72086114e1..37f74929df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -80,6 +80,7 @@ humansize = "2.1.3" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" line-numbers = "0.3.0" +smallvec = "1.13.2" [dev-dependencies] # assert_cmd 2.0.6 requires rust 1.60 diff --git a/src/diff/graph.rs b/src/diff/graph.rs index ea85c80b18..7c99159bdc 100644 --- a/src/diff/graph.rs +++ b/src/diff/graph.rs @@ -7,6 +7,7 @@ use std::{ hash::{Hash, Hasher}, }; +use smallvec::{SmallVec, smallvec}; use bumpalo::Bump; use hashbrown::hash_map::RawEntryMut; use strsim::normalized_levenshtein; @@ -365,7 +366,7 @@ impl Edge { fn allocate_if_new<'s, 'b>( v: Vertex<'s, 'b>, alloc: &'b Bump, - seen: &mut DftHashMap<&Vertex<'s, 'b>, Vec<&'b Vertex<'s, 'b>>>, + seen: &mut DftHashMap<&Vertex<'s, 'b>, SmallVec<[&'b Vertex<'s, 'b>; 2]>>, ) -> &'b Vertex<'s, 'b> { // We use the entry API so that we only need to do a single lookup // for access and insert. @@ -399,11 +400,11 @@ fn allocate_if_new<'s, 'b>( let allocated = alloc.alloc(v); // We know that this vec will never have more than 2 - // nodes, and this code is very hot, so reserve. + // nodes, and this code is very hot, so use a smallvec. // // We still use a vec to enable experiments with the value // of how many possible parenthesis nestings to explore. - let mut existing: Vec<&'b Vertex<'s, 'b>> = Vec::with_capacity(2); + let mut existing: SmallVec<[&'b Vertex<'s, 'b>; 2]> = smallvec![&*allocated]; existing.push(allocated); vacant.insert(allocated, existing); @@ -496,7 +497,7 @@ fn pop_all_parents<'s, 'b>( pub(crate) fn set_neighbours<'s, 'b>( v: &Vertex<'s, 'b>, alloc: &'b Bump, - seen: &mut DftHashMap<&Vertex<'s, 'b>, Vec<&'b Vertex<'s, 'b>>>, + seen: &mut DftHashMap<&Vertex<'s, 'b>, SmallVec<[&'b Vertex<'s, 'b>; 2]>>, ) { if v.neighbours.borrow().is_some() { return;