Skip to content

Commit

Permalink
Generic variant of connected_components
Browse files Browse the repository at this point in the history
  • Loading branch information
samueltardieu committed Sep 5, 2024
1 parent b1c2f35 commit 8b658e5
Show file tree
Hide file tree
Showing 2 changed files with 161 additions and 86 deletions.
236 changes: 154 additions & 82 deletions src/undirected/connected_components.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,158 @@ use std::collections::hash_map::Entry::{Occupied, Vacant};
use std::collections::{HashMap, HashSet};
use std::hash::Hash;
use std::iter::once;
use std::marker::PhantomData;

fn join(table: &mut [usize], mut rx: usize, mut ry: usize) -> usize {
while table[rx] != table[ry] {
if table[rx] > table[ry] {
if rx == table[rx] {
table[rx] = table[ry];
break;
use rustc_hash::{FxHashMap, FxHashSet};

/// A connected component implementation for various generic types.
/// This structure is only useful if the default collections used by
/// the various functions of the [`connected_components`](self) module
/// do not fit your needs.
///
/// The structure stores no data: every field is a [`PhantomData`] marker
/// that only records one of the type parameters. The functionality is
/// exposed through associated functions which do not take `self`.
///
/// Type parameters, as constrained by the `impl` block:
///
/// - `N`: the vertex type (`Hash + Eq + Clone`).
/// - `It`: the type of one input group of vertices; it must be iterable
/// both by value and by reference, and clonable. Defaults to `Vec<N>`.
/// - `It2`: the type of one component passed to `component_index`; it must
/// be iterable by reference. Defaults to `HashSet<N>`.
/// - `C1`: the collection built from the vertices of a single component.
/// Defaults to `HashSet<N>`.
/// - `C2`: the collection of `C1` values returned by `components` and
/// `connected_components`. Defaults to `Vec<C1>`.
/// - `C3`: the collection built from `(N, usize)` pairs and returned by
/// `component_index`. Defaults to `HashMap<N, usize>`.
pub struct ConnectedComponents<
N,
It = Vec<N>,
It2 = HashSet<N>,
C1 = HashSet<N>,
C2 = Vec<C1>,
C3 = HashMap<N, usize>,
> {
// Marker for the vertex type `N`.
_n: PhantomData<N>,
// Marker for the input group type `It`.
_it: PhantomData<It>,
// Marker for the input component type `It2`.
_it2: PhantomData<It2>,
// Marker for the per-component output collection `C1`.
_c1: PhantomData<C1>,
// Marker for the collection of components `C2`.
_c2: PhantomData<C2>,
// Marker for the vertex-to-index collection `C3`.
_c3: PhantomData<C3>,
}

impl<N, It, It2, C1, C2, C3> ConnectedComponents<N, It, It2, C1, C2, C3>
where
N: Hash + Eq + Clone,
It: IntoIterator<Item = N> + Clone,
for<'it> &'it It: IntoIterator<Item = &'it N>,
for<'it> &'it It2: IntoIterator<Item = &'it N>,
C1: FromIterator<N>,
C2: FromIterator<C1>,
C3: FromIterator<(N, usize)>,
{
/// Separate components of an undirected graph into disjoint sets.
///
/// - `groups` is a set of groups of vertices connected together. It is
/// acceptable for a group to contain only one node. Empty groups
/// receive special treatment (see below).
///
/// This function returns a pair containing:
///
/// - A mapping from every vertex to its set identifier. The set identifiers are
/// opaque and will not necessarily be compact. However, it is guaranteed that
/// they will not be greater than the number of groups.
/// - A mapping from every group to its set identifier, with the identifiers being
/// the same ones as the ones in the previous mapping. Each group corresponds to
/// the identifier at the same index, except for empty groups, whose identifier is
/// set to `usize::MAX`.
///
/// Note that if you have a raw undirected graph, you can build
/// such a structure by creating a group for every vertex containing
/// the vertex itself and its immediate neighbours.
#[must_use]
pub fn separate_components(groups: &[It]) -> (HashMap<&N, usize>, Vec<usize>) {
let mut table = (0..groups.len()).collect::<Vec<_>>();
let mut indices = HashMap::new();
for (mut group_index, group) in groups.iter().enumerate() {
let mut is_empty = true;
for element in group {
is_empty = false;
match indices.entry(element) {
Occupied(e) => {
table[group_index] = find(&mut table, *e.get());
group_index = table[group_index];
}
Vacant(e) => {
e.insert(group_index);
}
}
}
if is_empty {
table[group_index] = usize::MAX;
}
}
for group_index in indices.values_mut() {
*group_index = find(&mut table, *group_index);
}
for group_index in 0..groups.len() {
if table[group_index] != usize::MAX {
let target = find(&mut table, group_index);
// Due to path halving, this particular entry might not
// be up-to-date yet.
table[group_index] = target;
}
let z = table[rx];
table[rx] = table[ry];
rx = z;
} else {
if ry == table[ry] {
table[ry] = table[rx];
break;
}
(indices, table)
}

/// Separate components of an undirected graph into disjoint sets.
///
/// - `groups` is a set of groups of vertices connected together. It is
/// acceptable for a group to contain only one node.
///
/// This function returns a list of sets of nodes forming disjoint connected
/// sets.
#[must_use]
pub fn components(groups: &[It]) -> C2 {
let (_, gindices) = Self::separate_components(groups);
let mut gb: FxHashMap<usize, FxHashSet<N>> = FxHashMap::default();
for (i, n) in gindices
.into_iter()
.enumerate()
.filter(|&(_, n)| n != usize::MAX)
{
let set = gb.entry(n).or_default();
for e in groups[i].clone() {
set.insert(e);
}
let z = table[ry];
table[ry] = table[rx];
ry = z;
}
gb.into_values().map(|v| v.into_iter().collect()).collect()
}

/// Compute the connected components reachable from a set of start vertices.
///
/// - `starts` lists the vertices used as start points.
/// - `neighbours` is called once per start vertex, in order, and yields
///   the neighbours of that vertex.
///
/// One group is formed per start vertex, made of its neighbours followed
/// by the vertex itself. The groups are then merged by `components`, whose
/// result (a collection of disjoint connected sets of nodes) is returned.
pub fn connected_components<FN, IN>(starts: &[N], mut neighbours: FN) -> C2
where
    FN: FnMut(&N) -> IN,
    IN: IntoIterator<Item = N>,
{
    let mut groups: Vec<Vec<N>> = Vec::with_capacity(starts.len());
    for start in starts {
        // The neighbours are requested first; the start vertex is appended
        // so that it belongs to its own group even when isolated.
        let adjacent = neighbours(start).into_iter();
        groups.push(adjacent.chain(once(start.clone())).collect());
    }
    ConnectedComponents::<N, Vec<N>, It2, C1, C2, C3>::components(&groups)
}

/// Build the vertex-to-component lookup for a list of disjoint sets.
///
/// - `components` are disjoint vertices sets.
///
/// Every vertex is cloned and paired with the position, in `components`,
/// of the set it was found in; the pairs are collected into `C3`.
#[must_use]
pub fn component_index(components: &[It2]) -> C3 {
    components
        .iter()
        .enumerate()
        .flat_map(|(set_index, set)| {
            set.into_iter()
                .cloned()
                .map(move |vertex| (vertex, set_index))
        })
        .collect()
}
table[rx]
}

fn find(table: &mut [usize], mut x: usize) -> usize {
Expand Down Expand Up @@ -59,36 +189,9 @@ fn find(table: &mut [usize], mut x: usize) -> usize {
#[must_use]
pub fn separate_components<N>(groups: &[Vec<N>]) -> (HashMap<&N, usize>, Vec<usize>)
where
N: Hash + Eq,
N: Hash + Eq + Clone,
{
let mut table = (0..groups.len()).collect::<Vec<_>>();
let mut indices = HashMap::new();
for (mut group_index, group) in groups.iter().enumerate() {
if group.is_empty() {
table[group_index] = usize::MAX;
}
for element in group {
match indices.entry(element) {
Occupied(e) => {
group_index = join(&mut table, group_index, *e.get());
}
Vacant(e) => {
e.insert(group_index);
}
}
}
}
for group_index in indices.values_mut() {
*group_index = find(&mut table, *group_index);
}
// Flatten the table.
for group_index in 0..groups.len() {
if table[group_index] != usize::MAX {
let target = find(&mut table, group_index);
table[group_index] = target;
}
}
(indices, table)
ConnectedComponents::<N>::separate_components(groups)
}

/// Separate components of an undirected graph into disjoint sets.
Expand All @@ -103,27 +206,7 @@ pub fn components<N>(groups: &[Vec<N>]) -> Vec<HashSet<N>>
where
N: Clone + Hash + Eq,
{
let (_, gindices) = separate_components(groups);
let mut gb = gindices
.into_iter()
.enumerate()
.filter(|&(_, n)| n != usize::MAX)
.collect::<Vec<_>>();
gb.sort_unstable_by(|&(_, n1), &(_, n2)| Ord::cmp(&n1, &n2));
let mut key = None;
let mut res = vec![];
for (group_index, k) in gb {
if key != Some(k) {
res.push(HashSet::default());
key = Some(k);
}
if let Some(set) = res.last_mut() {
for item in &groups[group_index] {
set.insert(item.clone());
}
}
}
res
ConnectedComponents::<N>::components(groups)
}

/// Extract connected components from a graph.
Expand All @@ -133,18 +216,13 @@ where
///
/// This function returns a list of sets of nodes forming disjoint connected
/// sets.
pub fn connected_components<N, FN, IN>(starts: &[N], mut neighbours: FN) -> Vec<HashSet<N>>
pub fn connected_components<N, FN, IN>(starts: &[N], neighbours: FN) -> Vec<HashSet<N>>
where
N: Clone + Hash + Eq,
FN: FnMut(&N) -> IN,
IN: IntoIterator<Item = N>,
{
components(
&starts
.iter()
.map(|s| neighbours(s).into_iter().chain(once(s.clone())).collect())
.collect::<Vec<_>>(),
)
ConnectedComponents::<N>::connected_components(starts, neighbours)
}

/// Locate vertices amongst disjoint sets.
Expand All @@ -153,17 +231,11 @@ where
///
/// This function returns a map between every vertex and the index of
/// the set it belongs to in the `components` list.
#[allow(clippy::implicit_hasher)]
#[must_use]
#[allow(clippy::implicit_hasher)]
pub fn component_index<N>(components: &[HashSet<N>]) -> HashMap<N, usize>
where
N: Clone + Hash + Eq,
{
let mut assoc = HashMap::with_capacity(components.len());
for (i, c) in components.iter().enumerate() {
for n in c {
assoc.insert(n.clone(), i);
}
}
assoc
ConnectedComponents::<N>::component_index(components)
}
11 changes: 7 additions & 4 deletions tests/connected-components.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ fn empty_separate_components() {

#[test]
fn basic_components() {
let c = components(&[vec![1, 2], vec![3, 4], vec![5, 6], vec![1, 4, 7]]);
let mut c = components(&[vec![1, 2], vec![3, 4], vec![5, 6], vec![1, 4, 7]]);
c.sort_unstable_by_key(|v| *v.iter().min().unwrap());
assert_eq!(c.len(), 2);
assert_eq!(
c[0].clone().into_iter().sorted().collect_vec(),
Expand All @@ -44,7 +45,8 @@ fn basic_components() {

#[test]
fn empty_components() {
let c = components(&[vec![1, 2], vec![3, 4], vec![], vec![1, 4, 7]]);
let mut c = components(&[vec![1, 2], vec![3, 4], vec![], vec![1, 4, 7]]);
c.sort_unstable_by_key(|v| *v.iter().min().unwrap());
assert_eq!(c.len(), 1);
assert_eq!(
c[0].clone().into_iter().sorted().collect_vec(),
Expand All @@ -55,14 +57,15 @@ fn empty_components() {
#[test]
fn basic_connected_components() {
let mut counter = 0;
let c = connected_components(&[1, 4], |&n| {
let mut c = connected_components(&[1, 4], |&n| {
counter += 1;
if n % 2 == 0 {
vec![2, 4, 6, 8]
} else {
vec![1, 3, 5, 7]
}
});
c.sort_unstable_by_key(|v| *v.iter().min().unwrap());
assert_eq!(c.len(), 2);
assert_eq!(
c[0].clone().into_iter().sorted().collect_vec(),
Expand Down Expand Up @@ -101,7 +104,7 @@ fn larger_separate_components() {
component
})
.collect_vec();
components.sort_unstable_by_key(|c| c[0]);
components.sort_unstable_by_key(|v| *v.iter().min().unwrap());
let mut groups = components
.iter()
.flat_map(|component| {
Expand Down

0 comments on commit 8b658e5

Please sign in to comment.