egglog_union_find/concurrent/
mod.rs

1//! A concurrent implementation of a union-find datastructure.
2//!
3//! See the `uf` module for more details on the implementation.
4
5use crate::numeric_id::NumericId;
6use atomic_int::AtomicInt;
7
8pub(crate) mod atomic_int;
9pub(crate) mod buffer;
10pub(crate) mod uf;
11
12#[cfg(test)]
13mod tests;
14
15/// A thread-safe implementation of a union-find datastructure.
16///
17/// This implementation supports concurrent finds and merges, with path
18/// compression. Importantly, this implementation supports dynamically resizing
19/// the underlying array when new ids appear. This allows callers to generate
20/// Ids more flexibly, though huge amounts of resizing will cause contention as
21/// callers wait for resizes to complete.
22///
23/// The `Clone` implementation for this type is shallow: copies of the
24/// data-structure see one another's updates.
25pub struct UnionFind<T: NumericId> {
26    inner: uf::ConcurrentUnionFind<T::Atomic>,
27}
28
29impl<T: NumericId> Clone for UnionFind<T> {
30    fn clone(&self) -> Self {
31        Self {
32            inner: self.inner.clone(),
33        }
34    }
35}
36
37impl<T: NumericId> Default for UnionFind<T>
38where
39    T::Atomic: AtomicInt,
40{
41    fn default() -> Self {
42        Self {
43            inner: Default::default(),
44        }
45    }
46}
47
48impl<T: NumericId> UnionFind<T>
49where
50    T::Atomic: AtomicInt<Underlying = T::Rep>,
51{
52    /// Reset the union-find datastructure, setting each element to point to itself.
53    ///
54    /// This method blocks until all threads have finished their current operations.
55    pub fn reset(&self) {
56        self.inner.reset();
57    }
58
59    /// Create a deep copy of the union-find datastructure: subsequent unions on
60    /// the returned copy will not affect the original.
61    pub fn deep_copy(&self) -> Self {
62        Self {
63            inner: self.inner.deep_copy(),
64        }
65    }
66
67    /// Initialize a union-find with `capacity` elements pointing to themselves.
68    pub fn with_capacity(capacity: usize) -> Self {
69        Self {
70            inner: uf::ConcurrentUnionFind::with_capacity(capacity),
71        }
72    }
73    /// Get the canonical value associated with `elt`.
74    ///
75    /// Note that it may be the case that `find(x) != find(y)` even if `x` and
76    /// `y` belong to the same equivalence class, as the canonical value for the
77    /// class may have changed between the `find(x)` and `find(y)` calls. To get
78    /// a "ground truth" view on this, use the `same_set` method.
79    ///
80    /// We expose `find` because it _does_ work if you know that there are not
81    /// any concurrent `merge` operations on the data-structure: this is common
82    /// enough in egglog. It also makes some tests easier to write.
83    pub fn find(&self, elt: T) -> T {
84        T::new(self.inner.find(elt.rep()))
85    }
86
87    /// Check if `l` and `r` belong to the same equivalence class.
88    pub fn same_set(&self, l: T, r: T) -> bool {
89        self.inner.same_set(l.rep(), r.rep())
90    }
91
92    /// Merge the equivalence classes of `l` and `r`, returning the new parent
93    /// and new child classes. If `l` and `r` are already in the same class,
94    /// then their canonical representative is returned twice.
95    pub fn union(&self, l: T, r: T) -> (T, T) {
96        let (parent, child) = self.inner.merge(l.rep(), r.rep());
97        (T::new(parent), T::new(child))
98    }
99}