egglog_core_relations/free_join/
mod.rs

1//! Execute queries against a database using a variant of Free Join.
2use std::{
3    mem,
4    sync::{
5        Arc,
6        atomic::{AtomicUsize, Ordering},
7    },
8};
9
10use crate::{
11    common::IndexSet,
12    hash_index::IndexCatalog,
13    numeric_id::{DenseIdMap, DenseIdMapWithReuse, NumericId, define_id},
14};
15use egglog_concurrency::{NotificationList, ResettableOnceLock};
16use rayon::prelude::*;
17use smallvec::SmallVec;
18
19use crate::{
20    BaseValues, ContainerRebuildSummary, ContainerValues, PoolSet, QueryEntry, TupleIndex, Value,
21    action::{
22        Bindings, DbView,
23        mask::{Mask, MaskIter, ValueSource},
24    },
25    dependency_graph::DependencyGraph,
26    hash_index::{ColumnIndex, Index, IndexBase},
27    offsets::Subset,
28    parallel_heuristics::parallelize_db_level_op,
29    pool::{Pool, Pooled, with_pool_set},
30    query::{Query, RuleSetBuilder},
31    table_spec::{
32        ColumnId, Constraint, MutationBuffer, Table, TableSpec, WrappedTable, WrappedTableRef,
33    },
34};
35
36use self::plan::Plan;
37use crate::action::ExecutionState;
38
39pub(crate) mod execute;
40pub(crate) mod frame_update;
41pub(crate) mod plan;
42
// Id type naming one atom of a query; declared through the crate's `define_id!` macro with a
// `u32` representation.
define_id!(
    pub AtomId,
    u32,
    "A component of a query consisting of a function and a list of variables or constants"
);
// Id type naming a query variable, also backed by a `u32`.
// NOTE(review): the trailing `pretty "Var"` presumably sets the prefix used when printing these
// ids — confirm against the `define_id!` definition.
define_id!(pub Variable, u32, "a variable in a query", pretty "Var");
49
50impl Variable {
51    pub fn placeholder() -> Variable {
52        Variable::new(!0)
53    }
54}
55
// Id type naming a table registered in a database; backed by a `u32`.
define_id!(pub TableId, u32, "a table in the database");
57
58impl TableId {
59    pub fn dummy() -> TableId {
60        TableId::new(u32::MAX)
61    }
62
63    pub fn is_dummy(&self) -> bool {
64        self.rep == u32::MAX
65    }
66}
67
// Crate-private id type; per its description string it picks out the right-hand side of a rule.
define_id!(pub(crate) ActionId, u32, "an identifier picking out the RHS of a rule");
69
/// A table's constraints split into a "fast" and a "slow" group, together with the subset of
/// rows selected by the fast group.
///
/// Values of this type are produced by `Database::process_constraints`.
#[derive(Debug)]
pub(crate) struct ProcessedConstraints {
    /// The subset of the table matching the fast constraints. If there are no
    /// fast constraints then this is the full table.
    pub(crate) subset: Subset,
    /// The constraints that can be evaluated quickly (O(log(n)) or O(1)).
    pub(crate) fast: Pooled<Vec<Constraint>>,
    /// The constraints that require an O(n) scan to evaluate.
    pub(crate) slow: Pooled<Vec<Constraint>>,
}
80
81impl Clone for ProcessedConstraints {
82    fn clone(&self) -> Self {
83        ProcessedConstraints {
84            subset: self.subset.clone(),
85            fast: Pooled::cloned(&self.fast),
86            slow: Pooled::cloned(&self.slow),
87        }
88    }
89}
90
91impl ProcessedConstraints {
92    /// The size of the subset of the table matching the fast constraints.
93    fn approx_size(&self) -> usize {
94        self.subset.size()
95    }
96
97    pub(crate) fn dummy() -> ProcessedConstraints {
98        ProcessedConstraints {
99            subset: Subset::empty(),
100            fast: Pooled::new(Vec::new()),
101            slow: Pooled::new(Vec::new()),
102        }
103    }
104}
105
/// An atom paired with a list of that atom's columns.
///
/// `VarInfo::occurrences` stores values of this type.
/// NOTE(review): `vars` presumably lists the columns of `atom` in which a given variable
/// appears — confirm against the query planner.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct SubAtom {
    /// The atom being referenced.
    pub(crate) atom: AtomId,
    /// Columns of `atom`; stored inline for up to two entries.
    pub(crate) vars: SmallVec<[ColumnId; 2]>,
}
111
112impl SubAtom {
113    pub(crate) fn new(atom: AtomId) -> SubAtom {
114        SubAtom {
115            atom,
116            vars: Default::default(),
117        }
118    }
119}
120
/// Per-variable metadata for a query.
#[derive(Debug, Clone)]
pub(crate) struct VarInfo {
    /// The atoms (and columns within them) associated with this variable.
    pub(crate) occurrences: Vec<SubAtom>,
    /// Whether or not this variable shows up in the "actions" portion of a
    /// rule.
    pub(crate) used_in_rhs: bool,
    // NOTE(review): presumably set when the variable is first bound by an action rather than by
    // the query itself — confirm against the rule builder.
    pub(crate) defined_in_rhs: bool,
    /// Optional human-readable name for the variable.
    pub(crate) name: Option<Arc<str>>,
}
130
/// A reference-counted, resettable handle to a hash index over a tuple of columns.
pub(crate) type HashIndex = Arc<ResettableOnceLock<Index<TupleIndex>>>;
/// A reference-counted, resettable handle to a hash index over a single column.
pub(crate) type HashColumnIndex = Arc<ResettableOnceLock<Index<ColumnIndex>>>;
133
/// A table registered in a database, bundled with its metadata and cached indexes.
pub struct TableInfo {
    /// Optional human-readable name given when the table was registered.
    pub(crate) name: Option<Arc<str>>,
    /// The table's spec, captured when the table was added to the database.
    pub(crate) spec: TableSpec,
    /// The table itself.
    pub(crate) table: WrappedTable,
    /// Multi-column hash indexes, keyed by the list of indexed columns.
    pub(crate) indexes: IndexCatalog<SmallVec<[ColumnId; 4]>, HashIndex>,
    /// Single-column hash indexes, keyed by the indexed column.
    pub(crate) column_indexes: IndexCatalog<ColumnId, HashColumnIndex>,
}
141
142impl TableInfo {
143    pub fn table(&self) -> &WrappedTable {
144        &self.table
145    }
146
147    pub fn name(&self) -> Option<&str> {
148        self.name.as_deref()
149    }
150
151    pub fn spec(&self) -> &TableSpec {
152        &self.spec
153    }
154}
155
156impl Clone for TableInfo {
157    fn clone(&self) -> Self {
158        fn deep_clone_map<K: Clone + std::hash::Hash + Eq, TI: IndexBase + Clone>(
159            map: &IndexCatalog<K, Arc<ResettableOnceLock<Index<TI>>>>,
160            table: WrappedTableRef,
161        ) -> IndexCatalog<K, Arc<ResettableOnceLock<Index<TI>>>> {
162            map.map(|table_ref| {
163                let (k, v) = table_ref;
164                let v: Index<TI> = v
165                    .get_or_update(|index| {
166                        index.refresh(table);
167                    })
168                    .clone();
169                (k.clone(), Arc::new(ResettableOnceLock::new(v)))
170            })
171        }
172        TableInfo {
173            name: self.name.clone(),
174            spec: self.spec.clone(),
175            table: self.table.dyn_clone(),
176            indexes: deep_clone_map(&self.indexes, self.table.as_ref()),
177            column_indexes: deep_clone_map(&self.column_indexes, self.table.as_ref()),
178        }
179    }
180}
181
// Id type naming a counter stored in a database (see `Database::add_counter`).
define_id!(pub CounterId, u32, "A counter accessible to actions, useful for generating unique Ids.");
// Id type naming an external function registered with a database
// (see `Database::add_external_function`).
define_id!(pub ExternalFunctionId, u32, "A user-defined operation that can be invoked from a query");
184
/// External functions allow external callers to manipulate database state in
/// near-arbitrary ways.
///
/// This is a useful, if low-level, interface for extending this database with
/// functionality and state not built into the core model.
///
/// Implementors must be `Send + Sync` and cloneable through `dyn_clone::DynClone`, which is what
/// lets a `Box<dyn ExternalFunction>` be cloned.
pub trait ExternalFunction: dyn_clone::DynClone + Send + Sync {
    /// Invoke the function with mutable access to the database. If a value is
    /// not returned, halt the execution of the current rule.
    ///
    /// `args` holds the argument values for a single invocation.
    fn invoke(&self, state: &mut ExecutionState, args: &[Value]) -> Option<Value>;
}
195
196/// Automatically generate an `ExternalFunction` implementation from a function.
197pub fn make_external_func<
198    F: Fn(&mut ExecutionState, &[Value]) -> Option<Value> + Clone + Send + Sync,
199>(
200    f: F,
201) -> impl ExternalFunction {
202    #[derive(Clone)]
203    struct Wrapped<F>(F);
204    impl<F> ExternalFunction for Wrapped<F>
205    where
206        F: Fn(&mut ExecutionState, &[Value]) -> Option<Value> + Clone + Send + Sync,
207    {
208        fn invoke(&self, state: &mut ExecutionState, args: &[Value]) -> Option<Value> {
209            (self.0)(state, args)
210        }
211    }
212    Wrapped(f)
213}
214
/// A vectorized variant of [`ExternalFunction::invoke`] to avoid repeated dynamic dispatch.
///
/// Calls `this.invoke` once per binding visited by `for_each_binding_with_mask!`, collects the
/// results into a pooled vector, and binds that vector to `out_var` in `bindings`.
pub(crate) fn invoke_batch(
    this: &dyn ExternalFunction,
    state: &mut ExecutionState,
    mask: &mut Mask,
    bindings: &mut Bindings,
    args: &[QueryEntry],
    out_var: Variable,
) {
    // Take the output buffer from the ambient pool set rather than allocating a fresh vector.
    let pool: Pool<Vec<Value>> = with_pool_set(|ps| ps.get_pool());
    let mut out = pool.get();
    out.reserve(mask.len());
    for_each_binding_with_mask!(mask, args, bindings, |iter| {
        // NOTE(review): `Value::stale` presumably supplies the filler for positions the mask
        // excludes, and a `None` result presumably clears the row from the mask — confirm
        // against `fill_vec`.
        iter.fill_vec(&mut out, Value::stale, |_, args| {
            this.invoke(state, args.as_slice())
        });
    });
    bindings.insert(out_var, &out);
}
234
/// A variant of [`invoke_batch`] that overwrites the output variable,
/// rather than assigning all new values.
///
/// *Panics* This method will panic if `out_var` doesn't already have an appropriately-sized
/// vector bound in `bindings`.
pub(crate) fn invoke_batch_assign(
    this: &dyn ExternalFunction,
    state: &mut ExecutionState,
    mask: &mut Mask,
    bindings: &mut Bindings,
    args: &[QueryEntry],
    out_var: Variable,
) {
    // Temporarily remove the existing binding so it can be written to while `bindings` is used
    // to read the arguments; it is put back below.
    let mut out = bindings.take(out_var).expect("out_var must be bound");
    for_each_binding_with_mask!(mask, args, bindings, |iter| {
        // NOTE(review): presumably rows for which `invoke` returns `None` are dropped from the
        // mask — confirm against `assign_vec_and_retain`.
        iter.assign_vec_and_retain(&mut out.vals, |_, args| this.invoke(state, &args))
    });
    bindings.replace(out);
}
254
// Implements `Clone` for `Box<dyn ExternalFunction>`.
dyn_clone::clone_trait_object!(ExternalFunction);

/// The registry of external functions owned by a database, keyed by [`ExternalFunctionId`].
pub(crate) type ExternalFunctions =
    DenseIdMapWithReuse<ExternalFunctionId, Box<dyn ExternalFunction>>;
260
/// The counters owned by a database: one atomic counter per [`CounterId`].
///
/// Because the cells are atomic, counters can be read and incremented through a shared
/// reference.
#[derive(Default)]
pub(crate) struct Counters(DenseIdMap<CounterId, AtomicUsize>);
263
264impl Clone for Counters {
265    fn clone(&self) -> Counters {
266        let mut map = DenseIdMap::new();
267        for (k, v) in self.0.iter() {
268            // NB: we may want to experiment with Ordering::Relaxed here.
269            map.insert(k, AtomicUsize::new(v.load(Ordering::SeqCst)));
270        }
271        Counters(map)
272    }
273}
274
275impl Counters {
276    pub(crate) fn read(&self, ctr: CounterId) -> usize {
277        self.0[ctr].load(Ordering::Acquire)
278    }
279    pub(crate) fn inc(&self, ctr: CounterId) -> usize {
280        // We synchronize with `read_counter` but not with other increments.
281        // NB: we may want to experiment with Ordering::Relaxed here.
282        self.0[ctr].fetch_add(1, Ordering::Release)
283    }
284}
285
/// A collection of tables and indexes over them.
///
/// A database also owns the memory pools used by its tables.
#[derive(Clone, Default)]
pub struct Database {
    // NB: some fields are pub(crate) to allow some internal modules to avoid
    // borrowing the whole table.
    pub(crate) tables: DenseIdMap<TableId, TableInfo>,
    // TODO: having a single AtomicUsize per counter can lead to contention. We
    // should look into prefetching counters when creating a new ExecutionState
    // and incrementing locally. Note that the batch size shouldn't be too big
    // because we keep an array per id in the UF.
    pub(crate) counters: Counters,
    /// External functions registered with this database, keyed by [`ExternalFunctionId`].
    pub(crate) external_functions: ExternalFunctions,
    /// Container state for this database; temporarily moved out during
    /// [`Database::rebuild_containers`].
    container_values: ContainerValues,
    /// `notification_list` contains the list of tables that have been modified since the last call
    /// to [`Database::merge_all`].
    notification_list: NotificationList<TableId>,
    // Tracks the relative dependencies between tables during merge operations.
    deps: DependencyGraph,
    /// Base-value state for this database; exposed through [`Database::base_values`].
    base_values: BaseValues,
    /// A rough estimate of the total size of the database.
    ///
    /// This is primarily used to determine whether or not to attempt to do some operations in
    /// parallel.
    total_size_estimate: usize,
}
313
314impl Database {
315    /// Create an empty Database.
316    ///
317    /// Queries are executed using the current rayon thread pool, which defaults to the global
318    /// thread pool.
319    pub fn new() -> Database {
320        Database::default()
321    }
322
    /// Initialize a new rule set to run against this database.
    ///
    /// The returned builder borrows the database mutably for as long as it is alive.
    pub fn new_rule_set(&mut self) -> RuleSetBuilder<'_> {
        RuleSetBuilder::new(self)
    }
327
328    /// Add a new external function to the database.
329    pub fn add_external_function(
330        &mut self,
331        f: Box<dyn ExternalFunction + 'static>,
332    ) -> ExternalFunctionId {
333        self.external_functions.push(f)
334    }
335
336    /// Free an existing external function. Make sure not to use `id` afterwards.
337    pub fn free_external_function(&mut self, id: ExternalFunctionId) {
338        self.external_functions.take(id);
339    }
340
341    pub fn base_values(&self) -> &BaseValues {
342        &self.base_values
343    }
344
345    pub fn base_values_mut(&mut self) -> &mut BaseValues {
346        &mut self.base_values
347    }
348
349    pub fn container_values(&self) -> &ContainerValues {
350        &self.container_values
351    }
352
353    pub fn container_values_mut(&mut self) -> &mut ContainerValues {
354        &mut self.container_values
355    }
356
357    pub fn rebuild_containers(&mut self, table_id: TableId) -> ContainerRebuildSummary {
358        let mut containers = mem::take(&mut self.container_values);
359        let table = &self.tables[table_id].table;
360        let res = self.with_execution_state(|state| containers.rebuild_all(table_id, table, state));
361        self.container_values = containers;
362        res
363    }
364
365    /// Apply the value-level rebuild encoded by `func_id` to all the tables in `to_rebuild`.
366    ///
367    /// The native [`Table::apply_rebuild`] method takes a `next_ts` argument for filling in new
368    /// values in a table like [`crate::SortedWritesTable`] where values in a certain column need
369    /// to be inserted in sorted order; the `next_ts` argument to this method is passed to
370    /// `apply_rebuild` for this purpose.
371    pub fn apply_rebuild(
372        &mut self,
373        func_id: TableId,
374        to_rebuild: &[TableId],
375        next_ts: Value,
376    ) -> bool {
377        let func = self.tables.take(func_id).unwrap();
378        self.run_on_tables(to_rebuild, |_, info, view| {
379            info.table.apply_rebuild(
380                func_id,
381                &func.table,
382                next_ts,
383                &mut ExecutionState::new(*view, Default::default()),
384            )
385        });
386        self.tables.insert(func_id, func);
387        self.merge_all()
388    }
389
390    pub fn refresh_rows_for_values(
391        &mut self,
392        to_refresh: &[TableId],
393        dirty_ids: &[Value],
394        next_ts: Value,
395    ) -> bool {
396        if dirty_ids.is_empty() {
397            return false;
398        }
399        // This is the follow-up for `ContainerRebuildSummary::dirty_ids()`.
400        // These ids changed semantics without changing identity, so parent
401        // rows can become newly matchable without getting an ordinary table
402        // delta.
403        //
404        // It must run after ordinary table rebuild, which already handles
405        // changed-id cases by rewriting parent rows to the new id.
406        self.run_on_tables(to_refresh, |_, info, _| {
407            info.table.refresh_rows_for_values(dirty_ids, next_ts)
408        });
409        self.merge_all()
410    }
411
412    fn run_on_tables(
413        &mut self,
414        table_ids: &[TableId],
415        run: impl for<'a> Fn(TableId, &mut TableInfo, &DbView<'a>) -> bool + Sync,
416    ) {
417        if parallelize_db_level_op(self.total_size_estimate) {
418            let mut tables = Vec::with_capacity(table_ids.len());
419            for id in table_ids {
420                tables.push((*id, self.tables.take(*id).unwrap()));
421            }
422            let view = self.read_only_view();
423            tables.par_iter_mut().for_each(|(id, info)| {
424                if run(*id, info, &view) {
425                    self.notification_list.notify(*id);
426                }
427            });
428            for (id, info) in tables {
429                self.tables.insert(id, info);
430            }
431        } else {
432            for id in table_ids {
433                let mut info = self.tables.take(*id).unwrap();
434                let changed = {
435                    let view = self.read_only_view();
436                    run(*id, &mut info, &view)
437                };
438                if changed {
439                    self.notification_list.notify(*id);
440                }
441                self.tables.insert(*id, info);
442            }
443        }
444    }
445
446    /// Run `f` with access to an `ExecutionState` mapped to this database.
447    pub fn with_execution_state<R>(&self, f: impl FnOnce(&mut ExecutionState) -> R) -> R {
448        let mut state = ExecutionState::new(self.read_only_view(), Default::default());
449        f(&mut state)
450    }
451
452    pub(crate) fn read_only_view(&self) -> DbView<'_> {
453        DbView {
454            table_info: &self.tables,
455            counters: &self.counters,
456            external_funcs: &self.external_functions,
457            bases: &self.base_values,
458            containers: &self.container_values,
459            notification_list: &self.notification_list,
460        }
461    }
462
463    /// Estimate the size of the table. If a constraint is provided, return an
464    /// estimate of the size of the subset of the table matching the constraint.
465    pub fn estimate_size(&self, table: TableId, c: Option<Constraint>) -> usize {
466        let table_info = self
467            .tables
468            .get(table)
469            .expect("table must be declared in the current database");
470        let table = &table_info.table;
471        if let Some(c) = c {
472            if let Some(sub) = table.fast_subset(&c) {
473                // In the case where a the constraint can be computed quickly,
474                // we do not filter for staleness, which may over-approximate.
475                sub.size()
476            } else {
477                table.refine_one(table.refine_live(table.all()), &c).size()
478            }
479        } else {
480            table.len()
481        }
482    }
483
484    /// Create a new counter for this database.
485    ///
486    /// These counters can be used to generate unique ids as part of an action.
487    pub fn add_counter(&mut self) -> CounterId {
488        self.counters.0.push(AtomicUsize::new(0))
489    }
490
491    /// Increment the given counter and return its previous value.
492    pub fn inc_counter(&self, counter: CounterId) -> usize {
493        self.counters.inc(counter)
494    }
495
496    /// Get the current value of the given counter.
497    pub fn read_counter(&self, counter: CounterId) -> usize {
498        self.counters.read(counter)
499    }
500
    /// A helper for merging all pending updates. Used to write to the database after updates have
    /// been staged. Returns true if any tuples were added.
    ///
    /// Exposed for testing purposes.
    ///
    /// Useful for out-of-band insertions into the database.
    ///
    /// Each round drains the notification list. Rounds with fewer than four pending tables are
    /// handed to `merge_simple`, which also ends the loop; larger rounds are merged stratum by
    /// stratum according to the dependency graph. Afterwards every cached index is reset and the
    /// database's size estimate is recomputed.
    pub fn merge_all(&mut self) -> bool {
        let mut ever_changed = false;
        let do_parallel = parallelize_db_level_op(self.total_size_estimate);
        let mut to_merge = IndexSet::default();
        loop {
            to_merge.clear();
            let to_merge_vec = self.notification_list.reset();
            // Small batches take the fast path; this is the only exit from the loop.
            if to_merge_vec.len() < 4 {
                ever_changed |= self.merge_simple(to_merge_vec);
                break;
            }
            for table in to_merge_vec {
                to_merge.insert(table);
            }

            let mut changed = false;
            let mut tables_merging = DenseIdMap::<
                TableId,
                (
                    // The info needed to merge this table.
                    Option<TableInfo>,
                    // Pre-allocated write buffers, according to the table's declared write
                    // dependencies.
                    DenseIdMap<TableId, Box<dyn MutationBuffer>>,
                ),
            >::with_capacity(self.tables.n_ids());
            for stratum in self.deps.strata() {
                // Initialize the write dependencies first.
                for table in stratum.intersection(&to_merge).copied() {
                    let mut bufs = DenseIdMap::default();
                    for dep in self.deps.write_deps(table) {
                        if let Some(info) = self.tables.get(dep) {
                            bufs.insert(dep, info.table.new_buffer());
                        }
                    }
                    tables_merging.insert(table, (None, bufs));
                }
                // Then initialize read dependencies (this two-phase structure is why we have an
                // Option in the tables_merging map).
                for table in stratum.intersection(&to_merge).copied() {
                    tables_merging[table].0 = Some(self.tables.unwrap_val(table));
                }
                let db = self.read_only_view();
                // `max` over booleans acts as a non-short-circuiting "any": every table in the
                // stratum is merged even after one of them reports a change.
                changed |= if do_parallel {
                    tables_merging
                        .par_iter_mut()
                        .map(|(_, (info, buffers))| {
                            let mut es = ExecutionState::new(db, mem::take(buffers));
                            info.as_mut().unwrap().table.merge(&mut es).added || es.changed
                        })
                        .max()
                        .unwrap_or(false)
                } else {
                    tables_merging
                        .iter_mut()
                        .map(|(_, (info, buffers))| {
                            let mut es = ExecutionState::new(db, mem::take(buffers));
                            info.as_mut().unwrap().table.merge(&mut es).added || es.changed
                        })
                        .max()
                        .unwrap_or(false)
                };
                // Hand the merged tables back to the database before the next stratum runs.
                for (id, (table, _)) in tables_merging.drain() {
                    self.tables.insert(id, table.unwrap());
                }
            }
            ever_changed |= changed;
        }
        // Reset all indexes to force an update on the next access.
        let mut size_estimate = 0;
        for (_, info) in self.tables.iter_mut() {
            // `Arc::get_mut(..).unwrap()` panics if any other handle to the index is still alive.
            info.column_indexes.update(|_, ti| {
                Arc::get_mut(ti).unwrap().reset();
            });
            info.indexes.update(|_, ti| {
                Arc::get_mut(ti).unwrap().reset();
            });
            size_estimate += info.table.len();
        }
        self.total_size_estimate = size_estimate;
        ever_changed
    }
589
590    /// A "fast path" merge method that is not optimized for parallelism and does not respect read
591    /// and write dependencies. This ends up being faster than the full "strata-aware" option in
592    /// the body of `merge_all`.
593    fn merge_simple(&mut self, mut to_merge: SmallVec<[TableId; 4]>) -> bool {
594        let mut changed = false;
595        while !to_merge.is_empty() {
596            for table_id in to_merge.iter().copied() {
597                let mut info = self.tables.unwrap_val(table_id);
598                let mut es = ExecutionState::new(self.read_only_view(), Default::default());
599                changed |= info.table.merge(&mut es).added || es.changed;
600                self.tables.insert(table_id, info);
601            }
602            to_merge = self.notification_list.reset();
603        }
604        changed
605    }
606
607    /// A low-level helper for merging pending updates to a particular function.
608    ///
609    /// Callers should prefer `merge_all`, as the process of merging the data
610    /// for a particular table may cause other updates to be buffered
611    /// elesewhere. The `merge_all` method runs merges to a fixed point to avoid
612    /// surprises here.
613    pub fn merge_table(&mut self, table: TableId) -> bool {
614        let mut info = self.tables.unwrap_val(table);
615        self.total_size_estimate = self.total_size_estimate.wrapping_sub(info.table.len());
616        let table_changed = info.table.merge(&mut ExecutionState::new(
617            self.read_only_view(),
618            Default::default(),
619        ));
620        self.total_size_estimate = self.total_size_estimate.wrapping_add(info.table.len());
621        self.tables.insert(table, info);
622        table_changed.added
623    }
624
625    /// Get id of the next table to be added to the database.
626    ///
627    /// This can be useful for "knot tying", when tables need to reference their
628    /// own id.
629    pub fn next_table_id(&self) -> TableId {
630        self.tables.next_id()
631    }
632
633    /// Add a table with the given schema to the database.
634    ///
635    /// The table must have a compatible spec with `types` (e.g. same number of
636    /// columns).
637    pub fn add_table<T: Table + Sized + 'static>(
638        &mut self,
639        table: T,
640        read_deps: impl IntoIterator<Item = TableId>,
641        write_deps: impl IntoIterator<Item = TableId>,
642    ) -> TableId {
643        self.add_table_impl(table, None, read_deps, write_deps)
644    }
645
646    pub fn add_table_named<T: Table + Sized + 'static>(
647        &mut self,
648        table: T,
649        name: Arc<str>,
650        read_deps: impl IntoIterator<Item = TableId>,
651        write_deps: impl IntoIterator<Item = TableId>,
652    ) -> TableId {
653        self.add_table_impl(table, Some(name), read_deps, write_deps)
654    }
655
656    fn add_table_impl<T: Table + Sized + 'static>(
657        &mut self,
658        table: T,
659        name: Option<Arc<str>>,
660        read_deps: impl IntoIterator<Item = TableId>,
661        write_deps: impl IntoIterator<Item = TableId>,
662    ) -> TableId {
663        let spec = table.spec();
664        let table = WrappedTable::new(table);
665        let res = self.tables.push(TableInfo {
666            name,
667            spec,
668            table,
669            indexes: IndexCatalog::new(),
670            column_indexes: IndexCatalog::new(),
671        });
672        self.deps.add_table(res, read_deps, write_deps);
673        res
674    }
675
676    /// Get direct mutable access to the table.
677    ///
678    /// This method is useful for out-of-band access to databse state.
679    ///
680    /// **NOTE:** It is legal to call [`Table::new_buffer`] on the returned table handle, and use
681    /// that to stage updates to the given table via [`MutationBuffer::stage_insert`] or
682    /// [`MutationBuffer::stage_remove`], however this is *likely to be a source of bugs*.
683    ///
684    /// Updates staged in this way will not cause `table` to be marked as having pending changes in
685    /// the next call to [`Database::merge_all`]. Instead, such users should use
686    /// [`Database::new_buffer`], which plumbs this signal through correctly, or better yet,
687    /// perform all updates through an [`ExecutionState`] or a [`crate::RuleBuilder`]. If these
688    /// options do not work, then calling [`Database::merge_table`] directly will force a merge
689    /// call on the table.
690    pub fn get_table(&self, table: TableId) -> &WrappedTable {
691        &self
692            .tables
693            .get(table)
694            .expect("must access a table that has been declared in this database")
695            .table
696    }
697
698    /// Get a handle on the given table along with metadata about it.
699    ///
700    ///
701    /// **NOTE:** See the note on [`Database::get_table`] around manually staging updates.
702    pub fn get_table_info(&self, table: TableId) -> &TableInfo {
703        self.tables
704            .get(table)
705            .expect("must access a table that has been declared in this database")
706    }
707
708    /// Create a new mutation buffer for the table with id `id`.
709    ///
710    /// This will marked the given table as potentially changed for the next round of merging.
711    /// Unlike calling [`Table::new_buffer`] on a table returned from a getter, this method also
712    /// triggers change notification metadata that is read by [`Database::merge_all`].
713    pub fn new_buffer(&self, id: TableId) -> Box<dyn MutationBuffer> {
714        self.notification_list.notify(id);
715        self.get_table(id).new_buffer()
716    }
717
    /// Split `cs` into fast and slow constraints for `table` and compute the subset of rows
    /// matching the fast ones.
    ///
    /// Starts from the table's own fast/slow split, then upgrades every slow `EqConst`
    /// constraint on a cacheable column to "fast" by answering it from a column index.
    pub(crate) fn process_constraints(
        &self,
        table: TableId,
        cs: &[Constraint],
    ) -> ProcessedConstraints {
        let table_info = &self.tables[table];
        let (mut subset, mut fast, mut slow) = table_info.table.split_fast_slow(cs);
        // The closure returns `true` to keep a constraint in `slow` and `false` to drop it
        // after it has been moved to `fast`.
        slow.retain(|c| {
            let (col, val) = match c {
                Constraint::EqConst { col, val } => (*col, *val),
                Constraint::Eq { .. }
                | Constraint::LtConst { .. }
                | Constraint::GtConst { .. }
                | Constraint::LeConst { .. }
                | Constraint::GeConst { .. } => return true,
            };
            // We are looking up by a constant: this is something we can build
            // an index for as long as the column is cacheable.
            if *table_info
                .spec
                .uncacheable_columns
                .get(col)
                .unwrap_or(&false)
            {
                return true;
            }
            // We have or will build an index: upgrade this constraint to
            // 'fast'.
            fast.push(c.clone());
            let index = get_column_index_from_tableinfo(table_info, col);
            match index.get().unwrap().get_subset(&val) {
                Some(s) => {
                    // Narrow the running subset to the rows the index reports for `val`.
                    with_pool_set(|ps| subset.intersect(s, &ps.get_pool()));
                }
                None => {
                    // There are no rows matching this key! We can constrain this to nothing.
                    subset = Subset::empty();
                }
            }
            // Remove this constraint from the slow list.
            false
        });
        ProcessedConstraints { subset, fast, slow }
    }
762
763    /// Get direct mutable access to the table.
764    ///
765    /// This method is useful for out-of-band access to databse state.
766    ///
767    /// **NOTE:** See the warning around staging updates to handles returned through this method in
768    /// the documentation for [`Database::get_table`].
769    pub fn get_table_mut(&mut self, id: TableId) -> &mut dyn Table {
770        &mut *self
771            .tables
772            .get_mut(id)
773            .expect("must access a table that has been declared in this database")
774            .table
775    }
776
    /// Build an execution plan for `query` by delegating to `plan::plan_query`.
    ///
    /// The database itself is not consulted here; `self` is unused by the current body.
    pub(crate) fn plan_query(&mut self, query: Query) -> Plan {
        plan::plan_query(query)
    }
780}
781
/// Dropping a database clears the ambient pool set on the dropping thread and on every thread
/// of the current rayon pool.
impl Drop for Database {
    fn drop(&mut self) {
        // Clean up the ambient thread pool.
        //
        // Calling mem::forget on the egraph can result in much faster execution times.
        with_pool_set(PoolSet::clear);
        // `rayon::broadcast` runs the closure once on each thread of the current rayon pool.
        rayon::broadcast(|_| with_pool_set(PoolSet::clear));
    }
}
791
792/// The core logic behind getting and updating a hash index.
793///
794/// This is in a separate function to allow us to reuse it while already
795/// borrowing a `TableInfo`.
796fn get_index_from_tableinfo(table_info: &TableInfo, cols: &[ColumnId]) -> HashIndex {
797    let index: Arc<_> = table_info.indexes.get_or_insert(cols.into(), || {
798        Arc::new(ResettableOnceLock::new(Index::new(
799            cols.to_vec(),
800            TupleIndex::new(cols.len()),
801        )))
802    });
803    index.get_or_update(|index| {
804        index.refresh(table_info.table.as_ref());
805    });
806    debug_assert!(
807        !index
808            .get()
809            .unwrap()
810            .needs_refresh(table_info.table.as_ref())
811    );
812    index
813}
814
815/// The core logic behind getting and updating a column index.
816///
817/// This is the single-column analog to [`get_index_from_tableinfo`].
818fn get_column_index_from_tableinfo(table_info: &TableInfo, col: ColumnId) -> HashColumnIndex {
819    let index: Arc<_> = table_info.column_indexes.get_or_insert(col, || {
820        Arc::new(ResettableOnceLock::new(Index::new(
821            vec![col],
822            ColumnIndex::new(),
823        )))
824    });
825    index.get_or_update(|index| {
826        index.refresh(table_info.table.as_ref());
827    });
828    debug_assert!(
829        !index
830            .get()
831            .unwrap()
832            .needs_refresh(table_info.table.as_ref())
833    );
834    index
835}