egglog_reports/
lib.rs

1use clap::clap_derive::ValueEnum;
2use rustc_hash::FxHasher;
3use serde::Serialize;
4use std::{
5    fmt::{Display, Formatter},
6    hash::BuildHasherDefault,
7    sync::Arc,
8};
9use web_time::Duration;
10
11pub(crate) type HashMap<K, V> = hashbrown::HashMap<K, V, BuildHasherDefault<FxHasher>>;
12pub(crate) type IndexSet<T> = indexmap::IndexSet<T, BuildHasherDefault<FxHasher>>;
13
14#[derive(ValueEnum, Default, Serialize, Debug, Clone, Copy)]
15pub enum ReportLevel {
16    /// Report only the time taken in each search/apply, merge, rebuild phase.
17    #[default]
18    TimeOnly,
19    /// Report [`ReportLevel::TimeOnly`] and query plan for each rule
20    WithPlan,
21    /// Report [`ReportLevel::WithPlan`] and the detailed statistics at each stage of the query plan.
22    StageInfo,
23}
24
25#[derive(Serialize, Clone, Debug)]
26pub struct SingleScan(pub String, pub (String, i64));
27#[derive(Serialize, Clone, Debug)]
28pub struct Scan(pub String, pub Vec<(String, i64)>);
29
30#[derive(Serialize, Clone, Debug)]
31pub enum Stage {
32    Intersect {
33        scans: Vec<SingleScan>,
34    },
35    FusedIntersect {
36        cover: Scan,             // build side
37        to_intersect: Vec<Scan>, // probe sides
38    },
39}
40
41#[derive(Serialize, Clone, Debug)]
42pub struct StageStats {
43    pub num_candidates: usize,
44    pub num_succeeded: usize,
45}
46
47#[derive(Serialize, Clone, Debug, Default)]
48pub struct Plan {
49    pub stages: Vec<(
50        Stage,
51        Option<StageStats>,
52        // indices of next stages
53        Vec<usize>,
54    )>,
55}
56
57#[derive(Debug, Serialize, Clone, Default)]
58pub struct RuleReport {
59    pub plan: Option<Plan>,
60    pub search_and_apply_time: Duration,
61    // TODO: succeeding matches
62    pub num_matches: usize,
63}
64
65#[derive(Debug, Serialize, Clone, Default)]
66pub struct RuleSetReport {
67    pub changed: bool,
68    pub rule_reports: HashMap<Arc<str>, Vec<RuleReport>>,
69    pub search_and_apply_time: Duration,
70    pub merge_time: Duration,
71}
72
73impl RuleSetReport {
74    pub fn num_matches(&self, rule: &str) -> usize {
75        self.rule_reports
76            .get(rule)
77            .map(|r| r.iter().map(|r| r.num_matches).sum())
78            .unwrap_or(0)
79    }
80
81    pub fn rule_search_and_apply_time(&self, rule: &str) -> Duration {
82        self.rule_reports
83            .get(rule)
84            .map(|r| r.iter().map(|r| r.search_and_apply_time).sum())
85            .unwrap_or(Duration::ZERO)
86    }
87}
88
89#[derive(Debug, Serialize, Clone, Default)]
90pub struct IterationReport {
91    pub rule_set_report: RuleSetReport,
92    pub rebuild_time: Duration,
93}
94
95impl IterationReport {
96    pub fn changed(&self) -> bool {
97        self.rule_set_report.changed
98    }
99
100    pub fn search_and_apply_time(&self) -> Duration {
101        self.rule_set_report.search_and_apply_time
102    }
103
104    pub fn rule_reports(&self) -> &HashMap<Arc<str>, Vec<RuleReport>> {
105        &self.rule_set_report.rule_reports
106    }
107
108    pub fn rules(&self) -> impl Iterator<Item = &Arc<str>> {
109        self.rule_set_report.rule_reports.keys()
110    }
111}
112
113/// Running a schedule produces a report of the results.
114/// This includes rough timing information and whether
115/// the database was updated.
116/// Calling `union` on two run reports adds the timing
117/// information together.
118#[derive(Debug, Serialize, Clone)]
119pub struct RunReport {
120    // Since `IterationReport`s are immutable, we can reference count them to avoid
121    // expensive cloning when e-graphs are cloned.
122    pub iterations: Vec<Arc<IterationReport>>,
123    /// If any changes were made to the database.
124    pub updated: bool,
125    /// True if this run observed no database changes and there is no deferred
126    /// scheduler work requiring another iteration.
127    pub can_stop: bool,
128    pub search_and_apply_time_per_rule: HashMap<Arc<str>, Duration>,
129    pub num_matches_per_rule: HashMap<Arc<str>, usize>,
130    pub search_and_apply_time_per_ruleset: HashMap<Arc<str>, Duration>,
131    pub merge_time_per_ruleset: HashMap<Arc<str>, Duration>,
132    pub rebuild_time_per_ruleset: HashMap<Arc<str>, Duration>,
133}
134
135impl Default for RunReport {
136    fn default() -> Self {
137        Self {
138            iterations: Vec::new(),
139            updated: false,
140            can_stop: true,
141            search_and_apply_time_per_rule: HashMap::default(),
142            num_matches_per_rule: HashMap::default(),
143            search_and_apply_time_per_ruleset: HashMap::default(),
144            merge_time_per_ruleset: HashMap::default(),
145            rebuild_time_per_ruleset: HashMap::default(),
146        }
147    }
148}
149
150impl Display for RunReport {
151    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
152        let mut rule_times_vec: Vec<_> = self.search_and_apply_time_per_rule.iter().collect();
153        rule_times_vec.sort_by_key(|(_, time)| **time);
154
155        for (rule, time) in rule_times_vec {
156            let name = Self::truncate_rule_name(rule.to_string());
157            let time = time.as_secs_f64();
158            let num_matches = self.num_matches_per_rule.get(rule).copied().unwrap_or(0);
159            writeln!(
160                f,
161                "Rule {name}: search and apply {time:.3}s, num matches {num_matches}",
162            )?;
163        }
164
165        let rulesets = self
166            .search_and_apply_time_per_ruleset
167            .keys()
168            .chain(self.merge_time_per_ruleset.keys())
169            .chain(self.rebuild_time_per_ruleset.keys())
170            .collect::<IndexSet<_>>();
171
172        for ruleset in rulesets {
173            // print out the search and apply time for rule
174            let search_and_apply_time = self
175                .search_and_apply_time_per_ruleset
176                .get(ruleset)
177                .cloned()
178                .unwrap_or(Duration::ZERO)
179                .as_secs_f64();
180            let merge_time = self
181                .merge_time_per_ruleset
182                .get(ruleset)
183                .cloned()
184                .unwrap_or(Duration::ZERO)
185                .as_secs_f64();
186            let rebuild_time = self
187                .rebuild_time_per_ruleset
188                .get(ruleset)
189                .cloned()
190                .unwrap_or(Duration::ZERO)
191                .as_secs_f64();
192            writeln!(
193                f,
194                "Ruleset {ruleset}: search {search_and_apply_time:.3}s, merge {merge_time:.3}s, rebuild {rebuild_time:.3}s",
195            )?;
196        }
197
198        Ok(())
199    }
200}
201
202impl RunReport {
203    /// add a ... and a maximum size to the name
204    /// for printing, since they may be the rule itself
205    fn truncate_rule_name(mut s: String) -> String {
206        // replace newlines in s with a space
207        s = s.replace('\n', " ");
208        if s.len() > 80 {
209            s.truncate(80);
210            s.push_str("...");
211        }
212        s
213    }
214
215    fn union_times(
216        times: &mut HashMap<Arc<str>, Duration>,
217        other_times: HashMap<Arc<str>, Duration>,
218    ) {
219        for (k, v) in other_times {
220            *times.entry(k).or_default() += v;
221        }
222    }
223
224    fn union_counts(counts: &mut HashMap<Arc<str>, usize>, other_counts: HashMap<Arc<str>, usize>) {
225        for (k, v) in other_counts {
226            *counts.entry(k).or_default() += v;
227        }
228    }
229
230    pub fn singleton(ruleset: &str, iteration: IterationReport) -> Self {
231        let mut report = RunReport::default();
232
233        for rule in iteration.rules() {
234            *report
235                .search_and_apply_time_per_rule
236                .entry(rule.clone())
237                .or_default() += iteration.rule_set_report.rule_search_and_apply_time(rule);
238            *report.num_matches_per_rule.entry(rule.clone()).or_default() +=
239                iteration.rule_set_report.num_matches(rule);
240        }
241
242        let ruleset: Arc<str> = ruleset.into();
243        let per_ruleset = |x| [(ruleset.clone(), x)].into_iter().collect();
244
245        report.search_and_apply_time_per_ruleset = per_ruleset(iteration.search_and_apply_time());
246        report.merge_time_per_ruleset = per_ruleset(iteration.rule_set_report.merge_time);
247        report.rebuild_time_per_ruleset = per_ruleset(iteration.rebuild_time);
248        report.updated = iteration.changed();
249        report.can_stop = !report.updated;
250        report.iterations.push(Arc::new(iteration));
251
252        report
253    }
254
255    pub fn add_iteration(&mut self, ruleset: &str, iteration: IterationReport) {
256        self.union(RunReport::singleton(ruleset, iteration));
257    }
258
259    /// Merge two reports.
260    pub fn union(&mut self, other: Self) {
261        self.iterations.extend(other.iterations);
262        self.updated |= other.updated;
263        self.can_stop &= other.can_stop;
264        RunReport::union_times(
265            &mut self.search_and_apply_time_per_rule,
266            other.search_and_apply_time_per_rule,
267        );
268        RunReport::union_counts(&mut self.num_matches_per_rule, other.num_matches_per_rule);
269        RunReport::union_times(
270            &mut self.search_and_apply_time_per_ruleset,
271            other.search_and_apply_time_per_ruleset,
272        );
273        RunReport::union_times(
274            &mut self.merge_time_per_ruleset,
275            other.merge_time_per_ruleset,
276        );
277        RunReport::union_times(
278            &mut self.rebuild_time_per_ruleset,
279            other.rebuild_time_per_ruleset,
280        );
281    }
282}