rowan/green/
node_cache.rs

1use hashbrown::hash_map::RawEntryMut;
2use rustc_hash::FxHasher;
3use std::hash::{BuildHasherDefault, Hash, Hasher};
4
5use crate::{
6    green::GreenElementRef, GreenNode, GreenNodeData, GreenToken, GreenTokenData, NodeOrToken,
7    SyntaxKind,
8};
9
10use super::element::GreenElement;
11
12type HashMap<K, V> = hashbrown::HashMap<K, V, BuildHasherDefault<FxHasher>>;
13
14#[derive(Debug)]
15struct NoHash<T>(T);
16
17/// Interner for GreenTokens and GreenNodes
18// XXX: the impl is a bit tricky. As usual when writing interners, we want to
19// store all values in one HashSet.
20//
21// However, hashing trees is fun: hash of the tree is recursively defined. We
22// maintain an invariant -- if the tree is interned, then all of its children
23// are interned as well.
24//
25// That means that computing the hash naively is wasteful -- we just *know*
26// hashes of children, and we can re-use those.
27//
28// So here we use *raw* API of hashbrown and provide the hashes manually,
29// instead of going via a `Hash` impl. Our manual `Hash` and the
30// `#[derive(Hash)]` are actually different! At some point we had a fun bug,
31// where we accidentally mixed the two hashes, which made the cache much less
32// efficient.
33//
34// To fix that, we additionally wrap the data in `NoHash` wrapper, to make sure
35// we don't accidentally use the wrong hash!
36#[derive(Default, Debug)]
37pub struct NodeCache {
38    nodes: HashMap<NoHash<GreenNode>, ()>,
39    tokens: HashMap<NoHash<GreenToken>, ()>,
40}
41
42fn token_hash(token: &GreenTokenData) -> u64 {
43    let mut h = FxHasher::default();
44    token.kind().hash(&mut h);
45    token.text().hash(&mut h);
46    h.finish()
47}
48
49fn node_hash(node: &GreenNodeData) -> u64 {
50    let mut h = FxHasher::default();
51    node.kind().hash(&mut h);
52    for child in node.children() {
53        match child {
54            NodeOrToken::Node(it) => node_hash(it),
55            NodeOrToken::Token(it) => token_hash(it),
56        }
57        .hash(&mut h)
58    }
59    h.finish()
60}
61
62fn element_id(elem: GreenElementRef<'_>) -> *const () {
63    match elem {
64        NodeOrToken::Node(it) => it as *const GreenNodeData as *const (),
65        NodeOrToken::Token(it) => it as *const GreenTokenData as *const (),
66    }
67}
68
69impl NodeCache {
70    pub(crate) fn node(
71        &mut self,
72        kind: SyntaxKind,
73        children: &mut Vec<(u64, GreenElement)>,
74        first_child: usize,
75    ) -> (u64, GreenNode) {
76        let build_node = move |children: &mut Vec<(u64, GreenElement)>| {
77            GreenNode::new(kind, children.drain(first_child..).map(|(_, it)| it))
78        };
79
80        let children_ref = &children[first_child..];
81        if children_ref.len() > 3 {
82            let node = build_node(children);
83            return (0, node);
84        }
85
86        let hash = {
87            let mut h = FxHasher::default();
88            kind.hash(&mut h);
89            for &(hash, _) in children_ref {
90                if hash == 0 {
91                    let node = build_node(children);
92                    return (0, node);
93                }
94                hash.hash(&mut h);
95            }
96            h.finish()
97        };
98
99        // Green nodes are fully immutable, so it's ok to deduplicate them.
100        // This is the same optimization that Roslyn does
101        // https://github.com/KirillOsenkov/Bliki/wiki/Roslyn-Immutable-Trees
102        //
103        // For example, all `#[inline]` in this file share the same green node!
104        // For `libsyntax/parse/parser.rs`, measurements show that deduping saves
105        // 17% of the memory for green nodes!
106        let entry = self.nodes.raw_entry_mut().from_hash(hash, |node| {
107            node.0.kind() == kind && node.0.children().len() == children_ref.len() && {
108                let lhs = node.0.children();
109                let rhs = children_ref.iter().map(|(_, it)| it.as_deref());
110
111                let lhs = lhs.map(element_id);
112                let rhs = rhs.map(element_id);
113
114                lhs.eq(rhs)
115            }
116        });
117
118        let node = match entry {
119            RawEntryMut::Occupied(entry) => {
120                drop(children.drain(first_child..));
121                entry.key().0.clone()
122            }
123            RawEntryMut::Vacant(entry) => {
124                let node = build_node(children);
125                entry.insert_with_hasher(hash, NoHash(node.clone()), (), |n| node_hash(&n.0));
126                node
127            }
128        };
129
130        (hash, node)
131    }
132
133    pub(crate) fn token(&mut self, kind: SyntaxKind, text: &str) -> (u64, GreenToken) {
134        let hash = {
135            let mut h = FxHasher::default();
136            kind.hash(&mut h);
137            text.hash(&mut h);
138            h.finish()
139        };
140
141        let entry = self
142            .tokens
143            .raw_entry_mut()
144            .from_hash(hash, |token| token.0.kind() == kind && token.0.text() == text);
145
146        let token = match entry {
147            RawEntryMut::Occupied(entry) => entry.key().0.clone(),
148            RawEntryMut::Vacant(entry) => {
149                let token = GreenToken::new(kind, text);
150                entry.insert_with_hasher(hash, NoHash(token.clone()), (), |t| token_hash(&t.0));
151                token
152            }
153        };
154
155        (hash, token)
156    }
157}