logos_codegen/generator/
mod.rs

1use fnv::{FnvHashMap as Map, FnvHashSet as Set};
2use proc_macro2::TokenStream;
3use quote::{quote, ToTokens, TokenStreamExt};
4use syn::Ident;
5
6use crate::graph::{Graph, Meta, Node, NodeId, Range};
7use crate::leaf::Leaf;
8use crate::util::ToIdent;
9
10mod context;
11mod fork;
12mod leaf;
13mod rope;
14mod tables;
15
16use self::context::Context;
17use self::tables::TableStack;
18
19pub struct Generator<'a> {
20    /// Name of the type we are implementing the `Logos` trait for
21    name: &'a Ident,
22    /// Name of the type with any generics it might need
23    this: &'a TokenStream,
24    /// Id to the root node
25    root: NodeId,
26    /// Reference to the graph with all of the nodes
27    graph: &'a Graph<Leaf<'a>>,
28    /// Meta data collected for the nodes
29    meta: Meta,
30    /// Buffer with functions growing during generation
31    rendered: TokenStream,
32    /// Set of functions that have already been rendered
33    fns: Set<(NodeId, Context)>,
34    /// Function name identifiers
35    idents: Map<(NodeId, Context), Ident>,
36    /// Local function calls. Note: a call might change its context,
37    /// so we can't use `idents` for this purpose.
38    gotos: Map<(NodeId, Context), TokenStream>,
39    /// Identifiers for helper functions matching a byte to a given
40    /// set of ranges
41    tests: Map<Vec<Range>, Ident>,
42    /// Related to above, table stack manages tables that need to be
43    tables: TableStack,
44}
45
46impl<'a> Generator<'a> {
47    pub fn new(
48        name: &'a Ident,
49        this: &'a TokenStream,
50        root: NodeId,
51        graph: &'a Graph<Leaf>,
52    ) -> Self {
53        let rendered = Self::fast_loop_macro();
54        let meta = Meta::analyze(root, graph);
55
56        Generator {
57            name,
58            this,
59            root,
60            graph,
61            meta,
62            rendered,
63            fns: Set::default(),
64            idents: Map::default(),
65            gotos: Map::default(),
66            tests: Map::default(),
67            tables: TableStack::new(),
68        }
69    }
70
71    pub fn generate(mut self) -> TokenStream {
72        let root = self.goto(self.root, Context::default()).clone();
73        let rendered = &self.rendered;
74        let tables = &self.tables;
75
76        quote! {
77            #tables
78            #rendered
79            #root
80        }
81    }
82
83    fn generate_fn(&mut self, id: NodeId, ctx: Context) {
84        if self.fns.contains(&(id, ctx)) {
85            return;
86        }
87        self.fns.insert((id, ctx));
88
89        let body = match &self.graph[id] {
90            Node::Fork(fork) => self.generate_fork(id, fork, ctx),
91            Node::Rope(rope) => self.generate_rope(rope, ctx),
92            Node::Leaf(leaf) => self.generate_leaf(leaf, ctx),
93        };
94        let ident = self.generate_ident(id, ctx);
95        let out = quote! {
96            #[inline]
97            fn #ident<'s>(lex: &mut Lexer<'s>) {
98                #body
99            }
100        };
101
102        self.rendered.append_all(out);
103    }
104
105    fn goto(&mut self, id: NodeId, mut ctx: Context) -> &TokenStream {
106        let key = (id, ctx);
107
108        // Allow contains_key + insert because self.generate_ident borrows a mutable ref to self
109        // too.
110        #[allow(clippy::map_entry)]
111        if !self.gotos.contains_key(&key) {
112            let meta = &self.meta[id];
113            let enters_loop = !meta.loop_entry_from.is_empty();
114
115            let bump = if enters_loop || !ctx.can_backtrack() {
116                ctx.switch(self.graph[id].miss())
117            } else {
118                None
119            };
120
121            let bump = match (bump, enters_loop, meta.min_read) {
122                (Some(t), _, _) => Some(t),
123                (None, true, _) => ctx.bump(),
124                (None, false, 0) => ctx.bump(),
125                (None, false, _) => None,
126            };
127
128            if meta.min_read == 0 || ctx.remainder() < meta.min_read {
129                ctx.wipe();
130            }
131
132            let ident = self.generate_ident(id, ctx);
133            let mut call_site = quote!(#ident(lex));
134
135            if let Some(bump) = bump {
136                call_site = quote!({
137                    #bump
138                    #call_site
139                });
140            }
141            self.gotos.insert(key, call_site);
142            self.generate_fn(id, ctx);
143        }
144        &self.gotos[&key]
145    }
146
147    fn generate_ident(&mut self, id: NodeId, ctx: Context) -> &Ident {
148        self.idents.entry((id, ctx)).or_insert_with(|| {
149            let mut ident = format!("goto{}", id);
150
151            ctx.write_suffix(&mut ident);
152
153            ident.to_ident()
154        })
155    }
156
157    /// Returns an identifier to a function that matches a byte to any
158    /// of the provided ranges. This will generate either a simple
159    /// match expression, or use a lookup table internally.
160    fn generate_test(&mut self, ranges: Vec<Range>) -> &Ident {
161        if !self.tests.contains_key(&ranges) {
162            let idx = self.tests.len();
163            let ident = format!("pattern{}", idx).to_ident();
164
165            let lo = ranges.first().unwrap().start;
166            let hi = ranges.last().unwrap().end;
167
168            let body = match ranges.len() {
169                0..=2 => {
170                    quote! {
171                        match byte {
172                            #(#ranges)|* => true,
173                            _ => false,
174                        }
175                    }
176                }
177                _ if hi - lo < 64 => {
178                    let mut offset = hi.saturating_sub(63);
179
180                    while offset.count_ones() > 1 && lo - offset > 0 {
181                        offset += 1;
182                    }
183
184                    let mut table = 0u64;
185
186                    for byte in ranges.iter().flat_map(|range| *range) {
187                        if byte - offset >= 64 {
188                            panic!("{:#?} {} {} {}", ranges, hi, lo, offset);
189                        }
190                        table |= 1 << (byte - offset);
191                    }
192
193                    let search = match offset {
194                        0 => quote!(byte),
195                        _ => quote!(byte.wrapping_sub(#offset)),
196                    };
197
198                    quote! {
199                        const LUT: u64 = #table;
200
201                        match 1u64.checked_shl(#search as u32) {
202                            Some(shift) => LUT & shift != 0,
203                            None => false,
204                        }
205                    }
206                }
207                _ => {
208                    let mut view = self.tables.view();
209
210                    for byte in ranges.iter().flat_map(|range| *range) {
211                        view.flag(byte);
212                    }
213
214                    let mask = view.mask();
215                    let lut = view.ident();
216
217                    quote! {
218                        #lut[byte as usize] & #mask > 0
219                    }
220                }
221            };
222            self.rendered.append_all(quote! {
223                #[inline]
224                fn #ident(byte: u8) -> bool {
225                    #body
226                }
227            });
228            self.tests.insert(ranges.clone(), ident);
229        }
230        &self.tests[&ranges]
231    }
232}
233
234macro_rules! match_quote {
235    ($source:expr; $($byte:tt,)* ) => {match $source {
236        $( $byte => quote!($byte), )*
237        byte => quote!(#byte),
238    }}
239}
240
241fn byte_to_tokens(byte: u8) -> TokenStream {
242    match_quote! {
243        byte;
244        b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9',
245        b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j',
246        b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't',
247        b'u', b'v', b'w', b'x', b'y', b'z',
248        b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J',
249        b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T',
250        b'U', b'V', b'W', b'X', b'Y', b'Z',
251        b'!', b'@', b'#', b'$', b'%', b'^', b'&', b'*', b'(', b')',
252        b'{', b'}', b'[', b']', b'<', b'>', b'-', b'=', b'_', b'+',
253        b':', b';', b',', b'.', b'/', b'?', b'|', b'"', b'\'', b'\\',
254    }
255}
256
257impl ToTokens for Range {
258    fn to_tokens(&self, tokens: &mut TokenStream) {
259        let Range { start, end } = self;
260
261        tokens.append_all(byte_to_tokens(*start));
262
263        if start != end {
264            tokens.append_all(quote!(..=));
265            tokens.append_all(byte_to_tokens(*end));
266        }
267    }
268}