logos_codegen/generator/
context.rs

1use proc_macro2::TokenStream;
2use quote::quote;
3
4use crate::generator::Generator;
5use crate::graph::NodeId;
6
7/// This struct keeps track of bytes available to be read without
8/// bounds checking across the tree.
9///
10/// For example, a branch that matches 4 bytes followed by a fork
11/// with smallest branch containing of 2 bytes can do a bounds check
12/// for 6 bytes ahead, and leave the remaining 2 byte array (fixed size)
13/// to be handled by the fork, avoiding bound checks there.
14#[derive(Default, Clone, Copy, PartialEq, Eq, Hash, Debug)]
15pub struct Context {
16    /// Amount of bytes that haven't been bumped yet but should
17    /// before a new read is performed
18    at: usize,
19    /// Number of bytes available without bound checks
20    available: usize,
21    /// Whether or not the Lexer has been bumped at least by 1 byte
22    bumped: bool,
23    /// Node to backtrack to to in case an explicit match has failed.
24    /// If `None` will instead produce an error token.
25    backtrack: Option<NodeId>,
26}
27
28impl Context {
29    pub fn can_backtrack(&self) -> bool {
30        self.backtrack.is_some()
31    }
32
33    pub fn switch(&mut self, miss: Option<NodeId>) -> Option<TokenStream> {
34        self.backtrack = Some(miss?);
35        self.bump()
36    }
37
38    pub const fn advance(self, n: usize) -> Self {
39        Context {
40            at: self.at + n,
41            ..self
42        }
43    }
44
45    pub fn bump(&mut self) -> Option<TokenStream> {
46        match self.at {
47            0 => None,
48            n => {
49                let tokens = quote!(lex.bump_unchecked(#n););
50                self.at = 0;
51                self.available = 0;
52                self.bumped = true;
53                Some(tokens)
54            }
55        }
56    }
57
58    pub fn remainder(&self) -> usize {
59        self.available.saturating_sub(self.at)
60    }
61
62    pub fn read_byte(&mut self) -> TokenStream {
63        let at = self.at;
64
65        self.advance(1);
66
67        #[cfg(not(feature = "forbid_unsafe"))]
68        {
69            quote!(unsafe { lex.read_byte_unchecked(#at) })
70        }
71
72        #[cfg(feature = "forbid_unsafe")]
73        {
74            quote!(lex.read_byte(#at))
75        }
76    }
77
78    pub fn read(&mut self, len: usize) -> TokenStream {
79        self.available = len;
80
81        match (self.at, len) {
82            (0, 0) => quote!(lex.read::<u8>()),
83            (a, 0) => quote!(lex.read_at::<u8>(#a)),
84            (0, l) => quote!(lex.read::<&[u8; #l]>()),
85            (a, l) => quote!(lex.read_at::<&[u8; #l]>(#a)),
86        }
87    }
88
89    pub fn wipe(&mut self) {
90        self.available = 0;
91    }
92
93    const fn backtrack(self) -> Self {
94        Context {
95            at: 0,
96            available: 0,
97            bumped: self.bumped,
98            backtrack: None,
99        }
100    }
101
102    pub fn miss(mut self, miss: Option<NodeId>, gen: &mut Generator) -> TokenStream {
103        self.wipe();
104        match (miss, self.backtrack) {
105            (Some(id), _) => gen.goto(id, self).clone(),
106            (_, Some(id)) => gen.goto(id, self.backtrack()).clone(),
107            _ if self.bumped => quote!(lex.error()),
108            _ => quote!(_error(lex)),
109        }
110    }
111
112    pub fn write_suffix(&self, buf: &mut String) {
113        use std::fmt::Write;
114
115        if self.at > 0 {
116            let _ = write!(buf, "_at{}", self.at);
117        }
118        if self.available > 0 {
119            let _ = write!(buf, "_with{}", self.available);
120        }
121        if let Some(id) = self.backtrack {
122            let _ = write!(buf, "_ctx{}", id);
123        }
124        if self.bumped {
125            buf.push_str("_x");
126        }
127    }
128}