1use fnv::{FnvHashMap as Map, FnvHashSet as Set};
2use proc_macro2::TokenStream;
3use quote::{quote, ToTokens, TokenStreamExt};
4use syn::Ident;
5
6use crate::graph::{Graph, Meta, Node, NodeId, Range};
7use crate::leaf::Leaf;
8use crate::util::ToIdent;
9
10mod context;
11mod fork;
12mod leaf;
13mod rope;
14mod tables;
15
16use self::context::Context;
17use self::tables::TableStack;
18
19pub struct Generator<'a> {
20 name: &'a Ident,
22 this: &'a TokenStream,
24 root: NodeId,
26 graph: &'a Graph<Leaf<'a>>,
28 meta: Meta,
30 rendered: TokenStream,
32 fns: Set<(NodeId, Context)>,
34 idents: Map<(NodeId, Context), Ident>,
36 gotos: Map<(NodeId, Context), TokenStream>,
39 tests: Map<Vec<Range>, Ident>,
42 tables: TableStack,
44}
45
46impl<'a> Generator<'a> {
47 pub fn new(
48 name: &'a Ident,
49 this: &'a TokenStream,
50 root: NodeId,
51 graph: &'a Graph<Leaf>,
52 ) -> Self {
53 let rendered = Self::fast_loop_macro();
54 let meta = Meta::analyze(root, graph);
55
56 Generator {
57 name,
58 this,
59 root,
60 graph,
61 meta,
62 rendered,
63 fns: Set::default(),
64 idents: Map::default(),
65 gotos: Map::default(),
66 tests: Map::default(),
67 tables: TableStack::new(),
68 }
69 }
70
71 pub fn generate(mut self) -> TokenStream {
72 let root = self.goto(self.root, Context::default()).clone();
73 let rendered = &self.rendered;
74 let tables = &self.tables;
75
76 quote! {
77 #tables
78 #rendered
79 #root
80 }
81 }
82
83 fn generate_fn(&mut self, id: NodeId, ctx: Context) {
84 if self.fns.contains(&(id, ctx)) {
85 return;
86 }
87 self.fns.insert((id, ctx));
88
89 let body = match &self.graph[id] {
90 Node::Fork(fork) => self.generate_fork(id, fork, ctx),
91 Node::Rope(rope) => self.generate_rope(rope, ctx),
92 Node::Leaf(leaf) => self.generate_leaf(leaf, ctx),
93 };
94 let ident = self.generate_ident(id, ctx);
95 let out = quote! {
96 #[inline]
97 fn #ident<'s>(lex: &mut Lexer<'s>) {
98 #body
99 }
100 };
101
102 self.rendered.append_all(out);
103 }
104
105 fn goto(&mut self, id: NodeId, mut ctx: Context) -> &TokenStream {
106 let key = (id, ctx);
107
108 #[allow(clippy::map_entry)]
111 if !self.gotos.contains_key(&key) {
112 let meta = &self.meta[id];
113 let enters_loop = !meta.loop_entry_from.is_empty();
114
115 let bump = if enters_loop || !ctx.can_backtrack() {
116 ctx.switch(self.graph[id].miss())
117 } else {
118 None
119 };
120
121 let bump = match (bump, enters_loop, meta.min_read) {
122 (Some(t), _, _) => Some(t),
123 (None, true, _) => ctx.bump(),
124 (None, false, 0) => ctx.bump(),
125 (None, false, _) => None,
126 };
127
128 if meta.min_read == 0 || ctx.remainder() < meta.min_read {
129 ctx.wipe();
130 }
131
132 let ident = self.generate_ident(id, ctx);
133 let mut call_site = quote!(#ident(lex));
134
135 if let Some(bump) = bump {
136 call_site = quote!({
137 #bump
138 #call_site
139 });
140 }
141 self.gotos.insert(key, call_site);
142 self.generate_fn(id, ctx);
143 }
144 &self.gotos[&key]
145 }
146
147 fn generate_ident(&mut self, id: NodeId, ctx: Context) -> &Ident {
148 self.idents.entry((id, ctx)).or_insert_with(|| {
149 let mut ident = format!("goto{}", id);
150
151 ctx.write_suffix(&mut ident);
152
153 ident.to_ident()
154 })
155 }
156
157 fn generate_test(&mut self, ranges: Vec<Range>) -> &Ident {
161 if !self.tests.contains_key(&ranges) {
162 let idx = self.tests.len();
163 let ident = format!("pattern{}", idx).to_ident();
164
165 let lo = ranges.first().unwrap().start;
166 let hi = ranges.last().unwrap().end;
167
168 let body = match ranges.len() {
169 0..=2 => {
170 quote! {
171 match byte {
172 #(#ranges)|* => true,
173 _ => false,
174 }
175 }
176 }
177 _ if hi - lo < 64 => {
178 let mut offset = hi.saturating_sub(63);
179
180 while offset.count_ones() > 1 && lo - offset > 0 {
181 offset += 1;
182 }
183
184 let mut table = 0u64;
185
186 for byte in ranges.iter().flat_map(|range| *range) {
187 if byte - offset >= 64 {
188 panic!("{:#?} {} {} {}", ranges, hi, lo, offset);
189 }
190 table |= 1 << (byte - offset);
191 }
192
193 let search = match offset {
194 0 => quote!(byte),
195 _ => quote!(byte.wrapping_sub(#offset)),
196 };
197
198 quote! {
199 const LUT: u64 = #table;
200
201 match 1u64.checked_shl(#search as u32) {
202 Some(shift) => LUT & shift != 0,
203 None => false,
204 }
205 }
206 }
207 _ => {
208 let mut view = self.tables.view();
209
210 for byte in ranges.iter().flat_map(|range| *range) {
211 view.flag(byte);
212 }
213
214 let mask = view.mask();
215 let lut = view.ident();
216
217 quote! {
218 #lut[byte as usize] & #mask > 0
219 }
220 }
221 };
222 self.rendered.append_all(quote! {
223 #[inline]
224 fn #ident(byte: u8) -> bool {
225 #body
226 }
227 });
228 self.tests.insert(ranges.clone(), ident);
229 }
230 &self.tests[&ranges]
231 }
232}
233
234macro_rules! match_quote {
235 ($source:expr; $($byte:tt,)* ) => {match $source {
236 $( $byte => quote!($byte), )*
237 byte => quote!(#byte),
238 }}
239}
240
241fn byte_to_tokens(byte: u8) -> TokenStream {
242 match_quote! {
243 byte;
244 b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9',
245 b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j',
246 b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't',
247 b'u', b'v', b'w', b'x', b'y', b'z',
248 b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J',
249 b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T',
250 b'U', b'V', b'W', b'X', b'Y', b'Z',
251 b'!', b'@', b'#', b'$', b'%', b'^', b'&', b'*', b'(', b')',
252 b'{', b'}', b'[', b']', b'<', b'>', b'-', b'=', b'_', b'+',
253 b':', b';', b',', b'.', b'/', b'?', b'|', b'"', b'\'', b'\\',
254 }
255}
256
257impl ToTokens for Range {
258 fn to_tokens(&self, tokens: &mut TokenStream) {
259 let Range { start, end } = self;
260
261 tokens.append_all(byte_to_tokens(*start));
262
263 if start != end {
264 tokens.append_all(quote!(..=));
265 tokens.append_all(byte_to_tokens(*end));
266 }
267 }
268}