1#![recursion_limit = "196"]
9#![doc(html_logo_url = "https://maciej.codes/kosz/logos.png")]
10
11mod error;
12mod generator;
13#[cfg(not(feature = "fuzzing"))]
14mod graph;
15#[cfg(feature = "fuzzing")]
16pub mod graph;
17mod leaf;
18#[cfg(not(feature = "fuzzing"))]
19mod mir;
20#[cfg(feature = "fuzzing")]
21pub mod mir;
22mod parser;
23mod util;
24
25#[macro_use]
26#[allow(missing_docs)]
27mod macros;
28
29use generator::Generator;
30use graph::{DisambiguationError, Fork, Graph, Rope};
31use leaf::Leaf;
32use parser::{IgnoreFlags, Mode, Parser};
33use quote::ToTokens;
34use util::MaybeVoid;
35
36use proc_macro2::{Delimiter, TokenStream, TokenTree};
37use quote::quote;
38use syn::parse_quote;
39use syn::spanned::Spanned;
40use syn::{Fields, ItemEnum};
41
// Name of the `#[logos(...)]` attribute.
const LOGOS_ATTR: &str = "logos";
// Name of the `#[error]` attribute; `generate` reports an error for any variant that carries it.
const ERROR_ATTR: &str = "error";
// Name of the `#[token(...)]` attribute, handled per variant in `generate`.
const TOKEN_ATTR: &str = "token";
// Name of the `#[regex(...)]` attribute, handled per variant in `generate`.
const REGEX_ATTR: &str = "regex";
46
47pub fn generate(input: TokenStream) -> TokenStream {
49 debug!("Reading input token streams");
50
51 let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
52
53 let name = &item.ident;
54
55 let mut parser = Parser::default();
56
57 for param in item.generics.params {
58 parser.parse_generic(param);
59 }
60
61 for attr in &mut item.attrs {
62 parser.try_parse_logos(attr);
63 }
64
65 let mut ropes = Vec::new();
66 let mut regex_ids = Vec::new();
67 let mut graph = Graph::new();
68
69 {
70 let errors = &mut parser.errors;
71
72 for literal in &parser.skips {
73 match literal.to_mir(&parser.subpatterns, IgnoreFlags::Empty, errors) {
74 Ok(mir) => {
75 let then = graph.push(Leaf::new_skip(literal.span()).priority(mir.priority()));
76 let id = graph.regex(mir, then);
77
78 regex_ids.push(id);
79 }
80 Err(err) => {
81 errors.err(err, literal.span());
82 }
83 }
84 }
85 }
86
87 debug!("Iterating through enum variants");
88
89 for variant in &mut item.variants {
90 let field = match &mut variant.fields {
91 Fields::Unit => MaybeVoid::Void,
92 Fields::Unnamed(fields) => {
93 if fields.unnamed.len() != 1 {
94 parser.err(
95 format!(
96 "Logos currently only supports variants with one field, found {}",
97 fields.unnamed.len(),
98 ),
99 fields.span(),
100 );
101 }
102
103 let ty = &mut fields
104 .unnamed
105 .first_mut()
106 .expect("Already checked len; qed")
107 .ty;
108 let ty = parser.get_type(ty);
109
110 MaybeVoid::Some(ty)
111 }
112 Fields::Named(fields) => {
113 parser.err("Logos doesn't support named fields yet.", fields.span());
114
115 MaybeVoid::Void
116 }
117 };
118
119 let var_ident = &variant.ident;
121 let leaf = move |span| Leaf::new(var_ident, span).field(field.clone());
122
123 for attr in &mut variant.attrs {
124 let attr_name = match attr.path().get_ident() {
125 Some(ident) => ident.to_string(),
126 None => continue,
127 };
128
129 match attr_name.as_str() {
130 ERROR_ATTR => {
131 parser.err(
133 "\
134 Since 0.13 Logos no longer requires the #[error] variant.\n\
135 \n\
136 For help with migration see release notes: \
137 https://github.com/maciejhirsz/logos/releases\
138 ",
139 attr.span(),
140 );
141 }
142 TOKEN_ATTR => {
143 let definition = match parser.parse_definition(attr) {
144 Some(definition) => definition,
145 None => {
146 parser.err("Expected #[token(...)]", attr.span());
147 continue;
148 }
149 };
150
151 if definition.ignore_flags.is_empty() {
152 let bytes = definition.literal.to_bytes();
153 let then = graph.push(
154 leaf(definition.literal.span())
155 .priority(definition.priority.unwrap_or(bytes.len() * 2))
156 .callback(definition.callback),
157 );
158
159 ropes.push(Rope::new(bytes, then));
160 } else {
161 let mir = definition
162 .literal
163 .escape_regex()
164 .to_mir(
165 &Default::default(),
166 definition.ignore_flags,
167 &mut parser.errors,
168 )
169 .expect("The literal should be perfectly valid regex");
170
171 let then = graph.push(
172 leaf(definition.literal.span())
173 .priority(definition.priority.unwrap_or_else(|| mir.priority()))
174 .callback(definition.callback),
175 );
176 let id = graph.regex(mir, then);
177
178 regex_ids.push(id);
179 }
180 }
181 REGEX_ATTR => {
182 let definition = match parser.parse_definition(attr) {
183 Some(definition) => definition,
184 None => {
185 parser.err("Expected #[regex(...)]", attr.span());
186 continue;
187 }
188 };
189 let mir = match definition.literal.to_mir(
190 &parser.subpatterns,
191 definition.ignore_flags,
192 &mut parser.errors,
193 ) {
194 Ok(mir) => mir,
195 Err(err) => {
196 parser.err(err, definition.literal.span());
197 continue;
198 }
199 };
200
201 let then = graph.push(
202 leaf(definition.literal.span())
203 .priority(definition.priority.unwrap_or_else(|| mir.priority()))
204 .callback(definition.callback),
205 );
206 let id = graph.regex(mir, then);
207
208 regex_ids.push(id);
209 }
210 _ => (),
211 }
212 }
213 }
214
215 let mut root = Fork::new();
216
217 debug!("Parsing additional options (extras, source, ...)");
218
219 let error_type = parser.error_type.take();
220 let extras = parser.extras.take();
221 let source = parser
222 .source
223 .take()
224 .map(strip_wrapping_parens)
225 .unwrap_or(match parser.mode {
226 Mode::Utf8 => quote!(str),
227 Mode::Binary => quote!([u8]),
228 });
229 let logos_path = parser
230 .logos_path
231 .take()
232 .unwrap_or_else(|| parse_quote!(::logos));
233
234 let generics = parser.generics();
235 let this = quote!(#name #generics);
236
237 let impl_logos = |body| {
238 quote! {
239 impl<'s> #logos_path::Logos<'s> for #this {
240 type Error = #error_type;
241
242 type Extras = #extras;
243
244 type Source = #source;
245
246 fn lex(lex: &mut #logos_path::Lexer<'s, Self>) {
247 #body
248 }
249 }
250 }
251 };
252
253 for id in regex_ids {
254 let fork = graph.fork_off(id);
255
256 root.merge(fork, &mut graph);
257 }
258 for rope in ropes {
259 root.merge(rope.into_fork(&mut graph), &mut graph);
260 }
261 while let Some(id) = root.miss.take() {
262 let fork = graph.fork_off(id);
263
264 if fork.branches().next().is_some() {
265 root.merge(fork, &mut graph);
266 } else {
267 break;
268 }
269 }
270
271 debug!("Checking if any two tokens have the same priority");
272
273 for &DisambiguationError(a, b) in graph.errors() {
274 let a = graph[a].unwrap_leaf();
275 let b = graph[b].unwrap_leaf();
276 let disambiguate = a.priority + 1;
277
278 let mut err = |a: &Leaf, b: &Leaf| {
279 parser.err(
280 format!(
281 "\
282 A definition of variant `{a}` can match the same input as another definition of variant `{b}`.\n\
283 \n\
284 hint: Consider giving one definition a higher priority: \
285 #[regex(..., priority = {disambiguate})]\
286 ",
287 ),
288 a.span
289 );
290 };
291
292 err(a, b);
293 err(b, a);
294 }
295
296 if let Some(errors) = parser.errors.render() {
297 return impl_logos(errors);
298 }
299
300 let root = graph.push(root);
301
302 graph.shake(root);
303
304 debug!("Generating code from graph:\n{graph:#?}");
305
306 let generator = Generator::new(name, &this, root, &graph);
307
308 let body = generator.generate();
309 impl_logos(quote! {
310 use #logos_path::internal::{LexerInternal, CallbackResult};
311
312 type Lexer<'s> = #logos_path::Lexer<'s, #this>;
313
314 fn _end<'s>(lex: &mut Lexer<'s>) {
315 lex.end()
316 }
317
318 fn _error<'s>(lex: &mut Lexer<'s>) {
319 lex.bump_unchecked(1);
320
321 lex.error();
322 }
323
324 #body
325 })
326}
327
328pub fn strip_attributes(input: TokenStream) -> TokenStream {
330 let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
331
332 strip_attrs_from_vec(&mut item.attrs);
333
334 for attr in &mut item.attrs {
335 if let syn::Meta::List(meta) = &mut attr.meta {
336 if meta.path.is_ident("derive") {
337 let mut tokens =
338 std::mem::replace(&mut meta.tokens, TokenStream::new()).into_iter();
339
340 while let Some(TokenTree::Ident(ident)) = tokens.next() {
341 let punct = tokens.next();
342
343 if ident == "Logos" {
344 continue;
345 }
346
347 meta.tokens.extend([TokenTree::Ident(ident)]);
348 meta.tokens.extend(punct);
349 }
350 }
351 }
352 }
353
354 for variant in &mut item.variants {
355 strip_attrs_from_vec(&mut variant.attrs);
356 for field in &mut variant.fields {
357 strip_attrs_from_vec(&mut field.attrs);
358 }
359 }
360
361 item.to_token_stream()
362}
363
364fn strip_attrs_from_vec(attrs: &mut Vec<syn::Attribute>) {
365 attrs.retain(|attr| !is_logos_attr(attr))
366}
367
368fn is_logos_attr(attr: &syn::Attribute) -> bool {
369 attr.path().is_ident(LOGOS_ATTR)
370 || attr.path().is_ident(TOKEN_ATTR)
371 || attr.path().is_ident(REGEX_ATTR)
372}
373
374fn strip_wrapping_parens(t: TokenStream) -> TokenStream {
375 let tts: Vec<TokenTree> = t.into_iter().collect();
376
377 if tts.len() != 1 {
378 tts.into_iter().collect()
379 } else {
380 match tts.into_iter().next().unwrap() {
381 TokenTree::Group(g) => {
382 if g.delimiter() == Delimiter::Parenthesis {
383 g.stream()
384 } else {
385 core::iter::once(TokenTree::Group(g)).collect()
386 }
387 }
388 tt => core::iter::once(tt).collect(),
389 }
390 }
391}