logos_codegen/
lib.rs

1//! <img src="https://raw.githubusercontent.com/maciejhirsz/logos/master/logos.svg?sanitize=true" alt="Logos logo" width="250" align="right">
2//!
3//! # Logos
4//!
5//! This is a `#[derive]` macro crate, [for documentation go to main crate](https://docs.rs/logos).
6
7// The `quote!` macro requires deep recursion.
8#![recursion_limit = "196"]
9#![doc(html_logo_url = "https://maciej.codes/kosz/logos.png")]
10
11mod error;
12mod generator;
13#[cfg(not(feature = "fuzzing"))]
14mod graph;
15#[cfg(feature = "fuzzing")]
16pub mod graph;
17mod leaf;
18#[cfg(not(feature = "fuzzing"))]
19mod mir;
20#[cfg(feature = "fuzzing")]
21pub mod mir;
22mod parser;
23mod util;
24
25#[macro_use]
26#[allow(missing_docs)]
27mod macros;
28
29use generator::Generator;
30use graph::{DisambiguationError, Fork, Graph, Rope};
31use leaf::Leaf;
32use parser::{IgnoreFlags, Mode, Parser};
33use quote::ToTokens;
34use util::MaybeVoid;
35
36use proc_macro2::{Delimiter, TokenStream, TokenTree};
37use quote::quote;
38use syn::parse_quote;
39use syn::spanned::Spanned;
40use syn::{Fields, ItemEnum};
41
/// Name of the `#[logos(...)]` attribute; one of the attributes removed by `strip_attributes`.
const LOGOS_ATTR: &str = "logos";
/// Name of the legacy `#[error]` variant attribute; `generate` reports a migration error when it
/// encounters it on a variant.
const ERROR_ATTR: &str = "error";
/// Name of the `#[token(...)]` variant attribute, handled by `generate` as a literal definition.
const TOKEN_ATTR: &str = "token";
/// Name of the `#[regex(...)]` variant attribute, handled by `generate` as a regex definition.
const REGEX_ATTR: &str = "regex";
46
47/// Generate a `Logos` implementation for the given struct, provided as a stream of rust tokens.
48pub fn generate(input: TokenStream) -> TokenStream {
49    debug!("Reading input token streams");
50
51    let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
52
53    let name = &item.ident;
54
55    let mut parser = Parser::default();
56
57    for param in item.generics.params {
58        parser.parse_generic(param);
59    }
60
61    for attr in &mut item.attrs {
62        parser.try_parse_logos(attr);
63    }
64
65    let mut ropes = Vec::new();
66    let mut regex_ids = Vec::new();
67    let mut graph = Graph::new();
68
69    {
70        let errors = &mut parser.errors;
71
72        for literal in &parser.skips {
73            match literal.to_mir(&parser.subpatterns, IgnoreFlags::Empty, errors) {
74                Ok(mir) => {
75                    let then = graph.push(Leaf::new_skip(literal.span()).priority(mir.priority()));
76                    let id = graph.regex(mir, then);
77
78                    regex_ids.push(id);
79                }
80                Err(err) => {
81                    errors.err(err, literal.span());
82                }
83            }
84        }
85    }
86
87    debug!("Iterating through enum variants");
88
89    for variant in &mut item.variants {
90        let field = match &mut variant.fields {
91            Fields::Unit => MaybeVoid::Void,
92            Fields::Unnamed(fields) => {
93                if fields.unnamed.len() != 1 {
94                    parser.err(
95                        format!(
96                            "Logos currently only supports variants with one field, found {}",
97                            fields.unnamed.len(),
98                        ),
99                        fields.span(),
100                    );
101                }
102
103                let ty = &mut fields
104                    .unnamed
105                    .first_mut()
106                    .expect("Already checked len; qed")
107                    .ty;
108                let ty = parser.get_type(ty);
109
110                MaybeVoid::Some(ty)
111            }
112            Fields::Named(fields) => {
113                parser.err("Logos doesn't support named fields yet.", fields.span());
114
115                MaybeVoid::Void
116            }
117        };
118
119        // Lazy leaf constructor to avoid cloning
120        let var_ident = &variant.ident;
121        let leaf = move |span| Leaf::new(var_ident, span).field(field.clone());
122
123        for attr in &mut variant.attrs {
124            let attr_name = match attr.path().get_ident() {
125                Some(ident) => ident.to_string(),
126                None => continue,
127            };
128
129            match attr_name.as_str() {
130                ERROR_ATTR => {
131                    // TODO: Remove in future versions
132                    parser.err(
133                        "\
134                        Since 0.13 Logos no longer requires the #[error] variant.\n\
135                        \n\
136                        For help with migration see release notes: \
137                        https://github.com/maciejhirsz/logos/releases\
138                        ",
139                        attr.span(),
140                    );
141                }
142                TOKEN_ATTR => {
143                    let definition = match parser.parse_definition(attr) {
144                        Some(definition) => definition,
145                        None => {
146                            parser.err("Expected #[token(...)]", attr.span());
147                            continue;
148                        }
149                    };
150
151                    if definition.ignore_flags.is_empty() {
152                        let bytes = definition.literal.to_bytes();
153                        let then = graph.push(
154                            leaf(definition.literal.span())
155                                .priority(definition.priority.unwrap_or(bytes.len() * 2))
156                                .callback(definition.callback),
157                        );
158
159                        ropes.push(Rope::new(bytes, then));
160                    } else {
161                        let mir = definition
162                            .literal
163                            .escape_regex()
164                            .to_mir(
165                                &Default::default(),
166                                definition.ignore_flags,
167                                &mut parser.errors,
168                            )
169                            .expect("The literal should be perfectly valid regex");
170
171                        let then = graph.push(
172                            leaf(definition.literal.span())
173                                .priority(definition.priority.unwrap_or_else(|| mir.priority()))
174                                .callback(definition.callback),
175                        );
176                        let id = graph.regex(mir, then);
177
178                        regex_ids.push(id);
179                    }
180                }
181                REGEX_ATTR => {
182                    let definition = match parser.parse_definition(attr) {
183                        Some(definition) => definition,
184                        None => {
185                            parser.err("Expected #[regex(...)]", attr.span());
186                            continue;
187                        }
188                    };
189                    let mir = match definition.literal.to_mir(
190                        &parser.subpatterns,
191                        definition.ignore_flags,
192                        &mut parser.errors,
193                    ) {
194                        Ok(mir) => mir,
195                        Err(err) => {
196                            parser.err(err, definition.literal.span());
197                            continue;
198                        }
199                    };
200
201                    let then = graph.push(
202                        leaf(definition.literal.span())
203                            .priority(definition.priority.unwrap_or_else(|| mir.priority()))
204                            .callback(definition.callback),
205                    );
206                    let id = graph.regex(mir, then);
207
208                    regex_ids.push(id);
209                }
210                _ => (),
211            }
212        }
213    }
214
215    let mut root = Fork::new();
216
217    debug!("Parsing additional options (extras, source, ...)");
218
219    let error_type = parser.error_type.take();
220    let extras = parser.extras.take();
221    let source = parser
222        .source
223        .take()
224        .map(strip_wrapping_parens)
225        .unwrap_or(match parser.mode {
226            Mode::Utf8 => quote!(str),
227            Mode::Binary => quote!([u8]),
228        });
229    let logos_path = parser
230        .logos_path
231        .take()
232        .unwrap_or_else(|| parse_quote!(::logos));
233
234    let generics = parser.generics();
235    let this = quote!(#name #generics);
236
237    let impl_logos = |body| {
238        quote! {
239            impl<'s> #logos_path::Logos<'s> for #this {
240                type Error = #error_type;
241
242                type Extras = #extras;
243
244                type Source = #source;
245
246                fn lex(lex: &mut #logos_path::Lexer<'s, Self>) {
247                    #body
248                }
249            }
250        }
251    };
252
253    for id in regex_ids {
254        let fork = graph.fork_off(id);
255
256        root.merge(fork, &mut graph);
257    }
258    for rope in ropes {
259        root.merge(rope.into_fork(&mut graph), &mut graph);
260    }
261    while let Some(id) = root.miss.take() {
262        let fork = graph.fork_off(id);
263
264        if fork.branches().next().is_some() {
265            root.merge(fork, &mut graph);
266        } else {
267            break;
268        }
269    }
270
271    debug!("Checking if any two tokens have the same priority");
272
273    for &DisambiguationError(a, b) in graph.errors() {
274        let a = graph[a].unwrap_leaf();
275        let b = graph[b].unwrap_leaf();
276        let disambiguate = a.priority + 1;
277
278        let mut err = |a: &Leaf, b: &Leaf| {
279            parser.err(
280                format!(
281                    "\
282                    A definition of variant `{a}` can match the same input as another definition of variant `{b}`.\n\
283                    \n\
284                    hint: Consider giving one definition a higher priority: \
285                    #[regex(..., priority = {disambiguate})]\
286                    ",
287                ),
288                a.span
289            );
290        };
291
292        err(a, b);
293        err(b, a);
294    }
295
296    if let Some(errors) = parser.errors.render() {
297        return impl_logos(errors);
298    }
299
300    let root = graph.push(root);
301
302    graph.shake(root);
303
304    debug!("Generating code from graph:\n{graph:#?}");
305
306    let generator = Generator::new(name, &this, root, &graph);
307
308    let body = generator.generate();
309    impl_logos(quote! {
310        use #logos_path::internal::{LexerInternal, CallbackResult};
311
312        type Lexer<'s> = #logos_path::Lexer<'s, #this>;
313
314        fn _end<'s>(lex: &mut Lexer<'s>) {
315            lex.end()
316        }
317
318        fn _error<'s>(lex: &mut Lexer<'s>) {
319            lex.bump_unchecked(1);
320
321            lex.error();
322        }
323
324        #body
325    })
326}
327
328/// Strip all logos attributes from the given struct, allowing it to be used in code without `logos-derive` present.
329pub fn strip_attributes(input: TokenStream) -> TokenStream {
330    let mut item: ItemEnum = syn::parse2(input).expect("Logos can be only be derived for enums");
331
332    strip_attrs_from_vec(&mut item.attrs);
333
334    for attr in &mut item.attrs {
335        if let syn::Meta::List(meta) = &mut attr.meta {
336            if meta.path.is_ident("derive") {
337                let mut tokens =
338                    std::mem::replace(&mut meta.tokens, TokenStream::new()).into_iter();
339
340                while let Some(TokenTree::Ident(ident)) = tokens.next() {
341                    let punct = tokens.next();
342
343                    if ident == "Logos" {
344                        continue;
345                    }
346
347                    meta.tokens.extend([TokenTree::Ident(ident)]);
348                    meta.tokens.extend(punct);
349                }
350            }
351        }
352    }
353
354    for variant in &mut item.variants {
355        strip_attrs_from_vec(&mut variant.attrs);
356        for field in &mut variant.fields {
357            strip_attrs_from_vec(&mut field.attrs);
358        }
359    }
360
361    item.to_token_stream()
362}
363
364fn strip_attrs_from_vec(attrs: &mut Vec<syn::Attribute>) {
365    attrs.retain(|attr| !is_logos_attr(attr))
366}
367
368fn is_logos_attr(attr: &syn::Attribute) -> bool {
369    attr.path().is_ident(LOGOS_ATTR)
370        || attr.path().is_ident(TOKEN_ATTR)
371        || attr.path().is_ident(REGEX_ATTR)
372}
373
374fn strip_wrapping_parens(t: TokenStream) -> TokenStream {
375    let tts: Vec<TokenTree> = t.into_iter().collect();
376
377    if tts.len() != 1 {
378        tts.into_iter().collect()
379    } else {
380        match tts.into_iter().next().unwrap() {
381            TokenTree::Group(g) => {
382                if g.delimiter() == Delimiter::Parenthesis {
383                    g.stream()
384                } else {
385                    core::iter::once(TokenTree::Group(g)).collect()
386                }
387            }
388            tt => core::iter::once(tt).collect(),
389        }
390    }
391}