logos_codegen/parser/
mod.rs

1use beef::lean::Cow;
2use proc_macro2::{Span, TokenStream, TokenTree};
3use quote::quote;
4use syn::spanned::Spanned;
5use syn::{Attribute, GenericParam, Lit, Meta, Type};
6
7use crate::error::Errors;
8use crate::leaf::{Callback, InlineCallback};
9use crate::util::{expect_punct, MaybeVoid};
10use crate::LOGOS_ATTR;
11
12mod definition;
13mod ignore_flags;
14mod nested;
15mod subpattern;
16mod type_params;
17
18pub use self::definition::{Definition, Literal};
19pub use self::ignore_flags::IgnoreFlags;
20use self::nested::{AttributeParser, Nested, NestedValue};
21pub use self::subpattern::Subpatterns;
22use self::type_params::{replace_lifetime, traverse_type, TypeParams};
23
24#[derive(Default)]
25pub struct Parser {
26    pub errors: Errors,
27    pub mode: Mode,
28    pub source: Option<TokenStream>,
29    pub skips: Vec<Literal>,
30    pub extras: MaybeVoid,
31    pub error_type: MaybeVoid,
32    pub subpatterns: Subpatterns,
33    pub logos_path: Option<TokenStream>,
34    types: TypeParams,
35}
36
/// Kind of input the parsed literals call for.
///
/// Being a plain field-less enum, it derives the usual value-type traits so
/// it can be printed, copied and compared.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum Mode {
    /// The default mode.
    #[default]
    Utf8,
    /// Selected by `Parser::parse_literal` once a byte-string literal is parsed.
    Binary,
}
43
44impl Parser {
45    pub fn parse_generic(&mut self, param: GenericParam) {
46        match param {
47            GenericParam::Lifetime(lt) => {
48                self.types.explicit_lifetime(lt, &mut self.errors);
49            }
50            GenericParam::Type(ty) => {
51                self.types.add(ty.ident);
52            }
53            GenericParam::Const(c) => {
54                self.err("Logos doesn't support const generics.", c.span());
55            }
56        }
57    }
58
59    pub fn generics(&mut self) -> Option<TokenStream> {
60        self.types.generics(&mut self.errors)
61    }
62
63    fn parse_attr(&mut self, attr: &mut Attribute) -> Option<AttributeParser> {
64        match &mut attr.meta {
65            Meta::List(list) => {
66                let tokens = std::mem::replace(&mut list.tokens, TokenStream::new());
67
68                Some(AttributeParser::new(tokens))
69            }
70            _ => None,
71        }
72    }
73
74    /// Try to parse the main `#[logos(...)]`, does nothing if
75    /// the attribute's name isn't `logos`.
76    pub fn try_parse_logos(&mut self, attr: &mut Attribute) {
77        if !attr.path().is_ident(LOGOS_ATTR) {
78            return;
79        }
80
81        let nested = match self.parse_attr(attr) {
82            Some(tokens) => tokens,
83            None => {
84                self.err("Expected #[logos(...)]", attr.span());
85                return;
86            }
87        };
88
89        for nested in nested {
90            let (name, value) = match nested {
91                Nested::Named(name, value) => (name, value),
92                Nested::Unexpected(tokens) | Nested::Unnamed(tokens) => {
93                    self.err("Invalid nested attribute", tokens.span());
94                    continue;
95                }
96            };
97
98            // IMPORTANT: Keep these sorted alphabetically for binary search down the line
99            #[allow(clippy::type_complexity)]
100            static NESTED_LOOKUP: &[(&str, fn(&mut Parser, Span, NestedValue))] = &[
101                ("crate", |parser, span, value| match value {
102                    NestedValue::Assign(logos_path) => parser.logos_path = Some(logos_path),
103                    _ => {
104                        parser.err("Expected: #[logos(crate = path::to::logos)]", span);
105                    }
106                }),
107                ("error", |parser, span, value| match value {
108                    NestedValue::Assign(value) => {
109                        let span = value.span();
110
111                        if let MaybeVoid::Some(previous) = parser.error_type.replace(value) {
112                            parser
113                                .err("Error type can be defined only once", span)
114                                .err("Previous definition here", previous.span());
115                        }
116                    }
117                    _ => {
118                        parser.err("Expected: #[logos(error = SomeType)]", span);
119                    }
120                }),
121                ("extras", |parser, span, value| match value {
122                    NestedValue::Assign(value) => {
123                        let span = value.span();
124
125                        if let MaybeVoid::Some(previous) = parser.extras.replace(value) {
126                            parser
127                                .err("Extras can be defined only once", span)
128                                .err("Previous definition here", previous.span());
129                        }
130                    }
131                    _ => {
132                        parser.err("Expected: #[logos(extras = SomeType)]", span);
133                    }
134                }),
135                ("skip", |parser, span, value| match value {
136                    NestedValue::Literal(lit) => {
137                        if let Some(literal) = parser.parse_literal(Lit::new(lit)) {
138                            parser.skips.push(literal);
139                        }
140                    }
141                    _ => {
142                        parser.err("Expected: #[logos(skip \"regex literal\")]", span);
143                    }
144                }),
145                ("source", |parser, span, value| match value {
146                    NestedValue::Assign(value) => {
147                        let span = value.span();
148                        if let Some(previous) = parser.source.replace(value) {
149                            parser
150                                .err("Source can be defined only once", span)
151                                .err("Previous definition here", previous.span());
152                        }
153                    }
154                    _ => {
155                        parser.err("Expected: #[logos(source = SomeType)]", span);
156                    }
157                }),
158                ("subpattern", |parser, span, value| match value {
159                    NestedValue::KeywordAssign(name, value) => {
160                        parser.subpatterns.add(name, value, &mut parser.errors);
161                    }
162                    _ => {
163                        parser.err(r#"Expected: #[logos(subpattern name = r"regex")]"#, span);
164                    }
165                }),
166                ("type", |parser, span, value| match value {
167                    NestedValue::KeywordAssign(generic, ty) => {
168                        parser.types.set(generic, ty, &mut parser.errors);
169                    }
170                    _ => {
171                        parser.err("Expected: #[logos(type T = SomeType)]", span);
172                    }
173                }),
174            ];
175
176            match NESTED_LOOKUP.binary_search_by_key(&name.to_string().as_str(), |(n, _)| n) {
177                Ok(idx) => NESTED_LOOKUP[idx].1(self, name.span(), value),
178                Err(_) => {
179                    let mut err = format!(
180                        "Unknown nested attribute #[logos({name})], expected one of: {}",
181                        NESTED_LOOKUP[0].0
182                    );
183
184                    for (allowed, _) in &NESTED_LOOKUP[1..] {
185                        err.push_str(", ");
186                        err.push_str(allowed);
187                    }
188
189                    self.err(err, name.span());
190                }
191            }
192        }
193    }
194
195    pub fn parse_literal(&mut self, lit: Lit) -> Option<Literal> {
196        match lit {
197            Lit::Str(string) => Some(Literal::Utf8(string)),
198            Lit::ByteStr(bytes) => {
199                self.mode = Mode::Binary;
200
201                Some(Literal::Bytes(bytes))
202            }
203            _ => {
204                self.err("Expected a &str or &[u8] slice", lit.span());
205
206                None
207            }
208        }
209    }
210
211    /// Parse attribute definition of a token:
212    ///
213    /// + `#[token(literal[, callback])]`
214    /// + `#[regex(literal[, callback])]`
215    pub fn parse_definition(&mut self, attr: &mut Attribute) -> Option<Definition> {
216        let mut nested = self.parse_attr(attr)?;
217
218        let literal = match nested.parsed::<Lit>()? {
219            Ok(lit) => self.parse_literal(lit)?,
220            Err(err) => {
221                self.err(err.to_string(), err.span());
222
223                return None;
224            }
225        };
226
227        let mut def = Definition::new(literal);
228
229        for (position, next) in nested.enumerate() {
230            match next {
231                Nested::Unexpected(tokens) => {
232                    self.err("Unexpected token in attribute", tokens.span());
233                }
234                Nested::Unnamed(tokens) => match position {
235                    0 => def.callback = self.parse_callback(tokens),
236                    _ => {
237                        self.err(
238                            "\
239                            Expected a named argument at this position\n\
240                            \n\
241                            hint: If you are trying to define a callback here use: callback = ...\
242                            ",
243                            tokens.span(),
244                        );
245                    }
246                },
247                Nested::Named(name, value) => {
248                    def.named_attr(name, value, self);
249                }
250            }
251        }
252
253        Some(def)
254    }
255
256    fn parse_callback(&mut self, tokens: TokenStream) -> Option<Callback> {
257        let span = tokens.span();
258        let mut tokens = tokens.into_iter();
259
260        if let Some(tt) = expect_punct(tokens.next(), '|') {
261            let mut label = TokenStream::from(tt);
262
263            label.extend(tokens);
264
265            return Some(Callback::Label(label));
266        }
267
268        let first = tokens.next();
269        let error = expect_punct(tokens.next(), '|');
270
271        let arg = match (error, first) {
272            (None, Some(TokenTree::Ident(arg))) => arg,
273            _ => {
274                self.err(
275                    "Inline callbacks must use closure syntax with exactly one parameter",
276                    span,
277                );
278                return None;
279            }
280        };
281
282        let body = match tokens.next() {
283            Some(TokenTree::Group(group)) => group.stream(),
284            Some(first) => {
285                let mut body = TokenStream::from(first);
286
287                body.extend(tokens);
288                body
289            }
290            None => {
291                self.err("Callback missing a body", span);
292                return None;
293            }
294        };
295
296        let inline = InlineCallback { arg, body, span };
297
298        Some(inline.into())
299    }
300
301    /// Checks if `ty` is a declared generic param, if so replaces it
302    /// with a concrete type defined using #[logos(type T = Type)]
303    ///
304    /// If no matching generic param is found, all lifetimes are fixed
305    /// to the source lifetime
306    pub fn get_type(&self, ty: &mut Type) -> TokenStream {
307        traverse_type(ty, &mut |ty| {
308            if let Type::Path(tp) = ty {
309                // Skip types that begin with `self::`
310                if tp.qself.is_none() {
311                    // If `ty` is a generic type parameter, try to find
312                    // its concrete type defined with #[logos(type T = Type)]
313                    if let Some(substitute) = self.types.find(&tp.path) {
314                        *ty = substitute;
315                    }
316                }
317            }
318            // If `ty` is a concrete type, fix its lifetimes to 'source
319            replace_lifetime(ty);
320        });
321
322        quote!(#ty)
323    }
324
325    pub fn err<M>(&mut self, message: M, span: Span) -> &mut Errors
326    where
327        M: Into<Cow<'static, str>>,
328    {
329        self.errors.err(message, span)
330    }
331}