// logos_codegen/parser/subpattern.rs

1use proc_macro2::TokenStream;
2use syn::Ident;
3
4use crate::error::Errors;
5use crate::mir::Mir;
6use crate::parser::definition::{bytes_to_regex_string, Literal};
7
8#[derive(Default)]
9pub struct Subpatterns {
10    map: Vec<(Ident, String)>,
11}
12
13impl Subpatterns {
14    pub fn add(&mut self, param: Ident, pattern: TokenStream, errors: &mut Errors) {
15        let lit = match syn::parse2::<Literal>(pattern) {
16            Ok(lit) => lit,
17            Err(e) => {
18                errors.err(e.to_string(), e.span());
19                return;
20            }
21        };
22
23        if let Some((name, _)) = self.map.iter().find(|(name, _)| *name == param) {
24            errors
25                .err(format!("{} can only be assigned once", param), param.span())
26                .err("Previously assigned here", name.span());
27            return;
28        }
29
30        let fixed = self.fix(&lit, errors);
31
32        // Validate the literal as proper regex. If it's not, emit an error.
33        let mir = match &lit {
34            Literal::Utf8(_) => Mir::utf8(&fixed),
35            Literal::Bytes(_) => Mir::binary(&fixed),
36        };
37
38        if let Err(err) = mir {
39            errors.err(err, lit.span());
40        };
41
42        self.map.push((param, fixed));
43    }
44
45    pub fn fix(&self, lit: &Literal, errors: &mut Errors) -> String {
46        let mut i = 0;
47        let mut pattern = match lit {
48            Literal::Utf8(s) => s.value(),
49            Literal::Bytes(b) => bytes_to_regex_string(b.value()),
50        };
51
52        while let Some(f) = pattern[i..].find("(?&") {
53            i += f;
54            pattern.replace_range(i..i + 3, "(?:");
55            i += 3;
56
57            let subref_end = if let Some(f) = pattern[i..].find(')') {
58                i + f
59            } else {
60                pattern.truncate(i); // truncate so latter error doesn't suppress
61                break; // regex-syntax will report the unclosed group
62            };
63
64            let name = &pattern[i..subref_end];
65            let name = match syn::parse_str::<Ident>(name) {
66                Ok(name) => name,
67                Err(_) => {
68                    errors.err(
69                        format!("subpattern reference `{}` is not an identifier", name),
70                        lit.span(),
71                    );
72                    // we emitted the error; make something up and continue
73                    pattern.replace_range(i..subref_end, "_");
74                    i += 2;
75                    continue;
76                }
77            };
78
79            match self.map.iter().find(|(def, _)| *def == name) {
80                Some((_, subpattern)) => {
81                    pattern.replace_range(i..subref_end, subpattern);
82                    i += subpattern.len() + 1;
83                }
84                None => {
85                    errors.err(
86                        format!("subpattern reference `{}` has not been defined", name),
87                        lit.span(),
88                    );
89                    // leaving `(?:name)` is fine
90                    i = subref_end + 1;
91                }
92            }
93        }
94
95        pattern
96    }
97}