logos/lib.rs

//! <img src="https://raw.githubusercontent.com/maciejhirsz/logos/master/logos.svg?sanitize=true" alt="Logos logo" width="250" align="right">
//!
//! # Logos
//!
//! _Create ridiculously fast Lexers._
//!
//! **Logos** has two goals:
//!
//! + To make it easy to create a Lexer, so you can focus on more complex problems.
//! + To make the generated Lexer faster than anything you'd write by hand.
//!
//! To achieve those, **Logos**:
//!
//! + Combines all token definitions into a single [deterministic state machine](https://en.wikipedia.org/wiki/Deterministic_finite_automaton).
//! + Optimizes branches into [lookup tables](https://en.wikipedia.org/wiki/Lookup_table) or [jump tables](https://en.wikipedia.org/wiki/Branch_table).
//! + Prevents [backtracking](https://en.wikipedia.org/wiki/ReDoS) inside token definitions.
//! + [Unwinds loops](https://en.wikipedia.org/wiki/Loop_unrolling), and batches reads to minimize bounds checking.
//! + Does all of that heavy lifting at compile time.
//!
//! See the [Logos handbook](https://maciejhirsz.github.io/logos/) for additional documentation and usage examples.
#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![warn(missing_docs)]
#![doc(html_logo_url = "https://maciej.codes/kosz/logos.png")]
#![cfg_attr(feature = "forbid_unsafe", forbid(unsafe_code))]

extern crate core;

use core::fmt::Debug;
#[cfg(feature = "export_derive")]
pub use logos_derive::Logos;

mod lexer;
pub mod source;

#[doc(hidden)]
pub mod internal;

pub use crate::lexer::{Lexer, Span, SpannedIter};
pub use crate::source::Source;

/// Trait implemented for an enum representing all tokens. You should never have
/// to implement it manually; use the `#[derive(Logos)]` attribute on your enum.
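///
/// # Example
///
/// A minimal sketch of the usual workflow: derive `Logos` on an enum and iterate
/// over the resulting `Lexer` (the token names and patterns below are only
/// illustrative):
///
/// ```rust
/// use logos::Logos;
///
/// #[derive(Logos, Debug, PartialEq)]
/// enum Token {
///     // Whitespace is skipped rather than emitted as a token.
///     #[regex(r"[ \t\n\f]+", logos::skip)]
///     Ignored,
///
///     #[token("fast")]
///     Fast,
///
///     #[token(".")]
///     Period,
///
///     #[regex("[a-zA-Z]+")]
///     Text,
/// }
///
/// let mut lex = Token::lexer("Create ridiculously fast Lexers.");
///
/// assert_eq!(lex.next(), Some(Ok(Token::Text)));
/// assert_eq!(lex.slice(), "Create");
///
/// assert_eq!(lex.next(), Some(Ok(Token::Text)));
/// assert_eq!(lex.next(), Some(Ok(Token::Fast)));
/// assert_eq!(lex.next(), Some(Ok(Token::Text)));
/// assert_eq!(lex.next(), Some(Ok(Token::Period)));
/// assert_eq!(lex.next(), None);
/// ```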
pub trait Logos<'source>: Sized {
    /// Associated type `Extras` for the particular lexer. This can be set using
    /// `#[logos(extras = MyExtras)]` and accessed inside callbacks.
    type Extras;

    /// Source type this token can be lexed from. This will default to `str`,
    /// unless one of the defined patterns explicitly uses non-UTF-8 byte values
    /// or byte slices, in which case that implementation will use `[u8]`.
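    ///
    /// For example, a definition using byte patterns will be lexed from `[u8]`.
    /// The sketch below is only illustrative (the token names are arbitrary):
    ///
    /// ```rust
    /// use logos::Logos;
    ///
    /// #[derive(Logos, Debug, PartialEq)]
    /// enum Token {
    ///     // A pattern containing a non-UTF-8 byte switches `Source` to `[u8]`.
    ///     #[token(b"\xFF")]
    ///     Marker,
    ///
    ///     #[regex(b"[a-z]+")]
    ///     Word,
    /// }
    ///
    /// // The lexer is now created from a byte slice rather than a `&str`.
    /// let mut lex = Token::lexer(&b"abc\xFFdef"[..]);
    ///
    /// assert_eq!(lex.next(), Some(Ok(Token::Word)));
    /// assert_eq!(lex.next(), Some(Ok(Token::Marker)));
    /// assert_eq!(lex.next(), Some(Ok(Token::Word)));
    /// assert_eq!(lex.next(), None);
    /// ```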
    type Source: Source + ?Sized + 'source;

    /// Error type returned by the lexer. This can be set using
    /// `#[logos(error = MyError)]`. Defaults to `()` if not set.
    type Error: Default + Clone + PartialEq + Debug + 'source;

    /// The heart of Logos. Called by the `Lexer`. The implementation for this function
    /// is generated by the `logos-derive` crate.
    fn lex(lexer: &mut Lexer<'source, Self>);

    /// Create a new instance of a `Lexer` that will produce tokens implementing
    /// this `Logos`.
    fn lexer(source: &'source Self::Source) -> Lexer<'source, Self>
    where
        Self::Extras: Default,
    {
        Lexer::new(source)
    }

    /// Create a new instance of a `Lexer` with the provided `Extras` that will
    /// produce tokens implementing this `Logos`.
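    ///
    /// # Example
    ///
    /// A small sketch that uses a `u32` as the `Extras` type to count words from
    /// inside a callback (the token names and starting value are arbitrary):
    ///
    /// ```rust
    /// use logos::Logos;
    ///
    /// #[derive(Logos, Debug, PartialEq)]
    /// #[logos(extras = u32)]
    /// enum Token {
    ///     #[regex(r"[ \t\n\f]+", logos::skip)]
    ///     Ignored,
    ///
    ///     // The callback bumps the word counter stored in `lex.extras`.
    ///     #[regex("[a-zA-Z]+", |lex| { lex.extras += 1; })]
    ///     Word,
    /// }
    ///
    /// let mut lex = Token::lexer_with_extras("Hello logos world", 0);
    ///
    /// while let Some(token) = lex.next() {
    ///     assert_eq!(token, Ok(Token::Word));
    /// }
    ///
    /// assert_eq!(lex.extras, 3);
    /// ```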
    fn lexer_with_extras(
        source: &'source Self::Source,
        extras: Self::Extras,
    ) -> Lexer<'source, Self> {
        Lexer::with_extras(source, extras)
    }
}

/// Type that can be returned from a callback, informing the `Lexer` to skip
/// the current token match. See also [`logos::skip`](./fn.skip.html).
///
/// # Example
///
/// ```rust
/// use logos::{Logos, Skip};
///
/// #[derive(Logos, Debug, PartialEq)]
/// enum Token<'a> {
///     // We will treat "abc" as if it were whitespace.
///     // This is identical to using `logos::skip`.
///     #[regex(" |abc", |_| Skip, priority = 3)]
///     Ignored,
///
///     #[regex("[a-zA-Z]+")]
///     Text(&'a str),
/// }
///
/// let tokens: Vec<_> = Token::lexer("Hello abc world").collect();
///
/// assert_eq!(
///     tokens,
///     &[
///         Ok(Token::Text("Hello")),
///         Ok(Token::Text("world")),
///     ],
/// );
/// ```
pub struct Skip;

/// Type that can be returned from a callback, either producing a field
/// for a token, or skipping it.
///
/// # Example
///
/// ```rust
/// use logos::{Logos, Filter};
///
/// #[derive(Logos, Debug, PartialEq)]
/// enum Token {
///     #[regex(r"[ \n\f\t]+", logos::skip)]
///     Ignored,
///
///     #[regex("[0-9]+", |lex| {
///         let n: u64 = lex.slice().parse().unwrap();
///
///         // Only emit a token if `n` is an even number
///         match n % 2 {
///             0 => Filter::Emit(n),
///             _ => Filter::Skip,
///         }
///     })]
///     EvenNumber(u64)
/// }
///
/// let tokens: Vec<_> = Token::lexer("20 11 42 23 100 8002").collect();
///
/// assert_eq!(
///     tokens,
///     &[
///         Ok(Token::EvenNumber(20)),
///         // skipping 11
///         Ok(Token::EvenNumber(42)),
///         // skipping 23
///         Ok(Token::EvenNumber(100)),
///         Ok(Token::EvenNumber(8002))
///     ]
/// );
/// ```
pub enum Filter<T> {
    /// Emit a token with a given value `T`. Use `()` for unit variants without fields.
    Emit(T),
    /// Skip the current match, analogous to [`Skip`](./struct.Skip.html).
    Skip,
}

/// Type that can be returned from a callback, either producing a field
/// for a token, skipping it, or emitting an error.
///
/// # Example
///
/// ```rust
/// use logos::{Logos, FilterResult};
///
/// #[derive(Debug, PartialEq, Clone, Default)]
/// enum LexingError {
///     NumberParseError,
///     NumberIsTen,
///     #[default]
///     Other,
/// }
///
/// impl From<std::num::ParseIntError> for LexingError {
///     fn from(_: std::num::ParseIntError) -> Self {
///         LexingError::NumberParseError
///     }
/// }
///
/// #[derive(Logos, Debug, PartialEq)]
/// #[logos(error = LexingError)]
/// enum Token {
///     #[regex(r"[ \n\f\t]+", logos::skip)]
///     Ignored,
///
///     #[regex("[0-9]+", |lex| {
///         let n: u64 = lex.slice().parse().unwrap();
///
///         // Only emit a token if `n` is an even number.
///         if n % 2 == 0 {
///             // Emit an error if `n` is 10.
///             if n == 10 {
///                 FilterResult::Error(LexingError::NumberIsTen)
///             } else {
///                 FilterResult::Emit(n)
///             }
///         } else {
///             FilterResult::Skip
///         }
///     })]
///     NiceEvenNumber(u64)
/// }
///
/// let tokens: Vec<_> = Token::lexer("20 11 42 23 100 10").collect();
///
/// assert_eq!(
///     tokens,
///     &[
///         Ok(Token::NiceEvenNumber(20)),
///         // skipping 11
///         Ok(Token::NiceEvenNumber(42)),
///         // skipping 23
///         Ok(Token::NiceEvenNumber(100)),
///         // error at 10
///         Err(LexingError::NumberIsTen),
///     ]
/// );
/// ```
pub enum FilterResult<T, E> {
    /// Emit a token with a given value `T`. Use `()` for unit variants without fields.
    Emit(T),
    /// Skip the current match, analogous to [`Skip`](./struct.Skip.html).
    Skip,
    /// Emit an error `E` for the current match; the `Lexer` will yield it as `Err(E)`.
    Error(E),
}

/// Predefined callback that will inform the `Lexer` to skip the current token match.
///
/// # Example
///
/// ```rust
/// use logos::Logos;
///
/// #[derive(Logos, Debug, PartialEq)]
/// enum Token<'a> {
///     // We will treat "abc" as if it were whitespace
///     #[regex(" |abc", logos::skip, priority = 3)]
///     Ignored,
///
///     #[regex("[a-zA-Z]+")]
///     Text(&'a str),
/// }
///
/// let tokens: Vec<_> = Token::lexer("Hello abc world").collect();
///
/// assert_eq!(
///     tokens,
///     &[
///         Ok(Token::Text("Hello")),
///         Ok(Token::Text("world")),
///     ],
/// );
/// ```
#[inline]
pub fn skip<'source, Token: Logos<'source>>(_: &mut Lexer<'source, Token>) -> Skip {
    Skip
}

#[cfg(doctest)]
mod test_readme {
    macro_rules! external_doc_test {
        ($x:expr) => {
            #[doc = $x]
            extern "C" {}
        };
    }

    external_doc_test!(include_str!("../README.md"));
}