logos/lib.rs
1//! <img src="https://raw.githubusercontent.com/maciejhirsz/logos/master/logos.svg?sanitize=true" alt="Logos logo" width="250" align="right">
2//!
3//! # Logos
4//!
5//! _Create ridiculously fast Lexers._
6//!
7//! **Logos** has two goals:
8//!
9//! + To make it easy to create a Lexer, so you can focus on more complex problems.
10//! + To make the generated Lexer faster than anything you'd write by hand.
11//!
12//! To achieve those, **Logos**:
13//!
14//! + Combines all token definitions into a single [deterministic state machine](https://en.wikipedia.org/wiki/Deterministic_finite_automaton).
15//! + Optimizes branches into [lookup tables](https://en.wikipedia.org/wiki/Lookup_table) or [jump tables](https://en.wikipedia.org/wiki/Branch_table).
16//! + Prevents [backtracking](https://en.wikipedia.org/wiki/ReDoS) inside token definitions.
//! + [Unrolls loops](https://en.wikipedia.org/wiki/Loop_unrolling), and batches reads to minimize bounds checking.
18//! + Does all of that heavy lifting at compile time.
19//!
20//! See the [Logos handbook](https://maciejhirsz.github.io/logos/) for additional documentation and usage examples.
21#![cfg_attr(not(feature = "std"), no_std)]
22#![cfg_attr(docsrs, feature(doc_auto_cfg))]
23#![warn(missing_docs)]
24#![doc(html_logo_url = "https://maciej.codes/kosz/logos.png")]
25#![cfg_attr(feature = "forbid_unsafe", forbid(unsafe_code))]
26
27extern crate core;
28
29use core::fmt::Debug;
30#[cfg(feature = "export_derive")]
31pub use logos_derive::Logos;
32
33mod lexer;
34pub mod source;
35
36#[doc(hidden)]
37pub mod internal;
38
39pub use crate::lexer::{Lexer, Span, SpannedIter};
40pub use crate::source::Source;
41
42/// Trait implemented for an enum representing all tokens. You should never have
43/// to implement it manually, use the `#[derive(Logos)]` attribute on your enum.
44pub trait Logos<'source>: Sized {
45 /// Associated type `Extras` for the particular lexer. This can be set using
46 /// `#[logos(extras = MyExtras)]` and accessed inside callbacks.
47 type Extras;
48
49 /// Source type this token can be lexed from. This will default to `str`,
50 /// unless one of the defined patterns explicitly uses non-unicode byte values
51 /// or byte slices, in which case that implementation will use `[u8]`.
52 type Source: Source + ?Sized + 'source;
53
54 /// Error type returned by the lexer. This can be set using
55 /// `#[logos(error = MyError)]`. Defaults to `()` if not set.
56 type Error: Default + Clone + PartialEq + Debug + 'source;
57
58 /// The heart of Logos. Called by the `Lexer`. The implementation for this function
59 /// is generated by the `logos-derive` crate.
60 fn lex(lexer: &mut Lexer<'source, Self>);
61
62 /// Create a new instance of a `Lexer` that will produce tokens implementing
63 /// this `Logos`.
64 fn lexer(source: &'source Self::Source) -> Lexer<'source, Self>
65 where
66 Self::Extras: Default,
67 {
68 Lexer::new(source)
69 }
70
71 /// Create a new instance of a `Lexer` with the provided `Extras` that will
72 /// produce tokens implementing this `Logos`.
73 fn lexer_with_extras(
74 source: &'source Self::Source,
75 extras: Self::Extras,
76 ) -> Lexer<'source, Self> {
77 Lexer::with_extras(source, extras)
78 }
79}
80
/// Type that can be returned from a callback to tell the `Lexer` to skip the
/// current token match. See also [`logos::skip`](./fn.skip.html).
///
/// # Example
///
/// ```rust
/// use logos::{Logos, Skip};
///
/// #[derive(Logos, Debug, PartialEq)]
/// enum Token<'a> {
///     // We will treat "abc" as if it was whitespace.
///     // This is identical to using `logos::skip`.
///     #[regex(" |abc", |_| Skip, priority = 3)]
///     Ignored,
///
///     #[regex("[a-zA-Z]+")]
///     Text(&'a str),
/// }
///
/// let tokens: Vec<_> = Token::lexer("Hello abc world").collect();
///
/// assert_eq!(
///     tokens,
///     &[
///         Ok(Token::Text("Hello")),
///         Ok(Token::Text("world")),
///     ],
/// );
/// ```
// Common traits are derived so the marker can be printed, copied and compared
// like any other plain value type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Skip;
111
/// Type that can be returned from a callback, either producing a field
/// for a token, or skipping it.
///
/// # Example
///
/// ```rust
/// use logos::{Logos, Filter};
///
/// #[derive(Logos, Debug, PartialEq)]
/// enum Token {
///     #[regex(r"[ \n\f\t]+", logos::skip)]
///     Ignored,
///
///     #[regex("[0-9]+", |lex| {
///         let n: u64 = lex.slice().parse().unwrap();
///
///         // Only emit a token if `n` is an even number
///         match n % 2 {
///             0 => Filter::Emit(n),
///             _ => Filter::Skip,
///         }
///     })]
///     EvenNumber(u64)
/// }
///
/// let tokens: Vec<_> = Token::lexer("20 11 42 23 100 8002").collect();
///
/// assert_eq!(
///     tokens,
///     &[
///         Ok(Token::EvenNumber(20)),
///         // skipping 11
///         Ok(Token::EvenNumber(42)),
///         // skipping 23
///         Ok(Token::EvenNumber(100)),
///         Ok(Token::EvenNumber(8002))
///     ]
/// );
/// ```
// Common traits are derived (each conditional on `T` implementing it) so the
// value can be debug-printed, cloned and compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Filter<T> {
    /// Emit a token with a given value `T`. Use `()` for unit variants without fields.
    Emit(T),
    /// Skip current match, analog to [`Skip`](./struct.Skip.html).
    Skip,
}
157
/// Type that can be returned from a callback, either producing a field
/// for a token, skipping it, or emitting an error.
///
/// # Example
///
/// ```rust
/// use logos::{Logos, FilterResult};
///
/// #[derive(Debug, PartialEq, Clone, Default)]
/// enum LexingError {
///     NumberParseError,
///     NumberIsTen,
///     #[default]
///     Other,
/// }
///
/// impl From<std::num::ParseIntError> for LexingError {
///     fn from(_: std::num::ParseIntError) -> Self {
///         LexingError::NumberParseError
///     }
/// }
///
/// #[derive(Logos, Debug, PartialEq)]
/// #[logos(error = LexingError)]
/// enum Token {
///     #[regex(r"[ \n\f\t]+", logos::skip)]
///     Ignored,
///
///     #[regex("[0-9]+", |lex| {
///         let n: u64 = lex.slice().parse().unwrap();
///
///         // Only emit a token if `n` is an even number.
///         if n % 2 == 0 {
///             // Emit an error if `n` is 10.
///             if n == 10 {
///                 FilterResult::Error(LexingError::NumberIsTen)
///             } else {
///                 FilterResult::Emit(n)
///             }
///         } else {
///             FilterResult::Skip
///         }
///     })]
///     NiceEvenNumber(u64)
/// }
///
/// let tokens: Vec<_> = Token::lexer("20 11 42 23 100 10").collect();
///
/// assert_eq!(
///     tokens,
///     &[
///         Ok(Token::NiceEvenNumber(20)),
///         // skipping 11
///         Ok(Token::NiceEvenNumber(42)),
///         // skipping 23
///         Ok(Token::NiceEvenNumber(100)),
///         // error at 10
///         Err(LexingError::NumberIsTen),
///     ]
/// );
/// ```
// Common traits are derived (each conditional on `T` and `E` implementing it)
// so the value can be debug-printed, cloned and compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FilterResult<T, E> {
    /// Emit a token with a given value `T`. Use `()` for unit variants without fields.
    Emit(T),
    /// Skip current match, analog to [`Skip`](./struct.Skip.html).
    Skip,
    /// Emit the error `E` instead of a token. As the example above shows, the
    /// lexer yields it as an `Err` item.
    Error(E),
}
227
228/// Predefined callback that will inform the `Lexer` to skip a definition.
229///
230/// # Example
231///
232/// ```rust
233/// use logos::Logos;
234///
235/// #[derive(Logos, Debug, PartialEq)]
236/// enum Token<'a> {
237/// // We will treat "abc" as if it was whitespace
238/// #[regex(" |abc", logos::skip, priority = 3)]
239/// Ignored,
240///
241/// #[regex("[a-zA-Z]+")]
242/// Text(&'a str),
243/// }
244///
245/// let tokens: Vec<_> = Token::lexer("Hello abc world").collect();
246///
247/// assert_eq!(
248/// tokens,
249/// &[
250/// Ok(Token::Text("Hello")),
251/// Ok(Token::Text("world")),
252/// ],
253/// );
254/// ```
255#[inline]
256pub fn skip<'source, Token: Logos<'source>>(_: &mut Lexer<'source, Token>) -> Skip {
257 Skip
258}
259
// Only compiled under `cfg(doctest)`: attaches the contents of `README.md` as
// documentation on an empty item so the code samples in the README are picked
// up as doctests.
#[cfg(doctest)]
mod test_readme {
    // Expands to an empty `extern` block whose doc attribute is `$contents`.
    macro_rules! readme_doctests {
        ($contents:expr) => {
            #[doc = $contents]
            extern "C" {}
        };
    }

    readme_doctests!(include_str!("../README.md"));
}