minijinja/
utils.rs

1use std::char::decode_utf16;
2use std::cmp::Ordering;
3use std::collections::BTreeMap;
4use std::fmt;
5use std::iter::{once, repeat};
6use std::str::Chars;
7
8use crate::error::{Error, ErrorKind};
9use crate::value::{StringType, UndefinedType, Value, ValueIter, ValueKind, ValueRepr};
10use crate::Output;
11
/// Internal marker type used to seal up some trait methods.
///
/// NOTE(review): presumably this is taken as a parameter by trait methods so
/// that code which cannot name the type is unable to call or override them;
/// confirm against the traits that use it.
pub struct SealedMarker;
14
/// Returns the index of the first byte in `haystack` that equals `needle`.
///
/// Yields `None` when the byte does not occur, which includes the case of an
/// empty haystack.
pub fn memchr(haystack: &[u8], needle: u8) -> Option<usize> {
    for (offset, &byte) in haystack.iter().enumerate() {
        if byte == needle {
            return Some(offset);
        }
    }
    None
}
18
/// Returns the byte offset of the first occurrence of `needle` in `haystack`.
///
/// An empty `needle` matches at offset `0` (the same answer [`str::find`]
/// gives for an empty pattern).  Returns `None` when `needle` does not occur,
/// including when it is longer than `haystack`.
pub fn memstr(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    // `slice::windows` panics when asked for zero-sized windows, so the empty
    // needle has to be answered before reaching it.
    if needle.is_empty() {
        return Some(0);
    }
    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
24
/// Clamps a size hint that comes from an untrusted source.
///
/// Values up to 1024 are returned unchanged; anything larger is capped at 1024.
#[inline(always)]
pub(crate) fn untrusted_size_hint(value: usize) -> usize {
    const LIMIT: usize = 1024;
    if value > LIMIT {
        LIMIT
    } else {
        value
    }
}
30
31fn write_with_html_escaping(out: &mut Output, value: &Value) -> fmt::Result {
32    if let Some(s) = value.as_str() {
33        write!(out, "{}", HtmlEscape(s))
34    } else if matches!(
35        value.kind(),
36        ValueKind::Undefined | ValueKind::None | ValueKind::Bool | ValueKind::Number
37    ) {
38        write!(out, "{value}")
39    } else {
40        write!(out, "{}", HtmlEscape(&value.to_string()))
41    }
42}
43
44#[cold]
45fn invalid_autoescape(name: &str) -> Result<(), Error> {
46    Err(Error::new(
47        ErrorKind::InvalidOperation,
48        format!("Default formatter does not know how to format to custom format '{name}'"),
49    ))
50}
51
/// Serializes `value` to JSON with `serde_json` and writes the result to `out`.
///
/// A serialization failure is reported as [`ErrorKind::BadSerialization`] with
/// the underlying error attached as source; a failed write is converted via
/// `Error::from`.
#[cfg(feature = "json")]
fn json_escape_write(out: &mut Output, value: &Value) -> Result<(), Error> {
    match serde_json::to_string(&value) {
        Ok(json) => write!(out, "{json}").map_err(Error::from),
        Err(err) => Err(
            Error::new(ErrorKind::BadSerialization, "unable to format to JSON").with_source(err),
        ),
    }
}
59
60#[inline(always)]
61pub fn write_escaped(
62    out: &mut Output,
63    auto_escape: AutoEscape,
64    value: &Value,
65) -> Result<(), Error> {
66    // string strings bypass all of this
67    if let ValueRepr::String(ref s, StringType::Safe) = value.0 {
68        return out.write_str(s).map_err(Error::from);
69    }
70
71    match auto_escape {
72        AutoEscape::None => write!(out, "{value}").map_err(Error::from),
73        AutoEscape::Html => write_with_html_escaping(out, value).map_err(Error::from),
74        #[cfg(feature = "json")]
75        AutoEscape::Json => json_escape_write(out, value),
76        AutoEscape::Custom(name) => invalid_autoescape(name),
77    }
78}
79
/// Controls the autoescaping behavior.
///
/// For more information see
/// [`set_auto_escape_callback`](crate::Environment::set_auto_escape_callback).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum AutoEscape {
    /// Do not apply auto escaping.
    None,
    /// Use HTML auto escaping rules.
    ///
    /// Any value will be converted into a string and the following characters
    /// will be escaped in ways compatible with XML and HTML: `<`, `>`, `&`, `"`,
    /// `'`, and `/`.
    Html,
    /// Use escaping rules suitable for JSON/JavaScript or YAML.
    ///
    /// Any value effectively ends up being serialized to JSON upon printing.  The
    /// serialized values will be compatible with JavaScript and YAML as well.
    #[cfg(feature = "json")]
    #[cfg_attr(docsrs, doc(cfg(feature = "json")))]
    Json,
    /// A custom auto escape format, identified by name.
    ///
    /// The default formatter does not know how to deal with a custom escaping
    /// format and returns an error.  Using one requires a custom formatter.
    /// See [`set_formatter`](crate::Environment::set_formatter).
    Custom(&'static str),
}
109
/// Defines the behavior of undefined values in the engine.
///
/// At present there are three types of behaviors available which mirror the
/// behaviors that Jinja2 provides out of the box, plus an extra option called
/// `SemiStrict` which is a slightly less strict variant of `Strict`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Default)]
#[non_exhaustive]
pub enum UndefinedBehavior {
    /// The default, somewhat lenient undefined behavior.
    ///
    /// * **printing:** allowed (returns empty string)
    /// * **iteration:** allowed (returns empty array)
    /// * **attribute access of undefined values:** fails
    /// * **if true:** allowed (is considered false)
    #[default]
    Lenient,
    /// Like `Lenient`, but also allows chaining of undefined lookups.
    ///
    /// * **printing:** allowed (returns empty string)
    /// * **iteration:** allowed (returns empty array)
    /// * **attribute access of undefined values:** allowed (returns [`undefined`](Value::UNDEFINED))
    /// * **if true:** allowed (is considered false)
    Chainable,
    /// Like `Strict`, but does not error when the undefined is checked for truthiness.
    ///
    /// * **printing:** fails
    /// * **iteration:** fails
    /// * **attribute access of undefined values:** fails
    /// * **if true:** allowed (is considered false)
    SemiStrict,
    /// Complains very quickly about undefined values.
    ///
    /// * **printing:** fails
    /// * **iteration:** fails
    /// * **attribute access of undefined values:** fails
    /// * **if true:** fails
    Strict,
}
148
149impl UndefinedBehavior {
150    /// Utility method used in the engine to determine what to do when an undefined is
151    /// encountered.
152    ///
153    /// The flag indicates if this is the first or second level of undefined value.  The
154    /// parent value is passed too.
155    pub(crate) fn handle_undefined(self, parent_was_undefined: bool) -> Result<Value, Error> {
156        match (self, parent_was_undefined) {
157            (UndefinedBehavior::Lenient, false)
158            | (UndefinedBehavior::Strict, false)
159            | (UndefinedBehavior::SemiStrict, false)
160            | (UndefinedBehavior::Chainable, _) => Ok(Value::UNDEFINED),
161            (UndefinedBehavior::Lenient, true)
162            | (UndefinedBehavior::Strict, true)
163            | (UndefinedBehavior::SemiStrict, true) => Err(Error::from(ErrorKind::UndefinedError)),
164        }
165    }
166
167    /// Utility method to check if something is true.
168    ///
169    /// This fails only for strict undefined values.
170    #[inline]
171    pub(crate) fn is_true(self, value: &Value) -> Result<bool, Error> {
172        match (self, &value.0) {
173            // silent undefined doesn't error, even in strict mode
174            (UndefinedBehavior::Strict, &ValueRepr::Undefined(UndefinedType::Default)) => {
175                Err(Error::from(ErrorKind::UndefinedError))
176            }
177            _ => Ok(value.is_true()),
178        }
179    }
180
181    /// Tries to iterate over a value while handling the undefined value.
182    ///
183    /// If the value is undefined, then iteration fails if the behavior is set to strict,
184    /// otherwise it succeeds with an empty iteration.  This is also internally used in the
185    /// engine to convert values to lists.
186    #[inline]
187    pub(crate) fn try_iter(self, value: Value) -> Result<ValueIter, Error> {
188        self.assert_iterable(&value).and_then(|_| value.try_iter())
189    }
190
191    /// Are we strict on iteration?
192    #[inline]
193    pub(crate) fn assert_iterable(self, value: &Value) -> Result<(), Error> {
194        match (self, &value.0) {
195            // silent undefined doesn't error, even in strict mode
196            (
197                UndefinedBehavior::Strict | UndefinedBehavior::SemiStrict,
198                &ValueRepr::Undefined(UndefinedType::Default),
199            ) => Err(Error::from(ErrorKind::UndefinedError)),
200            _ => Ok(()),
201        }
202    }
203}
204
/// Helper to HTML escape a string.
///
/// Formatting this wrapper with `Display` writes the wrapped string with
/// `<`, `>`, `&`, `"`, `'` and `/` replaced by HTML entities.
pub struct HtmlEscape<'a>(pub &'a str);

impl fmt::Display for HtmlEscape<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // With the `v_htmlescape` feature the escaping is delegated to that crate.
        #[cfg(feature = "v_htmlescape")]
        {
            fmt::Display::fmt(&v_htmlescape::escape(self.0), f)
        }
        // this is taken from askama-escape
        #[cfg(not(feature = "v_htmlescape"))]
        {
            let bytes = self.0.as_bytes();
            // Start of the run of bytes that has not been written out yet.
            let mut start = 0;

            for (i, b) in bytes.iter().enumerate() {
                // Flushes the pending unescaped run `start..i`, writes the
                // replacement entity and restarts the run after the escaped byte.
                macro_rules! escaping_body {
                    ($quote:expr) => {{
                        if start < i {
                            // SAFETY: `start` is either 0 or one past an escaped ASCII
                            // byte and `i` is the index of an ASCII byte.  ASCII bytes
                            // never occur inside a multi-byte UTF-8 sequence, so both
                            // ends are char boundaries of the original valid `str`.
                            ok!(f.write_str(unsafe {
                                std::str::from_utf8_unchecked(&bytes[start..i])
                            }));
                        }
                        ok!(f.write_str($quote));
                        start = i + 1;
                    }};
                }
                // Cheap pre-filter: every escaped byte lies in the range `"`..=`>`;
                // the wrapping subtraction tests both bounds with one comparison.
                if b.wrapping_sub(b'"') <= b'>' - b'"' {
                    match *b {
                        b'<' => escaping_body!("&lt;"),
                        b'>' => escaping_body!("&gt;"),
                        b'&' => escaping_body!("&amp;"),
                        b'"' => escaping_body!("&quot;"),
                        b'\'' => escaping_body!("&#x27;"),
                        b'/' => escaping_body!("&#x2f;"),
                        _ => (),
                    }
                }
            }

            // Write whatever follows the last escaped byte.
            if start < bytes.len() {
                // SAFETY: `start` is 0 or one past an ASCII byte, hence a char
                // boundary, and the slice runs to the end of a valid `str`.
                f.write_str(unsafe { std::str::from_utf8_unchecked(&bytes[start..]) })
            } else {
                Ok(())
            }
        }
    }
}
255
256struct Unescaper {
257    out: String,
258    pending_surrogate: u16,
259}
260
261impl Unescaper {
262    fn unescape(mut self, s: &str) -> Result<String, Error> {
263        let mut char_iter = s.chars();
264
265        while let Some(c) = char_iter.next() {
266            if c == '\\' {
267                match char_iter.next() {
268                    None => return Err(ErrorKind::BadEscape.into()),
269                    Some(d) => match d {
270                        '"' | '\\' | '/' | '\'' => ok!(self.push_char(d)),
271                        'b' => ok!(self.push_char('\x08')),
272                        'f' => ok!(self.push_char('\x0C')),
273                        'n' => ok!(self.push_char('\n')),
274                        'r' => ok!(self.push_char('\r')),
275                        't' => ok!(self.push_char('\t')),
276                        'u' => {
277                            let val = ok!(self.parse_u16(&mut char_iter));
278                            ok!(self.push_u16(val));
279                        }
280                        _ => return Err(ErrorKind::BadEscape.into()),
281                    },
282                }
283            } else {
284                ok!(self.push_char(c));
285            }
286        }
287
288        if self.pending_surrogate != 0 {
289            Err(ErrorKind::BadEscape.into())
290        } else {
291            Ok(self.out)
292        }
293    }
294
295    fn parse_u16(&self, chars: &mut Chars) -> Result<u16, Error> {
296        let hexnum = chars.chain(repeat('\0')).take(4).collect::<String>();
297        u16::from_str_radix(&hexnum, 16).map_err(|_| ErrorKind::BadEscape.into())
298    }
299
300    fn push_u16(&mut self, c: u16) -> Result<(), Error> {
301        match (self.pending_surrogate, (0xD800..=0xDFFF).contains(&c)) {
302            (0, false) => match decode_utf16(once(c)).next() {
303                Some(Ok(c)) => self.out.push(c),
304                _ => return Err(ErrorKind::BadEscape.into()),
305            },
306            (_, false) => return Err(ErrorKind::BadEscape.into()),
307            (0, true) => self.pending_surrogate = c,
308            (prev, true) => match decode_utf16(once(prev).chain(once(c))).next() {
309                Some(Ok(c)) => {
310                    self.out.push(c);
311                    self.pending_surrogate = 0;
312                }
313                _ => return Err(ErrorKind::BadEscape.into()),
314            },
315        }
316        Ok(())
317    }
318
319    fn push_char(&mut self, c: char) -> Result<(), Error> {
320        if self.pending_surrogate != 0 {
321            Err(ErrorKind::BadEscape.into())
322        } else {
323            self.out.push(c);
324            Ok(())
325        }
326    }
327}
328
329/// Un-escape a string, following JSON rules.
330pub fn unescape(s: &str) -> Result<String, Error> {
331    Unescaper {
332        out: String::new(),
333        pending_surrogate: 0,
334    }
335    .unescape(s)
336}
337
/// Debug adapter that renders only the keys of a [`BTreeMap`], as a list.
pub struct BTreeMapKeysDebug<'a, K: fmt::Debug, V>(pub &'a BTreeMap<K, V>);

impl<K: fmt::Debug, V> fmt::Debug for BTreeMapKeysDebug<'_, K, V> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut list = f.debug_list();
        list.entries(self.0.keys());
        list.finish()
    }
}
345
/// Guard that invokes a closure when it is dropped.
pub struct OnDrop<F: FnOnce()>(Option<F>);

impl<F: FnOnce()> OnDrop<F> {
    /// Creates a guard that calls `f` once the guard goes out of scope.
    pub fn new(f: F) -> Self {
        OnDrop(Some(f))
    }
}

impl<F: FnOnce()> Drop for OnDrop<F> {
    fn drop(&mut self) {
        // The closure lives in an `Option` so it can be moved out of
        // `&mut self` and called by value.
        let callback = self.0.take().unwrap();
        callback();
    }
}
359
360#[cfg(feature = "builtins")]
/// Splits `s` on runs of whitespace into at most `maxsplits` pieces.
///
/// Leading whitespace is skipped.  Once `maxsplits - 1` pieces have been
/// produced, the last piece is the untouched remainder of the input starting
/// at the next word, so it keeps interior and trailing whitespace.  If the
/// input runs out of words before that, trailing whitespace is dropped: no
/// empty or whitespace-only piece is ever yielded.  A `maxsplits` of `0`
/// behaves like `1`.
pub fn splitn_whitespace(s: &str, maxsplits: usize) -> impl Iterator<Item = &str> + '_ {
    let mut splits = 1;
    let mut skip_ws = true;
    // Byte offset at which the word currently being scanned begins.
    let mut split_start: Option<usize> = None;
    let mut chars = s.char_indices();

    std::iter::from_fn(move || {
        for (idx, c) in chars.by_ref() {
            if splits >= maxsplits && !skip_ws {
                // The limit is reached and the final piece has started; it
                // extends to the end of the input, so just consume the rest.
                continue;
            } else if c.is_whitespace() {
                if let Some(start) = split_start.take() {
                    splits += 1;
                    skip_ws = true;
                    return Some(&s[start..idx]);
                }
            } else {
                skip_ws = false;
                if split_start.is_none() {
                    split_start = Some(idx);
                }
            }
        }

        // Only a word that is still open forms a final piece.  Whitespace
        // left over after the last completed word must not be yielded.
        split_start.take().map(|start| &s[start..])
    })
}
399
400/// Because the Python crate violates our ordering guarantees by design
401/// we want to catch failed sorts in a landing pad.  This is not ideal but
402/// it at least gives us error context for when invalid search operations
403/// are taking place.
404#[cfg_attr(not(feature = "internal_safe_search"), inline)]
405pub fn safe_sort<T, F>(seq: &mut [T], f: F) -> Result<(), Error>
406where
407    F: FnMut(&T, &T) -> Ordering,
408{
409    #[cfg(feature = "internal_safe_search")]
410    {
411        if let Err(panic) = std::panic::catch_unwind(std::panic::AssertUnwindSafe(move || {
412            seq.sort_by(f);
413        })) {
414            let msg = panic
415                .downcast_ref::<&str>()
416                .copied()
417                .or_else(|| panic.downcast_ref::<String>().map(|x| x.as_str()));
418            return Err(Error::new(
419                ErrorKind::InvalidOperation,
420                format!(
421                    "failed to sort: {}",
422                    msg.unwrap_or("comparator does not implement total order")
423                ),
424            ));
425        }
426    }
427    #[cfg(not(feature = "internal_safe_search"))]
428    {
429        seq.sort_by(f);
430    }
431    Ok(())
432}
433
// Unit tests for the helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    use similar_asserts::assert_eq;

    // Every character that `HtmlEscape` rewrites maps to its entity.
    #[test]
    fn test_html_escape() {
        let input = "<>&\"'/";
        let output = HtmlEscape(input).to_string();
        assert_eq!(output, "&lt;&gt;&amp;&quot;&#x27;&#x2f;");
    }

    // Covers a `\u` escape, the single-character escapes, input without any
    // escapes, and a surrogate pair.
    #[test]
    fn test_unescape() {
        assert_eq!(unescape(r"foo\u2603bar").unwrap(), "foo\u{2603}bar");
        assert_eq!(unescape(r"\t\b\f\r\n\\\/").unwrap(), "\t\x08\x0c\r\n\\/");
        assert_eq!(unescape("foobarbaz").unwrap(), "foobarbaz");
        assert_eq!(unescape(r"\ud83d\udca9").unwrap(), "💩");
    }

    // Checks the split limit, runs of whitespace, leading whitespace and the
    // remainder kept in the last piece.
    #[test]
    #[cfg(feature = "builtins")]
    fn test_splitn_whitespace() {
        // Collects the pieces produced for `s` with at most `n` pieces.
        fn s(s: &str, n: usize) -> Vec<&str> {
            splitn_whitespace(s, n).collect::<Vec<_>>()
        }

        assert_eq!(s("a b c", 1), vec!["a b c"]);
        assert_eq!(s("a b c", 2), vec!["a", "b c"]);
        assert_eq!(s("a    b c", 2), vec!["a", "b c"]);
        assert_eq!(s("a    b c   ", 2), vec!["a", "b c   "]);
        assert_eq!(s("a   b   c", 3), vec!["a", "b", "c"]);
        assert_eq!(s("a   b   c", 4), vec!["a", "b", "c"]);
        assert_eq!(s("   a   b   c", 3), vec!["a", "b", "c"]);
        assert_eq!(s("   a   b   c", 4), vec!["a", "b", "c"]);
    }
}