mownstr/
lib.rs

//! [`MownStr`]
//! is either a borrowed reference to a `str` or an owned `Box<str>`.
3
4use std::borrow::Cow;
5use std::fmt;
6use std::hash;
7use std::marker::PhantomData;
8use std::ops::Deref;
9use std::ptr::NonNull;
10use std::slice;
11use std::str;
12
/// "Maybe own str":
/// a string that is either borrowed (like a `&'a str`) or owned (like a `Box<str>`).
///
/// It is deliberately neither mutable nor generic,
/// which keeps it lighter than, for example, `Cow<str>`.
///
/// # Panics
/// The price to pay is that a `MownStr`
/// can not hold a string whose length exceeds `usize::MAX/2`.
/// Trying to convert such a large string to a `MownStr` panics.
pub struct MownStr<'a> {
    // Address of the first byte of the text.
    addr: NonNull<u8>,
    // Length of the text, combined with the ownership flag (see `OWN_FLAG`).
    xlen: usize,
    // Ties the value to the lifetime of the data it may be borrowing.
    _phd: PhantomData<&'a str>,
}
28
29// MownStr does not implement `Sync` and `Send` by default,
30// because NonNull<u8> does not.
31// However, it is safe to declare it as Sync and Send,
32// because MownStr is basically nothing more than a `&str`,
33// or a `Box<str>`, and both are `Sync` and `Send`.
34unsafe impl Sync for MownStr<'_> {}
35unsafe impl Send for MownStr<'_> {}
36
// Most significant bit of `xlen`: set when the `MownStr` owns its data.
const OWN_FLAG: usize = 1 << (usize::BITS - 1);
// All the other bits of `xlen`: they hold the actual length of the text.
const LEN_MASK: usize = !OWN_FLAG;
39
40impl<'a> MownStr<'a> {
41    pub const fn from_str(other: &'a str) -> MownStr<'a> {
42        assert!(other.len() <= LEN_MASK);
43        // NB: The only 'const' constuctor for NonNull is new_unchecked
44        // so we need an unsafe block.
45
46        // SAFETY: we need a *mut u8 for new_unchecked,
47        //         but MownStr will never mutate its content
48        let ptr = other.as_ptr() as *mut u8;
49        let addr = unsafe {
50            // SAFETY: ptr can not be null,
51            NonNull::new_unchecked(ptr)
52        };
53        MownStr {
54            addr,
55            xlen: other.len(),
56            _phd: PhantomData,
57        }
58    }
59
60    pub const fn is_borrowed(&self) -> bool {
61        (self.xlen & OWN_FLAG) == 0
62    }
63
64    pub const fn is_owned(&self) -> bool {
65        (self.xlen & OWN_FLAG) == OWN_FLAG
66    }
67
68    pub const fn borrowed(&self) -> MownStr {
69        MownStr {
70            addr: self.addr,
71            xlen: self.xlen & LEN_MASK,
72            _phd: PhantomData,
73        }
74    }
75
76    #[inline]
77    fn real_len(&self) -> usize {
78        self.xlen & LEN_MASK
79    }
80
81    #[inline]
82    unsafe fn make_ref(&self) -> &'a str {
83        debug_assert!(self.is_borrowed(), "make_ref() called on owned MownStr");
84        let ptr = self.addr.as_ptr();
85        let slice = slice::from_raw_parts(ptr, self.xlen);
86        str::from_utf8_unchecked(slice)
87    }
88
89    /// Convert an *owned* MownStr to a box.
90    //
91    // NB: conceptually this method consumes the Mownstr.
92    // The reason why self is a mutable ref instead of a move is purely technical
93    // (to make it usable in Drop::drop()).
94    #[inline]
95    unsafe fn extract_box(&mut self) -> Box<str> {
96        debug_assert!(self.is_owned(), "extract_box() called on borrowed MownStr");
97        // extract data to make box
98        let ptr = self.addr.as_ptr();
99        let len = self.real_len();
100        // turn to borrowed, to avoid double-free
101        self.xlen = 0;
102        debug_assert!(self.is_borrowed());
103        // make box
104        let slice = slice::from_raw_parts_mut(ptr, len);
105        let raw = str::from_utf8_unchecked_mut(slice) as *mut str;
106        Box::from_raw(raw)
107    }
108}
109
110impl<'a> Drop for MownStr<'a> {
111    fn drop(&mut self) {
112        if self.is_owned() {
113            unsafe {
114                std::mem::drop(self.extract_box());
115            }
116        }
117    }
118}
119
120impl<'a> Clone for MownStr<'a> {
121    fn clone(&self) -> MownStr<'a> {
122        if self.is_owned() {
123            Box::<str>::from(self.deref()).into()
124        } else {
125            MownStr {
126                addr: self.addr,
127                xlen: self.xlen,
128                _phd: self._phd,
129            }
130        }
131    }
132}
133
134// Construct a MownStr
135
136impl<'a> From<&'a str> for MownStr<'a> {
137    fn from(other: &'a str) -> MownStr<'a> {
138        Self::from_str(other)
139    }
140}
141
142impl<'a> From<Box<str>> for MownStr<'a> {
143    fn from(mut other: Box<str>) -> MownStr<'a> {
144        let len = other.len();
145        assert!(len <= LEN_MASK);
146        let addr = other.as_mut_ptr();
147        let addr = unsafe {
148            // SAFETY: ptr can not be null,
149            NonNull::new_unchecked(addr)
150        };
151
152        std::mem::forget(other);
153
154        let xlen = len | OWN_FLAG;
155        let _phd = PhantomData;
156        MownStr { addr, xlen, _phd }
157    }
158}
159
160impl<'a> From<String> for MownStr<'a> {
161    fn from(other: String) -> MownStr<'a> {
162        other.into_boxed_str().into()
163    }
164}
165
166impl<'a> From<Cow<'a, str>> for MownStr<'a> {
167    fn from(other: Cow<'a, str>) -> MownStr<'a> {
168        match other {
169            Cow::Borrowed(r) => r.into(),
170            Cow::Owned(s) => s.into(),
171        }
172    }
173}
174
175// Using a MownStr as a str
176
177impl<'a> Deref for MownStr<'a> {
178    type Target = str;
179
180    fn deref(&self) -> &str {
181        let ptr = self.addr.as_ptr();
182        let len = self.real_len();
183        unsafe {
184            let slice = slice::from_raw_parts(ptr, len);
185            str::from_utf8_unchecked(slice)
186        }
187    }
188}
189
190impl<'a> AsRef<str> for MownStr<'a> {
191    fn as_ref(&self) -> &str {
192        self.deref()
193    }
194}
195
196impl<'a> std::borrow::Borrow<str> for MownStr<'a> {
197    fn borrow(&self) -> &str {
198        self.deref()
199    }
200}
201
202// Comparing between MownStr
203
204impl<'a> hash::Hash for MownStr<'a> {
205    fn hash<H: hash::Hasher>(&self, state: &mut H) {
206        self.deref().hash(state)
207    }
208}
209
210impl<'a> PartialEq for MownStr<'a> {
211    fn eq(&self, other: &MownStr<'a>) -> bool {
212        self.deref() == other.deref()
213    }
214}
215
216impl<'a> Eq for MownStr<'a> {}
217
218impl<'a> PartialOrd for MownStr<'a> {
219    fn partial_cmp(&self, other: &MownStr<'a>) -> Option<std::cmp::Ordering> {
220        self.deref().partial_cmp(other.deref())
221    }
222}
223
224impl<'a> Ord for MownStr<'a> {
225    fn cmp(&self, other: &MownStr<'a>) -> std::cmp::Ordering {
226        self.deref().cmp(other.deref())
227    }
228}
229
230// Comparing MownStr with str
231
232impl<'a> PartialEq<&'a str> for MownStr<'a> {
233    fn eq(&self, other: &&'a str) -> bool {
234        self.deref() == *other
235    }
236}
237
238impl<'a> PartialOrd<&'a str> for MownStr<'a> {
239    fn partial_cmp(&self, other: &&'a str) -> Option<std::cmp::Ordering> {
240        self.deref().partial_cmp(*other)
241    }
242}
243
244impl<'a> PartialEq<MownStr<'a>> for &'a str {
245    fn eq(&self, other: &MownStr<'a>) -> bool {
246        self == &other.deref()
247    }
248}
249
250impl<'a> PartialOrd<MownStr<'a>> for &'a str {
251    fn partial_cmp(&self, other: &MownStr<'a>) -> Option<std::cmp::Ordering> {
252        self.partial_cmp(&other.deref())
253    }
254}
255
256// Formatting
257
258impl<'a> fmt::Debug for MownStr<'a> {
259    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
260        fmt::Debug::fmt(self.deref(), f)
261    }
262}
263
264impl<'a> fmt::Display for MownStr<'a> {
265    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
266        fmt::Display::fmt(self.deref(), f)
267    }
268}
269
270// Converting
271
272impl<'a> From<MownStr<'a>> for Box<str> {
273    fn from(other: MownStr<'a>) -> Box<str> {
274        other.to()
275    }
276}
277
278impl<'a> From<MownStr<'a>> for String {
279    fn from(other: MownStr<'a>) -> String {
280        other.to()
281    }
282}
283
284impl<'a> From<MownStr<'a>> for Cow<'a, str> {
285    fn from(other: MownStr<'a>) -> Cow<'a, str> {
286        if other.is_owned() {
287            other.to::<String>().into()
288        } else {
289            unsafe { other.make_ref() }.into()
290        }
291    }
292}
293
294impl<'a> MownStr<'a> {
295    /// Convert this `MownStr` to any type `T`
296    /// that can be created from either a `&str` or a `Box<str>`.
297    ///
298    /// This can not be implemented with the `From` trait,
299    /// because this would conflict with `From<MownStr<'a>>`.
300    ///
301    /// # Usage
302    /// ```
303    /// # use mownstr::MownStr;
304    /// # use std::rc::Rc;
305    /// let ms = MownStr::from("hello world");
306    /// let rc = ms.to::<Rc<str>>();
307    ///
308    /// let o1 = Some(MownStr::from("hi there"));
309    /// let o2 = o1.map(MownStr::to::<Rc<str>>);
310    /// ```
311    pub fn to<T>(mut self) -> T
312    where
313        T: From<&'a str> + From<Box<str>>,
314    {
315        if self.is_owned() {
316            unsafe { self.extract_box() }.into()
317        } else {
318            unsafe { self.make_ref() }.into()
319        }
320    }
321}
322
#[cfg(test)]
// Some tests deliberately compare a value with itself (e.g. `assert_eq!(mown1, mown1)`).
#[allow(clippy::eq_op)]
mod test {
    use super::MownStr;
    use std::borrow::Cow;
    use std::collections::HashSet;
    use std::fs;
    use std::str::FromStr;

    // A MownStr must not be bigger than a plain string slice.
    #[test]
    fn size() {
        assert_eq!(
            std::mem::size_of::<MownStr<'static>>(),
            std::mem::size_of::<&'static str>(),
        );
    }

    // Wrapping a MownStr in an Option must not make it bigger.
    #[test]
    fn niche() {
        assert_eq!(
            std::mem::size_of::<MownStr<'static>>(),
            std::mem::size_of::<Option<MownStr<'static>>>(),
        );
    }

    #[test]
    fn test_build_borrowed_empty() {
        let mown: MownStr = "".into();
        assert!(mown.is_borrowed());
        assert_eq!(mown, "");
    }

    #[test]
    fn test_build_borrowed() {
        let mown: MownStr = "hello".into();
        assert!(mown.is_borrowed());
    }

    #[test]
    fn test_build_owned_from_box() {
        let bx: Box<str> = "hello".into();
        let mown: MownStr = bx.into();
        assert!(mown.is_owned());
    }

    #[test]
    fn test_build_owned_from_string() {
        let mown: MownStr = "hello".to_string().into();
        assert!(mown.is_owned());
    }

    #[test]
    fn test_build_borrowed_from_cow() {
        let mown: MownStr = Cow::Borrowed("hello").into();
        assert!(mown.is_borrowed());
    }

    #[test]
    fn test_build_owned_from_cow() {
        let mown: MownStr = Cow::<str>::Owned("hello".to_string()).into();
        assert!(mown.is_owned());
    }

    #[test]
    fn test_borrowed() {
        let mown1: MownStr = "hello".to_string().into();
        let mown2 = mown1.borrowed();
        assert!(mown2.is_borrowed());
        assert_eq!(mown1, mown2);
    }

    #[test]
    fn test_deref() {
        let txt = "hello";
        let mown1: MownStr = txt.into();
        assert_eq!(&*mown1, txt);
        assert_eq!(&mown1[..], txt);
        let mown2: MownStr = txt.to_string().into();
        assert_eq!(&*mown2, txt);
        assert_eq!(&mown2[..], txt);
    }

    #[test]
    fn test_hash() {
        let txt = "hello";
        let mown1: MownStr = txt.into();
        let mown2: MownStr = txt.to_string().into();

        let mut set = HashSet::new();
        set.insert(mown1.clone());
        assert!(set.contains(&mown1));
        assert!(set.contains(&mown2));
        assert!(set.contains(txt));

        let mut set = HashSet::new();
        set.insert(mown2.clone());
        assert!(set.contains(&mown1));
        assert!(set.contains(&mown2));
        assert!(set.contains(txt));
    }

    #[test]
    fn test_eq() {
        let txt = "hello";
        let mown1: MownStr = txt.into();
        let mown2: MownStr = txt.to_string().into();

        assert_eq!(mown1, txt);
        assert_eq!(mown1, mown1);
        assert_eq!(mown1, mown2);
        assert_eq!(mown2, txt);
        assert_eq!(mown2, mown1);
        assert_eq!(mown2, mown2);
        assert_eq!(txt, mown1);
        assert_eq!(txt, mown2);
    }

    #[test]
    fn test_order() {
        let txt = "hello";
        let mown1: MownStr = txt[..4].into();
        let mown2: MownStr = txt[..3].to_string().into();

        assert!(mown1 <= txt);
        assert!(mown1 <= mown1);
        assert!(mown1 >= mown2);
        assert!(mown2 <= txt);
        assert!(mown2 <= mown1);
        assert!(mown2 >= mown2);
        assert!(txt >= mown1);
        assert!(txt >= mown2);
    }

    #[test]
    fn test_display() {
        let mown1: MownStr = "hello".into();
        let mown2: MownStr = "hello".to_string().into();
        assert_eq!(format!("{:?}", mown1), "\"hello\"");
        assert_eq!(format!("{:?}", mown2), "\"hello\"");
        assert_eq!(format!("{}", mown1), "hello");
        assert_eq!(format!("{}", mown2), "hello");
    }

    #[test]
    fn no_double_free() {
        let bx = {
            let mown = MownStr::from("hello world".to_string());
            assert_eq!(&mown[..4], "hell");
            mown.to::<Box<str>>()
        };
        assert_eq!(&bx[..4], "hell");
    }

    #[cfg(target_os = "linux")]
    #[test]
    fn no_memory_leak() {
        // performs several MownStr allocations in sequence,
        // dropping each one before allocating the next one
        // (unless the v.pop() line below is commented out).
        //
        // If there is no memory leak,
        // the increase in memory should be roughly 1 time the allocated size;
        // otherwise, it should be roughly 10 times that size.

        let m0 = get_rss_anon();
        println!("memory = {} kB", m0);
        let mut v = vec![];
        for i in 0..10 {
            v.pop(); // COMMENT THIS LINE OUT to simulate a memory leak
            let s = unsafe { String::from_utf8_unchecked(vec![b'a' + i; CAP]) };
            v.push(MownStr::from(s));
            println!(
                "{} MownStr(s) in the Vec, of len {}, starting with {:?}",
                v.len(),
                v[v.len() - 1].len(),
                &v[v.len() - 1][..2]
            );
        }
        let m1 = get_rss_anon();
        println!("memory = {} kB", m1);
        assert!(!v.is_empty()); // ensure that v is not optimized away too soon
        // saturating_sub: if the memory usage happened to shrink (m1 < m0),
        // a plain subtraction would underflow and panic,
        // failing the test although nothing leaked.
        let increase = m1.saturating_sub(m0) as f64 / (CAP / 1000) as f64;
        println!("increase = {}", increase);
        assert!(increase < 1.5);
    }

    #[test]
    fn empty_string() {
        let empty = "".to_string();
        let _ = MownStr::from(empty);
    }

    // Size in bytes of each string allocated by `no_memory_leak`.
    const CAP: usize = 100_000_000;

    // Read the "RssAnon" value (in kB) from /proc/self/status.
    fn get_rss_anon() -> usize {
        let txt = fs::read_to_string("/proc/self/status").expect("read proc status");
        let txt = txt.split("RssAnon:").nth(1).unwrap();
        let txt = txt.split(" kB").next().unwrap();
        let txt = txt.trim();
        usize::from_str(txt).unwrap()
    }
}