1#![doc = include_str!("../README.md")]
2#![cfg_attr(docsrs, feature(doc_auto_cfg))]
3#![deny(unsafe_code)]
4#![no_std]
5
6#[cfg(feature = "std")]
7extern crate std;
8
9extern crate alloc;
10
11use alloc::borrow::{Borrow, Cow};
12use alloc::boxed::Box;
13use alloc::fmt;
14use alloc::str::{FromStr, Split};
15use alloc::string::String;
16use core::cmp::Ordering;
17use core::hash::{Hash, Hasher};
18use core::iter::once;
19use core::ops::Deref;
20#[cfg(feature = "serde")]
21use serde::{Deserialize, Deserializer, Serialize, Serializer};
22
23#[derive(Copy, Clone)]
32pub struct LanguageTag<T> {
33 tag: T,
34 positions: TagElementsPositions,
35}
36
37impl<T: Deref<Target = str>> LanguageTag<T> {
38 pub fn parse(tag: T) -> Result<Self, LanguageTagParseError> {
50 let positions = parse_language_tag(&tag, &mut VoidOutputBuffer::default())?;
51 Ok(Self { tag, positions })
52 }
53
54 #[inline]
56 pub fn as_str(&self) -> &str {
57 &self.tag
58 }
59
60 #[inline]
62 pub fn into_inner(self) -> T {
63 self.tag
64 }
65
66 #[inline]
75 pub fn primary_language(&self) -> &str {
76 &self.tag[..self.positions.language_end]
77 }
78
79 #[inline]
90 pub fn extended_language(&self) -> Option<&str> {
91 if self.positions.language_end == self.positions.extlang_end {
92 None
93 } else {
94 Some(&self.tag[self.positions.language_end + 1..self.positions.extlang_end])
95 }
96 }
97
98 #[inline]
109 pub fn extended_language_subtags(&self) -> impl Iterator<Item = &str> {
110 self.extended_language().unwrap_or("").split_terminator('-')
111 }
112
113 #[inline]
123 pub fn full_language(&self) -> &str {
124 &self.tag[..self.positions.extlang_end]
125 }
126
127 #[inline]
136 pub fn script(&self) -> Option<&str> {
137 if self.positions.extlang_end == self.positions.script_end {
138 None
139 } else {
140 Some(&self.tag[self.positions.extlang_end + 1..self.positions.script_end])
141 }
142 }
143
144 #[inline]
153 pub fn region(&self) -> Option<&str> {
154 if self.positions.script_end == self.positions.region_end {
155 None
156 } else {
157 Some(&self.tag[self.positions.script_end + 1..self.positions.region_end])
158 }
159 }
160
161 #[inline]
170 pub fn variant(&self) -> Option<&str> {
171 if self.positions.region_end == self.positions.variant_end {
172 None
173 } else {
174 Some(&self.tag[self.positions.region_end + 1..self.positions.variant_end])
175 }
176 }
177
178 #[inline]
187 pub fn variant_subtags(&self) -> impl Iterator<Item = &str> {
188 self.variant().unwrap_or("").split_terminator('-')
189 }
190
191 #[inline]
200 pub fn extension(&self) -> Option<&str> {
201 if self.positions.variant_end == self.positions.extension_end {
202 None
203 } else {
204 Some(&self.tag[self.positions.variant_end + 1..self.positions.extension_end])
205 }
206 }
207
208 #[inline]
217 pub fn extension_subtags(&self) -> impl Iterator<Item = (char, &str)> {
218 match self.extension() {
219 Some(parts) => ExtensionsIterator::new(parts),
220 None => ExtensionsIterator::new(""),
221 }
222 }
223
224 #[inline]
233 pub fn private_use(&self) -> Option<&str> {
234 if self.tag.starts_with("x-") {
235 Some(&self.tag)
236 } else if self.positions.extension_end == self.tag.len() {
237 None
238 } else {
239 Some(&self.tag[self.positions.extension_end + 1..])
240 }
241 }
242
243 #[inline]
252 pub fn private_use_subtags(&self) -> impl Iterator<Item = &str> {
253 self.private_use()
254 .map(|part| &part[2..])
255 .unwrap_or("")
256 .split_terminator('-')
257 }
258}
259
260impl LanguageTag<String> {
261 pub fn parse_and_normalize(tag: &str) -> Result<Self, LanguageTagParseError> {
276 let mut output_buffer = String::with_capacity(tag.len());
277 let positions = parse_language_tag(tag, &mut output_buffer)?;
278 Ok(Self {
279 tag: output_buffer,
280 positions,
281 })
282 }
283}
284
285impl<Lft: PartialEq<Rhs>, Rhs> PartialEq<LanguageTag<Rhs>> for LanguageTag<Lft> {
286 #[inline]
287 fn eq(&self, other: &LanguageTag<Rhs>) -> bool {
288 self.tag.eq(&other.tag)
289 }
290}
291
292impl<T: PartialEq<str>> PartialEq<str> for LanguageTag<T> {
293 #[inline]
294 fn eq(&self, other: &str) -> bool {
295 self.tag.eq(other)
296 }
297}
298
299impl<'a, T: PartialEq<&'a str>> PartialEq<&'a str> for LanguageTag<T> {
300 #[inline]
301 fn eq(&self, other: &&'a str) -> bool {
302 self.tag.eq(other)
303 }
304}
305
306impl<T: PartialEq<String>> PartialEq<String> for LanguageTag<T> {
307 #[inline]
308 fn eq(&self, other: &String) -> bool {
309 self.tag.eq(other)
310 }
311}
312
313impl<'a, T: PartialEq<Cow<'a, str>>> PartialEq<Cow<'a, str>> for LanguageTag<T> {
314 #[inline]
315 fn eq(&self, other: &Cow<'a, str>) -> bool {
316 self.tag.eq(other)
317 }
318}
319
320impl<T: PartialEq<str>> PartialEq<LanguageTag<T>> for str {
321 #[inline]
322 fn eq(&self, other: &LanguageTag<T>) -> bool {
323 other.tag.eq(self)
324 }
325}
326
327impl<'a, T: PartialEq<&'a str>> PartialEq<LanguageTag<T>> for &'a str {
328 #[inline]
329 fn eq(&self, other: &LanguageTag<T>) -> bool {
330 other.tag.eq(self)
331 }
332}
333
334impl<T: PartialEq<String>> PartialEq<LanguageTag<T>> for String {
335 #[inline]
336 fn eq(&self, other: &LanguageTag<T>) -> bool {
337 other.tag.eq(self)
338 }
339}
340
341impl<'a, T: PartialEq<Cow<'a, str>>> PartialEq<LanguageTag<T>> for Cow<'a, str> {
342 #[inline]
343 fn eq(&self, other: &LanguageTag<T>) -> bool {
344 other.tag.eq(self)
345 }
346}
347
348impl<T: Eq> Eq for LanguageTag<T> {}
349
350impl<T: Hash> Hash for LanguageTag<T> {
351 #[inline]
352 fn hash<H: Hasher>(&self, state: &mut H) {
353 self.tag.hash(state)
354 }
355}
356
357impl<T: PartialOrd> PartialOrd for LanguageTag<T> {
358 #[inline]
359 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
360 self.tag.partial_cmp(&other.tag)
361 }
362}
363
364impl<T: Ord> Ord for LanguageTag<T> {
365 #[inline]
366 fn cmp(&self, other: &Self) -> Ordering {
367 self.tag.cmp(&other.tag)
368 }
369}
370
371impl<T: Deref<Target = str>> Deref for LanguageTag<T> {
372 type Target = str;
373
374 #[inline]
375 fn deref(&self) -> &str {
376 self.tag.deref()
377 }
378}
379
380impl<T: AsRef<str>> AsRef<str> for LanguageTag<T> {
381 #[inline]
382 fn as_ref(&self) -> &str {
383 self.tag.as_ref()
384 }
385}
386
387impl<T: Borrow<str>> Borrow<str> for LanguageTag<T> {
388 #[inline]
389 fn borrow(&self) -> &str {
390 self.tag.borrow()
391 }
392}
393
394impl<T: fmt::Debug> fmt::Debug for LanguageTag<T> {
395 #[inline]
396 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
397 self.tag.fmt(f)
398 }
399}
400
401impl<T: fmt::Display> fmt::Display for LanguageTag<T> {
402 #[inline]
403 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
404 self.tag.fmt(f)
405 }
406}
407
408impl FromStr for LanguageTag<String> {
409 type Err = LanguageTagParseError;
410
411 #[inline]
412 fn from_str(tag: &str) -> Result<Self, LanguageTagParseError> {
413 Self::parse_and_normalize(tag)
414 }
415}
416
417impl<'a> From<LanguageTag<&'a str>> for LanguageTag<String> {
418 #[inline]
419 fn from(tag: LanguageTag<&'a str>) -> Self {
420 Self {
421 tag: tag.tag.into(),
422 positions: tag.positions,
423 }
424 }
425}
426
427impl<'a> From<LanguageTag<Cow<'a, str>>> for LanguageTag<String> {
428 #[inline]
429 fn from(tag: LanguageTag<Cow<'a, str>>) -> Self {
430 Self {
431 tag: tag.tag.into(),
432 positions: tag.positions,
433 }
434 }
435}
436
437impl From<LanguageTag<Box<str>>> for LanguageTag<String> {
438 #[inline]
439 fn from(tag: LanguageTag<Box<str>>) -> Self {
440 Self {
441 tag: tag.tag.into(),
442 positions: tag.positions,
443 }
444 }
445}
446
447impl<'a> From<LanguageTag<&'a str>> for LanguageTag<Cow<'a, str>> {
448 #[inline]
449 fn from(tag: LanguageTag<&'a str>) -> Self {
450 Self {
451 tag: tag.tag.into(),
452 positions: tag.positions,
453 }
454 }
455}
456
457impl<'a> From<LanguageTag<String>> for LanguageTag<Cow<'a, str>> {
458 #[inline]
459 fn from(tag: LanguageTag<String>) -> Self {
460 Self {
461 tag: tag.tag.into(),
462 positions: tag.positions,
463 }
464 }
465}
466
/// Serializes the tag as its underlying text; the positions are not serialized.
#[cfg(feature = "serde")]
impl<T> Serialize for LanguageTag<T>
where
    T: Serialize,
{
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        Serialize::serialize(&self.tag, serializer)
    }
}
473
/// Deserializes the underlying text, then validates it with `LanguageTag::parse`.
///
/// A parse failure is reported as a custom deserialization error carrying the
/// [`LanguageTagParseError`] message.
#[cfg(feature = "serde")]
impl<'de, T> Deserialize<'de> for LanguageTag<T>
where
    T: Deref<Target = str> + Deserialize<'de>,
{
    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<LanguageTag<T>, D::Error> {
        use serde::de::Error;

        let raw = T::deserialize(deserializer)?;
        match Self::parse(raw) {
            Ok(tag) => Ok(tag),
            Err(error) => Err(D::Error::custom(error)),
        }
    }
}
482
/// Error returned when a string is rejected by the language tag parser.
#[derive(Debug)]
pub struct LanguageTagParseError {
    /// The reason for the rejection.
    kind: TagParseErrorKind,
}

impl fmt::Display for LanguageTagParseError {
    /// Writes the fixed, human-readable message associated with the error kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self.kind {
            TagParseErrorKind::EmptyExtension => {
                "If an extension subtag is present, it must not be empty"
            }
            TagParseErrorKind::EmptyPrivateUse => {
                "If the `x` subtag is present, it must not be empty"
            }
            TagParseErrorKind::ForbiddenChar => "The langtag contains a char not allowed",
            TagParseErrorKind::InvalidSubtag => {
                "A subtag fails to parse, it does not match any other subtags"
            }
            TagParseErrorKind::InvalidLanguage => "The given language subtag is invalid",
            TagParseErrorKind::SubtagTooLong => {
                "A subtag may be eight characters in length at maximum"
            }
            TagParseErrorKind::EmptySubtag => "A subtag should not be empty",
            TagParseErrorKind::TooManyExtlangs => "At maximum three extlangs are allowed",
        };
        f.write_str(message)
    }
}

#[cfg(feature = "std")]
impl std::error::Error for LanguageTagParseError {}

/// The reasons for which the parser rejects an input.
#[derive(Debug)]
enum TagParseErrorKind {
    /// An extension singleton is not followed by any extension subtag.
    EmptyExtension,
    /// The `x` singleton is not followed by any private-use subtag.
    EmptyPrivateUse,
    /// The tag contains a character other than ASCII letters, digits and `-`.
    ForbiddenChar,
    /// A subtag does not fit any component allowed at its position.
    InvalidSubtag,
    /// The first subtag is not a valid language subtag.
    InvalidLanguage,
    /// A subtag is longer than eight characters.
    SubtagTooLong,
    /// A subtag is empty (leading, trailing or doubled `-`).
    EmptySubtag,
    /// More than three extended language subtags are present.
    TooManyExtlangs,
}
541
/// Byte offsets, inside the tag text, at which each successive component ends.
///
/// The parser makes the offsets non-decreasing in declaration order; an absent component
/// has the same end offset as the component before it.
#[derive(Clone, Copy, Debug)]
struct TagElementsPositions {
    /// End of the primary language subtag.
    language_end: usize,
    /// End of the last extended language subtag.
    extlang_end: usize,
    /// End of the script subtag.
    script_end: usize,
    /// End of the region subtag.
    region_end: usize,
    /// End of the last variant subtag.
    variant_end: usize,
    /// End of the last extension subtag.
    extension_end: usize,
}
551
/// Sink receiving the normalized tag text while the parser runs.
trait OutputBuffer: Extend<char> {
    /// Appends one character.
    fn push(&mut self, c: char);

    /// Appends a whole string slice.
    fn push_str(&mut self, s: &str);
}

/// Sink that discards everything, used when only validation is wanted.
#[derive(Default)]
struct VoidOutputBuffer {}

impl Extend<char> for VoidOutputBuffer {
    /// Ignores the characters; the iterator is dropped without being consumed.
    #[inline]
    fn extend<I>(&mut self, _chars: I)
    where
        I: IntoIterator<Item = char>,
    {
    }
}

impl OutputBuffer for VoidOutputBuffer {
    #[inline]
    fn push(&mut self, _c: char) {}

    #[inline]
    fn push_str(&mut self, _s: &str) {}
}

/// Sink that accumulates the normalized text in a `String`.
impl OutputBuffer for String {
    #[inline]
    fn push(&mut self, c: char) {
        // Fully qualified so it is obvious the inherent method is called, not this one.
        String::push(self, c)
    }

    #[inline]
    fn push_str(&mut self, s: &str) {
        String::push_str(self, s)
    }
}
585
586fn parse_language_tag(
588 input: &str,
589 output: &mut impl OutputBuffer,
590) -> Result<TagElementsPositions, LanguageTagParseError> {
591 if let Some(tag) = GRANDFATHEREDS
593 .iter()
594 .find(|record| record.eq_ignore_ascii_case(input))
595 {
596 output.push_str(tag);
597 Ok(TagElementsPositions {
598 language_end: tag.len(),
599 extlang_end: tag.len(),
600 script_end: tag.len(),
601 region_end: tag.len(),
602 variant_end: tag.len(),
603 extension_end: tag.len(),
604 })
605 } else if input.starts_with("x-") || input.starts_with("X-") {
606 if !is_alphanumeric_or_dash(input) {
608 Err(LanguageTagParseError {
609 kind: TagParseErrorKind::ForbiddenChar,
610 })
611 } else if input.len() == 2 {
612 Err(LanguageTagParseError {
613 kind: TagParseErrorKind::EmptyPrivateUse,
614 })
615 } else {
616 output.extend(input.chars().map(|c| c.to_ascii_lowercase()));
617 Ok(TagElementsPositions {
618 language_end: input.len(),
619 extlang_end: input.len(),
620 script_end: input.len(),
621 region_end: input.len(),
622 variant_end: input.len(),
623 extension_end: input.len(),
624 })
625 }
626 } else {
627 parse_langtag(input, output)
628 }
629}
630
631fn parse_langtag(
633 input: &str,
634 output: &mut impl OutputBuffer,
635) -> Result<TagElementsPositions, LanguageTagParseError> {
636 #[derive(PartialEq, Eq)]
637 enum State {
638 Start,
639 AfterLanguage,
640 AfterExtLang,
641 AfterScript,
642 AfterRegion,
643 InExtension { expected: bool },
644 InPrivateUse { expected: bool },
645 }
646
647 let mut state = State::Start;
648 let mut language_end = 0;
649 let mut extlang_end = 0;
650 let mut script_end = 0;
651 let mut region_end = 0;
652 let mut variant_end = 0;
653 let mut extension_end = 0;
654 let mut extlangs_count = 0;
655 for (subtag, end) in SubTagIterator::new(input) {
656 if subtag.is_empty() {
657 return Err(LanguageTagParseError {
658 kind: TagParseErrorKind::EmptySubtag,
659 });
660 }
661 if subtag.len() > 8 {
662 return Err(LanguageTagParseError {
663 kind: TagParseErrorKind::SubtagTooLong,
664 });
665 }
666 if state == State::Start {
667 if subtag.len() < 2 || !is_alphabetic(subtag) {
669 return Err(LanguageTagParseError {
670 kind: TagParseErrorKind::InvalidLanguage,
671 });
672 }
673 language_end = end;
674 output.extend(to_lowercase(subtag));
675 if subtag.len() < 4 {
676 state = State::AfterLanguage;
678 } else {
679 state = State::AfterExtLang;
680 }
681 } else if let State::InPrivateUse { .. } = state {
682 if !is_alphanumeric(subtag) {
683 return Err(LanguageTagParseError {
684 kind: TagParseErrorKind::InvalidSubtag,
685 });
686 }
687 output.push('-');
688 output.extend(to_lowercase(subtag));
689 state = State::InPrivateUse { expected: false };
690 } else if subtag == "x" || subtag == "X" {
691 if let State::InExtension { expected: true } = state {
693 return Err(LanguageTagParseError {
694 kind: TagParseErrorKind::EmptyExtension,
695 });
696 }
697 output.push('-');
698 output.push('x');
699 state = State::InPrivateUse { expected: true };
700 } else if subtag.len() == 1 && is_alphanumeric(subtag) {
701 if let State::InExtension { expected: true } = state {
703 return Err(LanguageTagParseError {
704 kind: TagParseErrorKind::EmptyExtension,
705 });
706 }
707 let extension_tag = subtag.chars().next().unwrap().to_ascii_lowercase();
708 output.push('-');
709 output.push(extension_tag);
710 state = State::InExtension { expected: true };
711 } else if let State::InExtension { .. } = state {
712 if !is_alphanumeric(subtag) {
713 return Err(LanguageTagParseError {
714 kind: TagParseErrorKind::InvalidSubtag,
715 });
716 }
717 extension_end = end;
718 output.push('-');
719 output.extend(to_lowercase(subtag));
720 state = State::InExtension { expected: false };
721 } else if state == State::AfterLanguage && subtag.len() == 3 && is_alphabetic(subtag) {
722 extlangs_count += 1;
723 if extlangs_count > 3 {
724 return Err(LanguageTagParseError {
725 kind: TagParseErrorKind::TooManyExtlangs,
726 });
727 }
728 extlang_end = end;
730 output.push('-');
731 output.extend(to_lowercase(subtag));
732 } else if (state == State::AfterLanguage || state == State::AfterExtLang)
733 && subtag.len() == 4
734 && is_alphabetic(subtag)
735 {
736 script_end = end;
738 output.push('-');
739 output.extend(to_uppercase_first(subtag));
740 state = State::AfterScript;
741 } else if (state == State::AfterLanguage
742 || state == State::AfterExtLang
743 || state == State::AfterScript)
744 && (subtag.len() == 2 && is_alphabetic(subtag)
745 || subtag.len() == 3 && is_numeric(subtag))
746 {
747 region_end = end;
749 output.push('-');
750 output.extend(to_uppercase(subtag));
751 state = State::AfterRegion;
752 } else if (state == State::AfterLanguage
753 || state == State::AfterExtLang
754 || state == State::AfterScript
755 || state == State::AfterRegion)
756 && is_alphanumeric(subtag)
757 && (subtag.len() >= 5 && is_alphabetic(&subtag[0..1])
758 || subtag.len() >= 4 && is_numeric(&subtag[0..1]))
759 {
760 variant_end = end;
762 output.push('-');
763 output.extend(to_lowercase(subtag));
764 state = State::AfterRegion;
765 } else {
766 return Err(LanguageTagParseError {
767 kind: TagParseErrorKind::InvalidSubtag,
768 });
769 }
770 }
771
772 if let State::InExtension { expected: true } = state {
774 return Err(LanguageTagParseError {
775 kind: TagParseErrorKind::EmptyExtension,
776 });
777 }
778 if let State::InPrivateUse { expected: true } = state {
779 return Err(LanguageTagParseError {
780 kind: TagParseErrorKind::EmptyPrivateUse,
781 });
782 }
783
784 if extlang_end < language_end {
786 extlang_end = language_end;
787 }
788 if script_end < extlang_end {
789 script_end = extlang_end;
790 }
791 if region_end < script_end {
792 region_end = script_end;
793 }
794 if variant_end < region_end {
795 variant_end = region_end;
796 }
797 if extension_end < variant_end {
798 extension_end = variant_end;
799 }
800
801 Ok(TagElementsPositions {
802 language_end,
803 extlang_end,
804 script_end,
805 region_end,
806 variant_end,
807 extension_end,
808 })
809}
810
/// Iterator over the extensions of a tag, yielding `(singleton, subtags)` pairs.
///
/// The input is the extension part of a tag, e.g. `a-foo-bar-b-baz`; the subtags of each
/// extension are returned still joined by `-`.
struct ExtensionsIterator<'a> {
    /// The extensions that have not been yielded yet.
    input: &'a str,
}

impl<'a> ExtensionsIterator<'a> {
    /// Creates an iterator over `input`, the `-`-joined extensions of a tag.
    fn new(input: &'a str) -> Self {
        ExtensionsIterator { input }
    }
}

impl<'a> Iterator for ExtensionsIterator<'a> {
    type Item = (char, &'a str);

    fn next(&mut self) -> Option<(char, &'a str)> {
        let mut subtags = self.input.split_terminator('-');
        let singleton = subtags.next()?.chars().next().unwrap();

        // Offset of the subtag being examined; the first one sits right after the two
        // bytes of "<singleton>-".
        let mut offset: usize = 2;
        for subtag in subtags {
            if subtag.len() != 1 {
                offset += subtag.len() + 1;
                continue;
            }
            // A one-character subtag is the next singleton: the current extension stops
            // just before the `-` that precedes it.
            let content = &self.input[2..offset - 1];
            self.input = &self.input[offset..];
            return Some((singleton, content));
        }

        // No further singleton: the rest of the input belongs to this extension.
        let remaining = self.input;
        self.input = "";
        remaining.get(2..).map(|content| (singleton, content))
    }
}
842
/// Iterator over the `-`-separated subtags of a string, yielding each subtag together
/// with the byte offset at which it ends.
struct SubTagIterator<'a> {
    /// The underlying split on `-`.
    split: Split<'a, char>,
    /// Byte offset at which the next subtag starts.
    position: usize,
}

impl<'a> SubTagIterator<'a> {
    /// Creates an iterator over the subtags of `input`.
    #[inline]
    fn new(input: &'a str) -> Self {
        let split = input.split('-');
        SubTagIterator { split, position: 0 }
    }
}

impl<'a> Iterator for SubTagIterator<'a> {
    type Item = (&'a str, usize);

    #[inline]
    fn next(&mut self) -> Option<(&'a str, usize)> {
        self.split.next().map(|subtag| {
            let end = self.position + subtag.len();
            // Skip the one-byte `-` separator to reach the start of the next subtag.
            self.position = end + 1;
            (subtag, end)
        })
    }
}
869
/// Returns `true` if every byte of `s` is an ASCII letter (vacuously true for `""`).
#[inline]
fn is_alphabetic(s: &str) -> bool {
    // Every byte of a non-ASCII character is >= 0x80, hence never an ASCII letter.
    s.bytes().all(|b| b.is_ascii_alphabetic())
}
874
/// Returns `true` if every byte of `s` is an ASCII digit (vacuously true for `""`).
#[inline]
fn is_numeric(s: &str) -> bool {
    // Every byte of a non-ASCII character is >= 0x80, hence never an ASCII digit.
    s.bytes().all(|b| b.is_ascii_digit())
}
879
/// Returns `true` if every byte of `s` is an ASCII letter or digit (vacuously true for `""`).
#[inline]
fn is_alphanumeric(s: &str) -> bool {
    // Every byte of a non-ASCII character is >= 0x80, hence never ASCII alphanumeric.
    s.bytes().all(|b| b.is_ascii_alphanumeric())
}
884
/// Returns `true` if every byte of `s` is an ASCII letter, an ASCII digit or `-`
/// (vacuously true for `""`).
#[inline]
fn is_alphanumeric_or_dash(s: &str) -> bool {
    // Every byte of a non-ASCII character is >= 0x80, hence matches neither test.
    s.bytes().all(|b| b == b'-' || b.is_ascii_alphanumeric())
}
889
/// Yields the characters of `s` with ASCII letters uppercased; other characters are
/// passed through unchanged.
#[inline]
fn to_uppercase(s: &str) -> impl Iterator<Item = char> + '_ {
    fn upper(c: char) -> char {
        c.to_ascii_uppercase()
    }
    s.chars().map(upper)
}
894
/// Yields the characters of `s` with the first one ASCII-uppercased and the following
/// ones ASCII-lowercased.
///
/// # Panics
///
/// Panics if `s` is empty.
#[inline]
fn to_uppercase_first(s: &str) -> impl Iterator<Item = char> + '_ {
    let mut rest = s.chars();
    let head = rest.next().unwrap().to_ascii_uppercase();
    let tail = rest.map(|c| c.to_ascii_lowercase());
    once(head).chain(tail)
}
901
/// Yields the characters of `s` with ASCII letters lowercased; other characters are
/// passed through unchanged.
#[inline]
fn to_lowercase(s: &str) -> impl Iterator<Item = char> + '_ {
    fn lower(c: char) -> char {
        c.to_ascii_lowercase()
    }
    s.chars().map(lower)
}
906
/// Tags that `parse_language_tag` accepts as a whole, before any subtag parsing.
///
/// The input is compared ASCII case-insensitively against these entries, and the spelling
/// used here is the one written to the normalized output.
#[rustfmt::skip]
const GRANDFATHEREDS: [&str; 26] = [
    "art-lojban", "cel-gaulish", "en-GB-oed",
    "i-ami", "i-bnn", "i-default", "i-enochian", "i-hak", "i-klingon", "i-lux",
    "i-mingo", "i-navajo", "i-pwn", "i-tao", "i-tay", "i-tsu",
    "no-bok", "no-nyn",
    "sgn-BE-FR", "sgn-BE-NL", "sgn-CH-DE",
    "zh-guoyu", "zh-hakka", "zh-min", "zh-min-nan", "zh-xiang",
];