sophia_api/term/
language_tag.rs1use lazy_static::lazy_static;
6use regex::Regex;
7use std::borrow::Borrow;
8use std::cmp::{Ordering, PartialOrd};
9use std::fmt::Debug;
10use thiserror::Error;
11
12lazy_static! {
13 static ref LANG_TAG: Regex = Regex::new(r#"(?x)
21 ^
22 [A-Za-z][A-Za-z0-9]*
23 (-[A-Za-z0-9]+)*
24 $
25 "#).unwrap();
26}
27
/// Newtype wrapping a string-like value `T` that holds a language tag.
///
/// The wrapped value is private; instances are obtained through the
/// constructors of this type, and the text is read back with `as_str`.
/// Equality, ordering and hashing of language tags are implemented by hand
/// in this module (they ignore ASCII case), which is why only `Clone`,
/// `Copy` and `Debug` are derived here.
#[derive(Clone, Copy, Debug)]
pub struct LanguageTag<T>(T)
where
    T: Borrow<str>;
48
49impl<T: Borrow<str>> LanguageTag<T> {
50 pub fn new(tag: T) -> Result<Self, InvalidLanguageTag> {
53 if LANG_TAG.is_match(tag.borrow()) {
54 Ok(LanguageTag(tag))
55 } else {
56 Err(InvalidLanguageTag(tag.borrow().to_string()))
57 }
58 }
59
60 pub fn new_unchecked(tag: T) -> Self {
64 assert!(LANG_TAG.is_match(tag.borrow()));
65 LanguageTag(tag)
66 }
67
68 pub fn unwrap(self) -> T {
70 self.0
71 }
72
73 pub fn as_str(&self) -> &str {
75 self.0.borrow()
76 }
77
78 pub fn as_ref(&self) -> LanguageTag<&str> {
80 LanguageTag(self.0.borrow())
81 }
82
83 pub fn map_unchecked<F, U>(self, f: F) -> LanguageTag<U>
89 where
90 F: FnOnce(T) -> U,
91 U: Borrow<str>,
92 {
93 LanguageTag(f(self.0))
94 }
95}
96
97impl LanguageTag<&'static str> {
98 pub const fn new_unchecked_const(inner: &'static str) -> Self {
102 Self(inner)
103 }
104}
105
106impl<T: Borrow<str>> std::ops::Deref for LanguageTag<T> {
107 type Target = T;
108
109 fn deref(&self) -> &T {
110 &self.0
111 }
112}
113
114impl<T: Borrow<str>> AsRef<T> for LanguageTag<T> {
115 fn as_ref(&self) -> &T {
116 &self.0
117 }
118}
119
120impl<T: Borrow<str>> AsRef<str> for LanguageTag<T> {
121 fn as_ref(&self) -> &str {
122 self.0.borrow()
123 }
124}
125
126impl<T: Borrow<str>> Borrow<T> for LanguageTag<T> {
127 fn borrow(&self) -> &T {
128 &self.0
129 }
130}
131
132impl<T: Borrow<str>> Borrow<str> for LanguageTag<T> {
133 fn borrow(&self) -> &str {
134 self.0.borrow()
135 }
136}
137
138impl<T: Borrow<str>, U: Borrow<str>> PartialEq<LanguageTag<T>> for LanguageTag<U> {
139 fn eq(&self, other: &LanguageTag<T>) -> bool {
140 self.as_str().eq_ignore_ascii_case(other.as_str())
141 }
142}
143
144impl<T: Borrow<str>> PartialEq<str> for LanguageTag<T> {
145 fn eq(&self, other: &str) -> bool {
146 self.as_str().eq_ignore_ascii_case(other)
147 }
148}
149
150impl<T: Borrow<str>> Eq for LanguageTag<T> {}
151
152impl<T: Borrow<str>> PartialOrd for LanguageTag<T> {
153 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
154 Some(self.cmp(other))
155 }
156}
157
158impl<T: Borrow<str>> PartialOrd<str> for LanguageTag<T> {
159 fn partial_cmp(&self, other: &str) -> Option<Ordering> {
160 let iter1 = self.as_str().chars().map(|c| c.to_ascii_lowercase());
161 let iter2 = other.chars().map(|c| c.to_ascii_lowercase());
162 iter1.partial_cmp(iter2)
163 }
164}
165
166impl<T: Borrow<str>> Ord for LanguageTag<T> {
167 fn cmp(&self, other: &LanguageTag<T>) -> Ordering {
168 let iter1 = self.as_str().chars().map(|c| c.to_ascii_lowercase());
169 let iter2 = other.as_str().chars().map(|c| c.to_ascii_lowercase());
170 iter1.cmp(iter2)
171 }
172}
173
174impl<T: Borrow<str>> std::hash::Hash for LanguageTag<T> {
175 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
176 self.as_str()
177 .chars()
178 .map(|c| c.to_ascii_lowercase())
179 .for_each(|c| c.hash(state));
180 }
181}
182
183#[derive(Debug, Error)]
185#[error("The given language tag '{0}' does not comply with BCP47")]
186pub struct InvalidLanguageTag(pub String);
187
188impl<'a> std::ops::Mul<LanguageTag<&'a str>> for &'a str {
189 type Output = super::SimpleTerm<'a>;
190
191 fn mul(self, rhs: LanguageTag<&'a str>) -> Self::Output {
192 super::SimpleTerm::LiteralLanguage(self.into(), rhs.map_unchecked(mownstr::MownStr::from))
193 }
194}
195
#[cfg(test)]
mod test {
    use crate::term::Term;

    use super::*;
    use test_case::test_case;

    // Tags that the checked constructor must accept.
    #[test_case("en")]
    #[test_case("fr")]
    #[test_case("fr-FR")]
    #[test_case("fr-ca")]
    #[test_case("fr-056")]
    #[test_case("ja-Hani")]
    #[test_case("ja-Hira")]
    #[test_case("abc-de-fg-hi")]
    #[test_case("x-abc-de-fg-hi")]
    fn valid(tag: &str) {
        assert!(LanguageTag::new(tag).is_ok());
    }

    // Strings that the checked constructor must reject.
    #[test_case(""; "empty")]
    #[test_case(" "; "space")]
    #[test_case("éh")]
    #[test_case("a.")]
    fn invalid(tag: &str) {
        assert!(LanguageTag::new(tag).is_err());
    }

    // Equality ignores ASCII case, both between two tags and between a tag
    // and a plain `str`.
    #[test_case("fr", "fr"; "all_lower")]
    #[test_case("fr-ca", "fr-ca"; "all_lower_with_country")]
    #[test_case("fr", "FR"; "language_differ")]
    #[test_case("en-us", "en-US"; "country_differ")]
    fn case_insensitive_eq(tag1: &str, tag2: &str) {
        let ltag1 = LanguageTag::new_unchecked(tag1);
        let ltag2 = LanguageTag::new_unchecked(tag2);
        assert_eq!(ltag1, ltag2);
        // `&LanguageTag<_>` vs `&str` exercises the `PartialEq<str>` impl.
        assert_eq!(&ltag1, tag2);
    }

    // Ordering ignores ASCII case, both between two tags and between a tag
    // and a plain `str`.
    #[test_case("EN", "FR"; "all_upper")]
    #[test_case("en", "fr"; "all_lower")]
    #[test_case("en", "FR"; "lower_upper")]
    #[test_case("EN", "fr"; "upper_lower")]
    #[test_case("en-UK", "en-US"; "counry_all_upper")]
    #[test_case("en-uk", "en-us"; "counry_all_lower")]
    #[test_case("en-uk", "en-US"; "counry_lower_upper")]
    #[test_case("en-UK", "en-us"; "counry_upper_lower")]
    fn case_insensitive_cmp(tag1: &str, tag2: &str) {
        let ltag1 = LanguageTag::new_unchecked(tag1);
        let ltag2 = LanguageTag::new_unchecked(tag2);
        assert!(ltag1 <= ltag2);
        // `&LanguageTag<_>` vs `&str` exercises the `PartialOrd<str>` impl.
        assert!(&ltag1 <= tag2);
    }

    // `&str * LanguageTag` must build a language-tagged literal exposing the
    // given lexical form and language tag.
    #[test]
    fn test_product() {
        let en = LanguageTag::new("en").unwrap();
        let frfr = LanguageTag::new("fr-FR").unwrap();
        let t1 = "chat" * en;
        assert!(t1.is_literal());
        assert_eq!(t1.lexical_form().unwrap(), "chat");
        assert_eq!(t1.language_tag().unwrap(), en);
        let t2 = "chat" * frfr;
        assert!(t2.is_literal());
        assert_eq!(t2.lexical_form().unwrap(), "chat");
        assert_eq!(t2.language_tag().unwrap(), frfr);
        let t3 = "cat" * en;
        assert!(t3.is_literal());
        assert_eq!(t3.lexical_form().unwrap(), "cat");
        assert_eq!(t3.language_tag().unwrap(), en);
    }
}