1use chumsky::prelude::*;
2use logos::Logos;
3use lsp_core::prelude::{
4 spanned, Spanned, SparqlAggregate, SparqlCall, SparqlExpr, SparqlKeyword, StringStyle, Token,
5};
6
7#[allow(non_camel_case_types)]
8#[derive(Logos, Debug, PartialEq)]
9#[logos(skip r"[ \t\n\f\r]+")] enum SparqlToken {
11 #[token("REGEX", |_| SparqlKeyword::Regex, ignore(case))]
12 #[token("SUBSTR", |_| SparqlKeyword::Substr, ignore(case))]
13 #[token("REPLACE", |_| SparqlKeyword::Replace, ignore(case))]
14 #[token("EXISTS", |_| SparqlKeyword::Exists, ignore(case))]
15 #[token("SELECT", |_| SparqlKeyword::Select, ignore(case))]
16 #[token("DISTINCT", |_| SparqlKeyword::Distinct, ignore(case))]
17 #[token("REDUCED", |_| SparqlKeyword::Reduced, ignore(case))]
18 #[token("OPTIONAL", |_| SparqlKeyword::Optional, ignore(case))]
19 #[token("UNION", |_| SparqlKeyword::Union, ignore(case))]
20 #[token("AS", |_| SparqlKeyword::As, ignore(case))]
21 #[token("CONSTRUCT", |_| SparqlKeyword::Construct, ignore(case))]
22 #[token("WHERE", |_| SparqlKeyword::Where, ignore(case))]
23 #[token("DESCRIBE", |_| SparqlKeyword::Describe, ignore(case))]
24 #[token("ASK", |_| SparqlKeyword::Ask, ignore(case))]
25 #[token("FROM", |_| SparqlKeyword::From, ignore(case))]
26 #[token("NAMED", |_| SparqlKeyword::Named, ignore(case))]
27 #[token("GROUP", |_| SparqlKeyword::Group, ignore(case))]
28 #[token("BY", |_| SparqlKeyword::By, ignore(case))]
29 #[token("HAVING", |_| SparqlKeyword::Having, ignore(case))]
30 #[token("ORDER", |_| SparqlKeyword::Order, ignore(case))]
31 #[token("ASC", |_| SparqlKeyword::Asc, ignore(case))]
32 #[token("DESC", |_| SparqlKeyword::Desc, ignore(case))]
33 #[token("LIMIT", |_| SparqlKeyword::Limit, ignore(case))]
34 #[token("OFFSET", |_| SparqlKeyword::Offset, ignore(case))]
35 #[token("VALUES", |_| SparqlKeyword::Values, ignore(case))]
36 #[token("LOAD", |_| SparqlKeyword::Load, ignore(case))]
37 #[token("SILENT", |_| SparqlKeyword::Silent, ignore(case))]
38 #[token("CLEAR", |_| SparqlKeyword::Clear, ignore(case))]
39 #[token("DROP", |_| SparqlKeyword::Drop, ignore(case))]
40 #[token("CREATE", |_| SparqlKeyword::Create, ignore(case))]
41 #[token("ADD", |_| SparqlKeyword::Add, ignore(case))]
42 #[token("MOVE", |_| SparqlKeyword::Move, ignore(case))]
43 #[token("COPY", |_| SparqlKeyword::Copy, ignore(case))]
44 #[token("INSERT", |_| SparqlKeyword::Insert, ignore(case))]
45 #[token("DATA", |_| SparqlKeyword::Data, ignore(case))]
46 #[token("DELETE", |_| SparqlKeyword::Delete, ignore(case))]
47 #[token("WITH", |_| SparqlKeyword::With, ignore(case))]
48 #[token("USING", |_| SparqlKeyword::Using, ignore(case))]
49 #[token("DEFAULT", |_| SparqlKeyword::Default, ignore(case))]
50 #[token("ALL", |_| SparqlKeyword::All, ignore(case))]
51 #[token("GRAPH", |_| SparqlKeyword::Graph, ignore(case))]
52 #[token("SERVICE", |_| SparqlKeyword::Service, ignore(case))]
53 #[token("BIND", |_| SparqlKeyword::Bind, ignore(case))]
54 #[token("UNDEF", |_| SparqlKeyword::Undef, ignore(case))]
55 #[token("MINUS", |_| SparqlKeyword::Minus, ignore(case))]
56 #[token("FILTER", |_| SparqlKeyword::Filter, ignore(case))]
57 Kwd(SparqlKeyword),
58
59 #[token("COUNT", |_| SparqlAggregate::Count, ignore(case))]
60 #[token("SUM", |_| SparqlAggregate::Sum, ignore(case))]
61 #[token("MIN", |_| SparqlAggregate::Min, ignore(case))]
62 #[token("MAX", |_| SparqlAggregate::Max, ignore(case))]
63 #[token("AVG", |_| SparqlAggregate::Avg, ignore(case))]
64 #[token("SAMPLE", |_| SparqlAggregate::Sample, ignore(case))]
65 #[token("GROUP_CONCAT", |_| SparqlAggregate::GroupConcat, ignore(case))]
66 Agg(SparqlAggregate),
67
68 #[token("STR", |_| SparqlCall::Str, ignore(case))]
69 #[token("LANG", |_| SparqlCall::Lang, ignore(case))]
70 #[token("langMatches", |_| SparqlCall::LangMatches, ignore(case))]
71 #[token("LANGDIR", |_| SparqlCall::LangDir, ignore(case))]
72 #[token("datatype", |_| SparqlCall::Datatype, ignore(case))]
73 #[token("BOUND", |_| SparqlCall::Bound, ignore(case))]
74 #[token("IRI", |_| SparqlCall::Iri, ignore(case))]
75 #[token("URI", |_| SparqlCall::Uri, ignore(case))]
76 #[token("BNODE", |_| SparqlCall::Bnode, ignore(case))]
77 #[token("RAND", |_| SparqlCall::Rand, ignore(case))]
78 #[token("ABS", |_| SparqlCall::Abs, ignore(case))]
79 #[token("CEIL", |_| SparqlCall::Ceil, ignore(case))]
80 #[token("FLOOR", |_| SparqlCall::Floor, ignore(case))]
81 #[token("ROUND", |_| SparqlCall::Round, ignore(case))]
82 #[token("CONCAT", |_| SparqlCall::Concat, ignore(case))]
83 #[token("STRLEN", |_| SparqlCall::StrLen, ignore(case))]
84 #[token("UCASE", |_| SparqlCall::Ucase, ignore(case))]
85 #[token("lcase", |_| SparqlCall::Lcase, ignore(case))]
86 #[token("ENCODE_FOR_URI", |_| SparqlCall::EncodeForUri, ignore(case))]
87 #[token("CONTAINS", |_| SparqlCall::Contains, ignore(case))]
88 #[token("STRSTARTS", |_| SparqlCall::StrStarts, ignore(case))]
89 #[token("STRENDS", |_| SparqlCall::StrEnds, ignore(case))]
90 #[token("STRBEFORE", |_| SparqlCall::StrBefore, ignore(case))]
91 #[token("STRAFTER", |_| SparqlCall::StrAfter, ignore(case))]
92 #[token("YEAR", |_| SparqlCall::Year, ignore(case))]
93 #[token("MONTH", |_| SparqlCall::Month, ignore(case))]
94 #[token("DAY", |_| SparqlCall::Day, ignore(case))]
95 #[token("HOURS", |_| SparqlCall::Hours, ignore(case))]
96 #[token("MINUTES", |_| SparqlCall::Minutes, ignore(case))]
97 #[token("SECONDS", |_| SparqlCall::Seconds, ignore(case))]
98 #[token("TIMEZONE", |_| SparqlCall::Timezone, ignore(case))]
99 #[token("TZ", |_| SparqlCall::Tz, ignore(case))]
100 #[token("NOW", |_| SparqlCall::Now, ignore(case))]
101 #[token("UUID", |_| SparqlCall::Uuid, ignore(case))]
102 #[token("STRUUID", |_| SparqlCall::StrUuid, ignore(case))]
103 #[token("MD5", |_| SparqlCall::Md5, ignore(case))]
104 #[token("SHA1", |_| SparqlCall::Sha1, ignore(case))]
105 #[token("SHA256", |_| SparqlCall::Sha256, ignore(case))]
106 #[token("SHA384", |_| SparqlCall::Sha384, ignore(case))]
107 #[token("SHA512", |_| SparqlCall::Sha512, ignore(case))]
108 #[token("COALESCE", |_| SparqlCall::Coalesce, ignore(case))]
109 #[token("IF", |_| SparqlCall::If, ignore(case))]
110 #[token("STRLANG", |_| SparqlCall::StrLang, ignore(case))]
111 #[token("STRLANGDIR", |_| SparqlCall::StrLangDir, ignore(case))]
112 #[token("STRDT", |_| SparqlCall::StrDt, ignore(case))]
113 #[token("sameTerm", |_| SparqlCall::SameTerm, ignore(case))]
114 #[token("isIRI", |_| SparqlCall::IsIri, ignore(case))]
115 #[token("isURI", |_| SparqlCall::IsUri, ignore(case))]
116 #[token("isBLANK", |_| SparqlCall::IsBlank, ignore(case))]
117 #[token("isLITERAL", |_| SparqlCall::IsLiteral, ignore(case))]
118 #[token("isNUMBERIC", |_| SparqlCall::IsNumeric, ignore(case))]
119 #[token("hasLANG", |_| SparqlCall::HasLang, ignore(case))]
120 #[token("hasLANGDIR", |_| SparqlCall::HasLangDir, ignore(case))]
121 #[token("isTRIPLE", |_| SparqlCall::IsTriple, ignore(case))]
122 #[token("TRIPLE", |_| SparqlCall::Triple, ignore(case))]
123 #[token("SUBJECT", |_| SparqlCall::Subject, ignore(case))]
124 #[token("PREDICATE", |_| SparqlCall::Predicate, ignore(case))]
125 #[token("OBJECT", |_| SparqlCall::Object, ignore(case))]
126 Call(SparqlCall),
127
128 #[token("in", |_| SparqlExpr::In, ignore(case))]
129 #[token("not", |_| SparqlExpr::Not, ignore(case))]
130 #[token("||", |_| SparqlExpr::Or, ignore(case))]
131 #[token("&&", |_| SparqlExpr::And, ignore(case))]
132 #[token("=", |_| SparqlExpr::Equal, ignore(case))]
133 #[token("!=", |_| SparqlExpr::NotEqual, ignore(case))]
134 #[token("<", |_| SparqlExpr::Lt, ignore(case))]
135 #[token(">", |_| SparqlExpr::Gt, ignore(case))]
136 #[token("<=", |_| SparqlExpr::Lte, ignore(case))]
137 #[token(">=", |_| SparqlExpr::Gte, ignore(case))]
138 #[token("+", |_| SparqlExpr::Plus, ignore(case))]
139 #[token("-", |_| SparqlExpr::Minus, ignore(case))]
140 #[token("*", |_| SparqlExpr::Times, ignore(case))]
141 #[token("/", |_| SparqlExpr::Divide, ignore(case))]
142 #[token("!", |_| SparqlExpr::Exclamation, ignore(case))]
143 Expr(SparqlExpr),
144
145 #[token("prefix", ignore(case))]
146 SqPrefix,
147
148 #[token("base", ignore(case))]
149 SqBase,
150
151 #[token("[")]
152 SqOpen,
153
154 #[token("]")]
155 SqClose,
156
157 #[token("(")]
158 BraceOpen,
159
160 #[token(")")]
161 BraceClose,
162
163 #[token("a")]
164 TypeTag,
165
166 #[token(";")]
167 Semi,
168
169 #[token(",")]
170 Comma,
171 #[token(".")]
172 Stop,
173
174 #[token("^^")]
175 DataTag,
176
177 #[token("true")]
178 True,
179
180 #[token("false")]
181 False,
182
183 #[token("{")]
184 CurlOpen,
185
186 #[token("}")]
187 CurlClose,
188
189 #[regex(r#"(_:((([A-Z]|[a-z]|[\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF])|_)|[0-9])((([A-Z]|[a-z]|[\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF])|_)|\-|[0-9]|\u00B7|[\u0300-\u036F]|[\u203F-\u2040])*(\.*((([A-Z]|[a-z]|[\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF])|_)|\-|[0-9]|\u00B7|[\u0300-\u036F]|[\u203F-\u2040])((([A-Z]|[a-z]|[\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF])|_)|\-|[0-9]|\u00B7|[\u0300-\u036F]|[\u203F-\u2040])*)*)"#)]
190 BLANK_NODE_LABEL,
191
192 #[regex(r#"([+-]?(([0-9]+\.[0-9]*([eE][+-]?[0-9]+))|(\.([0-9])+([eE][+-]?[0-9]+))|(([0-9])+([eE][+-]?[0-9]+))))"#)]
193 DOUBLE,
194
195 #[regex(r#"([+-]?([0-9])*\.([0-9])+)"#)]
196 DECIMAL,
197
198 #[regex(r#"([+-]?[0-9]+)"#)]
199 INTEGER,
200
201 #[regex(r#"([+-]?[0-9]+\.)"#)]
202 INTEGER_WITH_DOT,
203
204 #[regex(r#"(@[a-zA-Z][a-zA-Z]*(\-[a-zA-Z0-9][a-zA-Z0-9]*)*)"#)]
205 LANGTAG,
206
207 #[regex(r#"("([^\x22\x5C\x0A\x0D]|(\\[tbnrf\"'\\])|((\\u([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f]))|(\\U([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f]))))*")"#)]
208 STRING_LITERAL_QUOTE,
209
210 #[regex(r#"('([^\x27\x5C\x0A\x0D]|(\\[tbnrf\"'\\])|((\\u([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f]))|(\\U([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f]))))*')"#)]
211 STRING_LITERAL_SINGLE_QUOTE,
212
213 #[regex(r#"('''(('|'')?([^'\\]|(\\[tbnrf\"'\\])|((\\u([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f]))|(\\U([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])))))*''')"#)]
214 STRING_LITERAL_LONG_SINGLE_QUOTE,
215
216 #[regex(r#"("""(("|"")?([^"\\]|(\\[tbnrf\"'\\])|((\\u([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f]))|(\\U([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])))))*""")"#)]
217 STRING_LITERAL_LONG_QUOTE,
218
219 #[regex(r#"(<([^\x00-\x20<>"{}|^`\\]|((\\u([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f]))|(\\U([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f]))))*>)"#)]
220 IRIREF,
221
222 #[regex(r#"((([A-Z]|[a-z]|[\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF])((((([A-Z]|[a-z]|[\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF])|_)|\-|[0-9]|\u00B7|[\u0300-\u036F]|[\u203F-\u2040])|\.)*((([A-Z]|[a-z]|[\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF])|_)|\-|[0-9]|\u00B7|[\u0300-\u036F]|[\u203F-\u2040]))?)?:)"#)]
223 PNAME_NS,
224
225 #[regex(r#"(((([A-Z]|[a-z]|[\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF])((((([A-Z]|[a-z]|[\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF])|_)|\-|[0-9]|\u00B7|[\u0300-\u036F]|[\u203F-\u2040])|\.)*((([A-Z]|[a-z]|[\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF])|_)|\-|[0-9]|\u00B7|[\u0300-\u036F]|[\u203F-\u2040]))?)?:)(((([A-Z]|[a-z]|[\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF])|_)|:|[0-9]|((%([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f]))|(\\(_|\~|\.|\-|!|\$|\&|\\"|\(|\)|\*|\+|"|'|;|=|,|/|\?|\#|@|%))))(\.|(((([A-Z]|[a-z]|[\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF])|_)|\-|[0-9]|\u00B7|[\u0300-\u036F]|[\u203F-\u2040])|:|((%([0-9]|[A-F]|[a-f])([0-9]|[A-F]|[a-f]))|(\\(_|\~|\.|\-|!|\$|\&|\\"|\(|\)|\*|\+|"|'|;|=|,|/|\?|\#|@|%)))))*))"#)]
226 PNAME_LN,
227
228 #[regex(r#"#[^\u000D\u000A]*"#)]
229 Comment,
230
231 #[regex(r#"((\?|\$)((([A-Z]|[a-z]|[\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF])|_)|[0-9])((([A-Z]|[a-z]|[\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF])|_)|[0-9]|\u00B7|[\u0300-\u036F]|[\u203F-\u2040])*)"#)]
232 Variable,
233}
234
235pub fn parse_tokens_str<'a>(text: &'a str) -> (Vec<Spanned<Token>>, Vec<Simple<char>>) {
236 let mut tokens = Vec::new();
237 let mut errors = Vec::new();
238 let mut lex = SparqlToken::lexer(text);
239 while let Some(x) = lex.next() {
240 let t = || text[lex.span()].to_string();
241 let t2 = |d_start, d_end| {
242 let span = lex.span();
243 let (start, end) = (span.start, span.end);
244 text[start + d_start..end - d_end].to_string()
245 };
246
247 match x {
248 Ok(token) => {
249 let t = match token {
250 SparqlToken::Comment => Token::Comment(t()),
251 SparqlToken::SqPrefix => Token::SparqlPrefix,
252 SparqlToken::SqBase => Token::SparqlBase,
253 SparqlToken::SqOpen => Token::SqOpen,
254 SparqlToken::SqClose => Token::SqClose,
255 SparqlToken::BraceOpen => Token::BracketOpen,
256 SparqlToken::BraceClose => Token::BracketClose,
257 SparqlToken::TypeTag => Token::PredType,
258 SparqlToken::CurlOpen => Token::CurlOpen,
259 SparqlToken::CurlClose => Token::CurlClose,
260 SparqlToken::Semi => Token::PredicateSplit,
261 SparqlToken::Comma => Token::Comma,
262 SparqlToken::Stop => Token::Stop,
263 SparqlToken::DataTag => Token::DataTypeDelim,
264 SparqlToken::True => Token::True,
265 SparqlToken::False => Token::False,
266 SparqlToken::BLANK_NODE_LABEL => Token::BlankNodeLabel(t2(2, 0)),
267 SparqlToken::DOUBLE => Token::Number(t()),
268 SparqlToken::DECIMAL => Token::Number(t()),
269 SparqlToken::INTEGER => Token::Number(t()),
270 SparqlToken::INTEGER_WITH_DOT => {
271 let span = lex.span();
272 let end = span.end - 1;
273 let start = span.start;
274 tokens.push(spanned(
275 Token::Number(text[start..end].to_string()),
276 start..end,
277 ));
278 tokens.push(spanned(Token::Stop, end..end + 1));
279
280 continue;
281 }
282 SparqlToken::LANGTAG => Token::LangTag(t2(1, 0)),
283 SparqlToken::STRING_LITERAL_LONG_SINGLE_QUOTE => {
284 Token::Str(t2(3, 3), StringStyle::SingleLong)
285 }
286 SparqlToken::STRING_LITERAL_QUOTE => Token::Str(t2(1, 1), StringStyle::Double),
287 SparqlToken::STRING_LITERAL_LONG_QUOTE => {
288 Token::Str(t2(3, 3), StringStyle::DoubleLong)
289 }
290 SparqlToken::STRING_LITERAL_SINGLE_QUOTE => {
291 Token::Str(t2(1, 1), StringStyle::Single)
292 }
293 SparqlToken::IRIREF => Token::IRIRef(t2(1, 1)),
294 SparqlToken::PNAME_LN | SparqlToken::PNAME_NS => {
295 let st = &text[lex.span()];
296 let ends_with_stop = st.ends_with('.');
297
298 if ends_with_stop {
299 let span = lex.span();
300 let end = span.end - 1;
301 let start = span.start;
302 if let Some((first, second)) = text[start..end].split_once(":") {
303 tokens.push(spanned(
304 Token::PNameLN(Some(first.to_string()), second.to_string()),
305 start..end,
306 ));
307 tokens.push(spanned(Token::Stop, end..end + 1));
308 } else {
309 tokens.push(spanned(
310 Token::Invalid(text[start..end].to_string()),
311 start..end,
312 ));
313 tokens.push(spanned(Token::Stop, end..end + 1));
314 }
315 continue;
316 } else {
317 if let Some((first, second)) = text[lex.span()].split_once(":") {
318 Token::PNameLN(Some(first.to_string()), second.to_string())
319 } else {
320 Token::Invalid(t())
321 }
322 }
323 }
324 SparqlToken::Kwd(sparql_keyword) => Token::SparqlKeyword(sparql_keyword),
325 SparqlToken::Agg(sparql_aggregate) => Token::SparqlAggregate(sparql_aggregate),
326 SparqlToken::Call(sparql_call) => Token::SparqlCall(sparql_call),
327 SparqlToken::Expr(sparql_expr) => Token::SparqlExpr(sparql_expr),
328 SparqlToken::Variable => Token::Variable(t()),
329 };
330 tokens.push(spanned(t, lex.span()));
331 }
332 Err(_) => {
333 tokens.push(spanned(Token::Invalid(t()), lex.span()));
334 errors.push(Simple::custom(
335 lex.span(),
336 format!("Unexpected token '{}'", &text[lex.span()]),
337 ))
338 }
339 }
340 }
341
342 (tokens, errors)
343}
344
345pub fn parse_tokens_str_safe(text: &str) -> Result<Vec<Spanned<Token>>, Vec<Simple<char>>> {
346 let (t, e) = parse_tokens_str(text);
347 if e.is_empty() {
348 Ok(t)
349 } else {
350 Err(e)
351 }
352}
353
#[cfg(test)]
mod tests {
    use super::parse_tokens_str;

    /// Lexes `query` and checks the token count, then that no errors were reported.
    fn assert_lexes(query: &str, expected_tokens: usize) {
        let (tokens, errors) = parse_tokens_str(query);
        assert_eq!(tokens.len(), expected_tokens);
        assert_eq!(errors, vec![]);
    }

    /// DESCRIBE query with a string literal object.
    #[test]
    fn parse_random_tokens_1() {
        assert_lexes(
            r#"
PREFIX ent: <http://org.example.com/employees#>
DESCRIBE ?x WHERE { ?x ent:employeeId "1234" }
        "#,
            11,
        );
    }

    /// SELECT query with a FILTER using a built-in call.
    #[test]
    fn parse_random_tokens_2() {
        assert_lexes(
            r#"
PREFIX dc: <http://purl.org/dc/elements/1.1/>
SELECT ?title
WHERE {
  ?x dc:title ?title
  FILTER regex(?title, "^SPARQL")
}
        "#,
            18,
        );
    }

    /// SELECT query with a decimal comparison and statement terminators.
    #[test]
    fn parse_random_tokens_3() {
        assert_lexes(
            r#"
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX ns: <http://example.org/ns#>

SELECT ?title ?price
WHERE {
  ?x ns:price ?price .
  FILTER (?price < 30.5)
  ?x dc:title ?title .
}
        "#,
            26,
        );
    }

    /// SELECT query with an OPTIONAL group.
    #[test]
    fn parse_random_tokens_4() {
        assert_lexes(
            r#"
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?name ?mbox
WHERE {
  ?x foaf:name ?name .
  OPTIONAL { ?x foaf:mbox ?mbox }
}
        "#,
            19,
        );
    }

    /// ASK query with a predicate list and an IRI object.
    #[test]
    fn parse_random_tokens_5() {
        assert_lexes(
            r#"
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
ASK {
  ?x foaf:name "Alice" ;
  foaf:mbox <mailto:alice@work.example>
}
        "#,
            12,
        );
    }

    /// CONSTRUCT query with a GRAPH pattern and a typed literal.
    #[test]
    fn parse_random_tokens_6() {
        assert_lexes(
            r#"
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX app: <http://example.org/ns#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

CONSTRUCT { ?s ?p ?o } WHERE
{
  GRAPH ?g { ?s ?p ?o } .
  ?g dc:publisher <http://www.w3.org/> .
  ?g dc:date ?date .
  FILTER ( app:customDate(?date) > "2005-02-28T00:00:00Z"^^xsd:dateTime ) .
}
        "#,
            46,
        );
    }

    /// CONSTRUCT query with blank node labels and UNION groups.
    #[test]
    fn parse_random_tokens_7() {
        assert_lexes(
            r#"
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX vcard: <http://www.w3.org/2001/vcard-rdf/3.0#>

CONSTRUCT {
  ?x vcard:N _:v .
  _:v vcard:givenName ?gname .
  _:v vcard:familyName ?fname
} WHERE {
  { ?x foaf:firstname ?gname } UNION { ?x foaf:givenname ?gname } .
  { ?x foaf:surname ?fname } UNION { ?x foaf:family_name ?fname } .
}
        "#,
            47,
        );
    }

    /// SELECT query with an arithmetic projection; also dumps the tokens.
    #[test]
    fn parse_random_tokens_8() {
        let query = r#"
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX ns: <http://example.org/ns#>
SELECT ?title (?p*(1-?discount) AS ?price)
{ ?x ns:price ?p .
  ?x dc:title ?title .
  [] ns:discount ?discount
}
        "#;

        let (tokens, errors) = parse_tokens_str(query);
        tokens.iter().for_each(|token| println!("t {:?}", token));
        assert_eq!(tokens.len(), 33);
        assert_eq!(errors, vec![]);
    }
}