1use crate::error::{QueryResultsParseError, QueryResultsSyntaxError};
4use oxrdf::vocab::rdf;
5use oxrdf::*;
6use quick_xml::escape::{escape, unescape};
7use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event};
8use quick_xml::reader::Config;
9use quick_xml::{Decoder, Error, Reader, Writer};
10use std::borrow::Cow;
11use std::collections::HashMap;
12use std::io::{self, BufReader, Read, Write};
13use std::mem::take;
14#[cfg(feature = "async-tokio")]
15use std::sync::Arc;
16#[cfg(feature = "async-tokio")]
17use tokio::io::{AsyncRead, AsyncWrite, BufReader as AsyncBufReader};
18
19pub fn write_boolean_xml_result<W: Write>(writer: W, value: bool) -> io::Result<W> {
20 let mut writer = Writer::new(writer);
21 for event in inner_write_boolean_xml_result(value) {
22 writer.write_event(event)?;
23 }
24 Ok(writer.into_inner())
25}
26
/// Asynchronously serializes a boolean query result as a SPARQL XML results document.
///
/// Errors reported by the XML writer are converted to `io::Error` with `map_xml_error`.
#[cfg(feature = "async-tokio")]
pub async fn tokio_async_write_boolean_xml_result<W: AsyncWrite + Unpin>(
    writer: W,
    value: bool,
) -> io::Result<W> {
    let mut xml = Writer::new(writer);
    for event in inner_write_boolean_xml_result(value) {
        if let Err(e) = xml.write_event_async(event).await {
            return Err(map_xml_error(e));
        }
    }
    Ok(xml.into_inner())
}
41
42fn inner_write_boolean_xml_result(value: bool) -> [Event<'static>; 8] {
43 [
44 Event::Decl(BytesDecl::new("1.0", None, None)),
45 Event::Start(
46 BytesStart::new("sparql")
47 .with_attributes([("xmlns", "http://www.w3.org/2005/sparql-results#")]),
48 ),
49 Event::Start(BytesStart::new("head")),
50 Event::End(BytesEnd::new("head")),
51 Event::Start(BytesStart::new("boolean")),
52 Event::Text(BytesText::new(if value { "true" } else { "false" })),
53 Event::End(BytesEnd::new("boolean")),
54 Event::End(BytesEnd::new("sparql")),
55 ]
56}
57
/// Synchronous serializer of query solutions to a SPARQL XML results document.
pub struct WriterXmlSolutionsSerializer<W: Write> {
    /// Produces the XML events to write.
    inner: InnerXmlSolutionsSerializer,
    /// XML writer wrapping the output given to `start`.
    writer: Writer<W>,
}
62
63impl<W: Write> WriterXmlSolutionsSerializer<W> {
64 pub fn start(writer: W, variables: &[Variable]) -> io::Result<Self> {
65 let mut writer = Writer::new(writer);
66 let mut buffer = Vec::with_capacity(48);
67 let inner = InnerXmlSolutionsSerializer::start(&mut buffer, variables);
68 Self::do_write(&mut writer, buffer)?;
69 Ok(Self { inner, writer })
70 }
71
72 pub fn serialize<'a>(
73 &mut self,
74 solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
75 ) -> io::Result<()> {
76 let mut buffer = Vec::with_capacity(48);
77 self.inner.write(&mut buffer, solution);
78 Self::do_write(&mut self.writer, buffer)
79 }
80
81 pub fn finish(mut self) -> io::Result<W> {
82 let mut buffer = Vec::with_capacity(4);
83 self.inner.finish(&mut buffer);
84 Self::do_write(&mut self.writer, buffer)?;
85 Ok(self.writer.into_inner())
86 }
87
88 fn do_write(writer: &mut Writer<W>, output: Vec<Event<'_>>) -> io::Result<()> {
89 for event in output {
90 writer.write_event(event)?;
91 }
92 Ok(())
93 }
94}
95
/// Asynchronous (Tokio `AsyncWrite`) serializer of query solutions to a SPARQL XML results
/// document.
#[cfg(feature = "async-tokio")]
pub struct TokioAsyncWriterXmlSolutionsSerializer<W: AsyncWrite + Unpin> {
    /// Produces the XML events to write.
    inner: InnerXmlSolutionsSerializer,
    /// XML writer wrapping the output given to `start`.
    writer: Writer<W>,
}
101
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> TokioAsyncWriterXmlSolutionsSerializer<W> {
    /// Writes the document prologue (declaration, `<sparql>`, `<head>` with one `<variable>`
    /// per entry of `variables`, and the opening `<results>` tag) and returns the serializer.
    pub async fn start(writer: W, variables: &[Variable]) -> io::Result<Self> {
        let mut events = Vec::with_capacity(48);
        let inner = InnerXmlSolutionsSerializer::start(&mut events, variables);
        let mut writer = Writer::new(writer);
        Self::do_write(&mut writer, events).await?;
        Ok(Self { inner, writer })
    }

    /// Writes one `<result>` element containing a `<binding>` per `(variable, term)` pair.
    pub async fn serialize<'a>(
        &mut self,
        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
    ) -> io::Result<()> {
        let mut events = Vec::with_capacity(48);
        self.inner.write(&mut events, solution);
        Self::do_write(&mut self.writer, events).await
    }

    /// Writes the closing `</results>` and `</sparql>` tags and returns the underlying writer.
    pub async fn finish(mut self) -> io::Result<W> {
        let mut events = Vec::with_capacity(4);
        self.inner.finish(&mut events);
        Self::do_write(&mut self.writer, events).await?;
        Ok(self.writer.into_inner())
    }

    /// Writes every event of `output` in order, converting the first XML writer error
    /// to an `io::Error` with `map_xml_error`.
    async fn do_write(writer: &mut Writer<W>, output: Vec<Event<'_>>) -> io::Result<()> {
        for event in output {
            if let Err(e) = writer.write_event_async(event).await {
                return Err(map_xml_error(e));
            }
        }
        Ok(())
    }
}
138
/// I/O-free core of the solutions serializers: it only pushes XML events into a buffer,
/// leaving the actual (sync or async) writing to its callers. It holds no state.
struct InnerXmlSolutionsSerializer;
140
141impl InnerXmlSolutionsSerializer {
142 fn start<'a>(output: &mut Vec<Event<'a>>, variables: &'a [Variable]) -> Self {
143 output.push(Event::Decl(BytesDecl::new("1.0", None, None)));
144 output.push(Event::Start(BytesStart::new("sparql").with_attributes([(
145 "xmlns",
146 "http://www.w3.org/2005/sparql-results#",
147 )])));
148 output.push(Event::Start(BytesStart::new("head")));
149 for variable in variables {
150 output.push(Event::Empty(
151 BytesStart::new("variable").with_attributes([("name", variable.as_str())]),
152 ));
153 }
154 output.push(Event::End(BytesEnd::new("head")));
155 output.push(Event::Start(BytesStart::new("results")));
156 Self {}
157 }
158
159 #[allow(clippy::unused_self)]
160 fn write<'a>(
161 &self,
162 output: &mut Vec<Event<'a>>,
163 solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
164 ) {
165 output.push(Event::Start(BytesStart::new("result")));
166 for (variable, value) in solution {
167 output.push(Event::Start(
168 BytesStart::new("binding").with_attributes([("name", variable.as_str())]),
169 ));
170 write_xml_term(output, value);
171 output.push(Event::End(BytesEnd::new("binding")));
172 }
173 output.push(Event::End(BytesEnd::new("result")));
174 }
175
176 #[allow(clippy::unused_self)]
177 fn finish(self, output: &mut Vec<Event<'_>>) {
178 output.push(Event::End(BytesEnd::new("results")));
179 output.push(Event::End(BytesEnd::new("sparql")));
180 }
181}
182
183fn write_xml_term<'a>(output: &mut Vec<Event<'a>>, term: TermRef<'a>) {
184 match term {
185 TermRef::NamedNode(uri) => {
186 output.push(Event::Start(BytesStart::new("uri")));
187 output.push(Event::Text(BytesText::new(uri.as_str())));
188 output.push(Event::End(BytesEnd::new("uri")));
189 }
190 TermRef::BlankNode(bnode) => {
191 output.push(Event::Start(BytesStart::new("bnode")));
192 output.push(Event::Text(BytesText::new(bnode.as_str())));
193 output.push(Event::End(BytesEnd::new("bnode")));
194 }
195 TermRef::Literal(literal) => {
196 let mut start = BytesStart::new("literal");
197 if let Some(language) = literal.language() {
198 start.push_attribute(("xml:lang", language));
199 } else if !literal.is_plain() {
200 start.push_attribute(("datatype", literal.datatype().as_str()))
201 }
202 output.push(Event::Start(start));
203 output.push(Event::Text(BytesText::from_escaped(
204 escape_including_bound_whitespaces(literal.value()),
205 )));
206 output.push(Event::End(BytesEnd::new("literal")));
207 }
208 #[cfg(feature = "rdf-star")]
209 TermRef::Triple(triple) => {
210 output.push(Event::Start(BytesStart::new("triple")));
211 output.push(Event::Start(BytesStart::new("subject")));
212 write_xml_term(output, triple.subject.as_ref().into());
213 output.push(Event::End(BytesEnd::new("subject")));
214 output.push(Event::Start(BytesStart::new("predicate")));
215 write_xml_term(output, triple.predicate.as_ref().into());
216 output.push(Event::End(BytesEnd::new("predicate")));
217 output.push(Event::Start(BytesStart::new("object")));
218 write_xml_term(output, triple.object.as_ref());
219 output.push(Event::End(BytesEnd::new("object")));
220 output.push(Event::End(BytesEnd::new("triple")));
221 }
222 }
223}
224
/// Result of reading the beginning of a SPARQL XML results document from a `Read`.
#[allow(clippy::large_enum_variant)]
pub enum ReaderXmlQueryResultsParserOutput<R: Read> {
    /// A solutions document: the declared variables and a parser for the solutions that follow.
    Solutions {
        variables: Vec<Variable>,
        solutions: ReaderXmlSolutionsParser<R>,
    },
    /// A boolean document and its value.
    Boolean(bool),
}
233
234impl<R: Read> ReaderXmlQueryResultsParserOutput<R> {
235 pub fn read(reader: R) -> Result<Self, QueryResultsParseError> {
236 let mut reader = Reader::from_reader(BufReader::new(reader));
237 XmlInnerQueryResultsParser::set_options(reader.config_mut());
238 let mut reader_buffer = Vec::new();
239 let mut inner = XmlInnerQueryResultsParser {
240 state: ResultsState::Start,
241 variables: Vec::new(),
242 decoder: reader.decoder(),
243 };
244 loop {
245 reader_buffer.clear();
246 let event = reader.read_event_into(&mut reader_buffer)?;
247 if let Some(result) = inner.read_event(event)? {
248 return Ok(match result {
249 XmlInnerQueryResults::Solutions {
250 variables,
251 solutions,
252 } => Self::Solutions {
253 variables,
254 solutions: ReaderXmlSolutionsParser {
255 reader,
256 inner: solutions,
257 reader_buffer,
258 },
259 },
260 XmlInnerQueryResults::Boolean(value) => Self::Boolean(value),
261 });
262 }
263 }
264 }
265}
266
/// Pull parser returning the solutions of a SPARQL XML results document read from a `Read`.
pub struct ReaderXmlSolutionsParser<R: Read> {
    /// XML reader, positioned after the part of the document consumed by the header parser.
    reader: Reader<BufReader<R>>,
    /// Event-driven state machine building the solutions.
    inner: XmlInnerSolutionsParser,
    /// Scratch buffer cleared and reused for each event.
    reader_buffer: Vec<u8>,
}
272
273impl<R: Read> ReaderXmlSolutionsParser<R> {
274 pub fn parse_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
275 loop {
276 self.reader_buffer.clear();
277 let event = self.reader.read_event_into(&mut self.reader_buffer)?;
278 if event == Event::Eof {
279 return Ok(None);
280 }
281 if let Some(solution) = self.inner.read_event(event)? {
282 return Ok(Some(solution));
283 }
284 }
285 }
286}
287
/// Result of reading the beginning of a SPARQL XML results document from a Tokio `AsyncRead`.
#[cfg(feature = "async-tokio")]
#[allow(clippy::large_enum_variant)]
pub enum TokioAsyncReaderXmlQueryResultsParserOutput<R: AsyncRead + Unpin> {
    /// A solutions document: the declared variables and a parser for the solutions that follow.
    Solutions {
        variables: Vec<Variable>,
        solutions: TokioAsyncReaderXmlSolutionsParser<R>,
    },
    /// A boolean document and its value.
    Boolean(bool),
}
297
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> TokioAsyncReaderXmlQueryResultsParserOutput<R> {
    /// Reads events from `reader` until the header parser yields either a boolean value
    /// or the start of the solutions, and returns the matching variant.
    ///
    /// For solutions, the XML reader and its buffer are handed over to the returned parser.
    pub async fn read(reader: R) -> Result<Self, QueryResultsParseError> {
        let mut reader = Reader::from_reader(AsyncBufReader::new(reader));
        XmlInnerQueryResultsParser::set_options(reader.config_mut());
        let mut inner = XmlInnerQueryResultsParser {
            state: ResultsState::Start,
            variables: Vec::new(),
            decoder: reader.decoder(),
        };
        let mut reader_buffer = Vec::new();
        let result = loop {
            reader_buffer.clear();
            let event = reader.read_event_into_async(&mut reader_buffer).await?;
            if let Some(result) = inner.read_event(event)? {
                break result;
            }
        };
        Ok(match result {
            XmlInnerQueryResults::Boolean(value) => Self::Boolean(value),
            XmlInnerQueryResults::Solutions {
                variables,
                solutions,
            } => Self::Solutions {
                variables,
                solutions: TokioAsyncReaderXmlSolutionsParser {
                    reader,
                    inner: solutions,
                    reader_buffer,
                },
            },
        })
    }
}
331
/// Pull parser returning the solutions of a SPARQL XML results document read from a Tokio
/// `AsyncRead`.
#[cfg(feature = "async-tokio")]
pub struct TokioAsyncReaderXmlSolutionsParser<R: AsyncRead + Unpin> {
    /// XML reader, positioned after the part of the document consumed by the header parser.
    reader: Reader<AsyncBufReader<R>>,
    /// Event-driven state machine building the solutions.
    inner: XmlInnerSolutionsParser,
    /// Scratch buffer cleared and reused for each event.
    reader_buffer: Vec<u8>,
}
338
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> TokioAsyncReaderXmlSolutionsParser<R> {
    /// Reads events until a full solution is available and returns it,
    /// or returns `Ok(None)` when the end of the input is reached.
    pub async fn parse_next(
        &mut self,
    ) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
        loop {
            self.reader_buffer.clear();
            let next = self
                .reader
                .read_event_into_async(&mut self.reader_buffer)
                .await?;
            match next {
                Event::Eof => return Ok(None),
                event => {
                    if let Some(solution) = self.inner.read_event(event)? {
                        return Ok(Some(solution));
                    }
                }
            }
        }
    }
}
359
/// Result of reading the beginning of a SPARQL XML results document held in a byte slice.
#[allow(clippy::large_enum_variant)]
pub enum SliceXmlQueryResultsParserOutput<'a> {
    /// A solutions document: the declared variables and a parser for the solutions that follow.
    Solutions {
        variables: Vec<Variable>,
        solutions: SliceXmlSolutionsParser<'a>,
    },
    /// A boolean document and its value.
    Boolean(bool),
}
368
369impl<'a> SliceXmlQueryResultsParserOutput<'a> {
370 pub fn read(slice: &'a [u8]) -> Result<Self, QueryResultsSyntaxError> {
371 Self::do_read(slice).map_err(|e| match e {
372 QueryResultsParseError::Syntax(e) => e,
373 QueryResultsParseError::Io(e) => {
374 unreachable!("I/O error are not possible for slice but found {e}")
375 }
376 })
377 }
378
379 fn do_read(slice: &'a [u8]) -> Result<Self, QueryResultsParseError> {
380 let mut reader = Reader::from_reader(slice);
381 XmlInnerQueryResultsParser::set_options(reader.config_mut());
382 let mut reader_buffer = Vec::new();
383 let mut inner = XmlInnerQueryResultsParser {
384 state: ResultsState::Start,
385 variables: Vec::new(),
386 decoder: reader.decoder(),
387 };
388 loop {
389 reader_buffer.clear();
390 let event = reader.read_event_into(&mut reader_buffer)?;
391 if let Some(result) = inner.read_event(event)? {
392 return Ok(match result {
393 XmlInnerQueryResults::Solutions {
394 variables,
395 solutions,
396 } => Self::Solutions {
397 variables,
398 solutions: SliceXmlSolutionsParser {
399 reader,
400 inner: solutions,
401 reader_buffer,
402 },
403 },
404 XmlInnerQueryResults::Boolean(value) => Self::Boolean(value),
405 });
406 }
407 }
408 }
409}
410
/// Pull parser returning the solutions of a SPARQL XML results document held in a byte slice.
pub struct SliceXmlSolutionsParser<'a> {
    /// XML reader, positioned after the part of the document consumed by the header parser.
    reader: Reader<&'a [u8]>,
    /// Event-driven state machine building the solutions.
    inner: XmlInnerSolutionsParser,
    /// Scratch buffer cleared and reused for each event.
    reader_buffer: Vec<u8>,
}
416
417impl SliceXmlSolutionsParser<'_> {
418 pub fn parse_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsSyntaxError> {
419 self.do_parse_next().map_err(|e| match e {
420 QueryResultsParseError::Syntax(e) => e,
421 QueryResultsParseError::Io(e) => {
422 unreachable!("I/O error are not possible for slice but found {e}")
423 }
424 })
425 }
426
427 fn do_parse_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
428 loop {
429 self.reader_buffer.clear();
430 let event = self.reader.read_event_into(&mut self.reader_buffer)?;
431 if event == Event::Eof {
432 return Ok(None);
433 }
434 if let Some(solution) = self.inner.read_event(event)? {
435 return Ok(Some(solution));
436 }
437 }
438 }
439}
440
/// What `XmlInnerQueryResultsParser::read_event` yields once the document kind is known.
#[allow(clippy::large_enum_variant)]
enum XmlInnerQueryResults {
    /// The `<results>` tag was opened: declared variables plus the parser for what follows.
    Solutions {
        variables: Vec<Variable>,
        solutions: XmlInnerSolutionsParser,
    },
    /// The text of the `<boolean>` element was read.
    Boolean(bool),
}
449
/// Position of the header parser inside the document.
#[derive(Clone, Copy)]
enum ResultsState {
    /// Nothing opened yet: the next opening tag must be `<sparql>`.
    Start,
    /// Inside `<sparql>`: the next opening tag must be `<head>`.
    Sparql,
    /// Inside `<head>`: `<variable>` and `<link>` tags are accepted.
    Head,
    /// `</head>` has been read: `<boolean>` or `<results>` is expected.
    AfterHead,
    /// Inside `<boolean>`: only the `true`/`false` text is accepted.
    Boolean,
}
458
/// Event-driven parser for the part of the document that precedes the solutions
/// (or that contains the boolean value).
struct XmlInnerQueryResultsParser {
    /// Current position in the document.
    state: ResultsState,
    /// Variables declared so far by `<variable>` tags, in declaration order.
    variables: Vec<Variable>,
    /// Decoder obtained from the XML reader, used to turn raw bytes into strings.
    decoder: Decoder,
}
464
465impl XmlInnerQueryResultsParser {
466 fn set_options(config: &mut Config) {
467 config.trim_text(true);
468 config.expand_empty_elements = true;
469 }
470
471 pub fn read_event(
472 &mut self,
473 event: Event<'_>,
474 ) -> Result<Option<XmlInnerQueryResults>, QueryResultsParseError> {
475 match event {
476 Event::Start(event) => match self.state {
477 ResultsState::Start => {
478 if event.local_name().as_ref() == b"sparql" {
479 self.state = ResultsState::Sparql;
480 Ok(None)
481 } else {
482 Err(QueryResultsSyntaxError::msg(format!("Expecting <sparql> tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into())
483 }
484 }
485 ResultsState::Sparql => {
486 if event.local_name().as_ref() == b"head" {
487 self.state = ResultsState::Head;
488 Ok(None)
489 } else {
490 Err(QueryResultsSyntaxError::msg(format!("Expecting <head> tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into())
491 }
492 }
493 ResultsState::Head => {
494 if event.local_name().as_ref() == b"variable" {
495 let name = event.attributes()
496 .filter_map(Result::ok)
497 .find(|attr| attr.key.local_name().as_ref() == b"name")
498 .ok_or_else(|| QueryResultsSyntaxError::msg("No name attribute found for the <variable> tag"))?;
499 let name = unescape(&self.decoder.decode(&name.value)?)?.into_owned();
500 let variable = Variable::new(name).map_err(|e| QueryResultsSyntaxError::msg(format!("Invalid variable name: {e}")))?;
501 if self.variables.contains(&variable) {
502 return Err(QueryResultsSyntaxError::msg(format!(
503 "The variable {variable} is declared twice"
504 ))
505 .into());
506 }
507 self.variables.push(variable);
508 Ok(None)
509 } else if event.local_name().as_ref() == b"link" {
510 Ok(None)
512 } else {
513 Err(QueryResultsSyntaxError::msg(format!("Expecting <variable> or <link> tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into())
514 }
515 }
516 ResultsState::AfterHead => {
517 if event.local_name().as_ref() == b"boolean" {
518 self.state = ResultsState::Boolean;
519 Ok(None)
520 } else if event.local_name().as_ref() == b"results" {
521 let mut mapping = HashMap::new();
522 for (i, var) in self.variables.iter().enumerate() {
523 mapping.insert(var.clone().into_string(), i);
524 }
525 Ok(Some(XmlInnerQueryResults::Solutions {
526 variables: take(&mut self.variables),
527 solutions: XmlInnerSolutionsParser {
528 decoder: self.decoder,
529 mapping,
530 state_stack: vec![State::Start, State::Start],
531 new_bindings: Vec::new(),
532 current_var: None,
533 term: None,
534 lang: None,
535 datatype: None,
536 subject_stack: Vec::new(),
537 predicate_stack: Vec::new(),
538 object_stack: Vec::new(),
539 },
540 }))
541 } else if event.local_name().as_ref() != b"link" && event.local_name().as_ref() != b"results" && event.local_name().as_ref() != b"boolean" {
542 Err(QueryResultsSyntaxError::msg(format!("Expecting sparql tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into())
543 } else {
544 Ok(None)
545 }
546 }
547 ResultsState::Boolean => Err(QueryResultsSyntaxError::msg(format!("Unexpected tag inside of <boolean> tag: <{}>", self.decoder.decode(event.name().as_ref())?)).into())
548 },
549 Event::Text(event) => {
550 let value = event.unescape()?;
551 match self.state {
552 ResultsState::Boolean => {
553 if value == "true" {
554 Ok(Some(XmlInnerQueryResults::Boolean(true)))
555 } else if value == "false" {
556 Ok(Some(XmlInnerQueryResults::Boolean(false)))
557 } else {
558 Err(QueryResultsSyntaxError::msg(format!("Unexpected boolean value. Found '{value}'")).into())
559 }
560 }
561 _ => Err(QueryResultsSyntaxError::msg(format!("Unexpected textual value found: '{value}'")).into())
562 }
563 }
564 Event::End(event) => {
565 if let ResultsState::Head = self.state {
566 if event.local_name().as_ref() == b"head" {
567 self.state = ResultsState::AfterHead
568 }
569 Ok(None)
570 } else {
571 Err(QueryResultsSyntaxError::msg("Unexpected early file end. All results file should have a <head> and a <result> or <boolean> tag").into())
572 }
573 }
574 Event::Eof => Err(QueryResultsSyntaxError::msg("Unexpected early file end. All results file should have a <head> and a <result> or <boolean> tag").into()),
575 Event::Comment(_) | Event::Decl(_) | Event::PI(_) | Event::DocType(_) => {
576 Ok(None)
577 }
578 Event::Empty(_) => unreachable!("Empty events are expended"),
579 Event::CData(_) => {
580 Err(QueryResultsSyntaxError::msg(
581 "<![CDATA[...]]> are not supported in SPARQL XML results",
582 )
583 .into())
584 }
585 }
586 }
587}
588
/// Element kinds tracked on the solutions parser's stack; one entry is pushed when the
/// matching opening tag is read and popped on the next closing tag.
enum State {
    /// Outside of any `<result>`; the stack initially holds two of these.
    // NOTE(review): presumably one entry each for the enclosing <sparql> and <results> - confirm.
    Start,
    /// Inside `<result>`.
    Result,
    /// Inside `<binding>`.
    Binding,
    /// Inside `<uri>`.
    Uri,
    /// Inside `<bnode>`.
    BNode,
    /// Inside `<literal>`.
    Literal,
    /// Inside `<triple>`.
    Triple,
    /// Inside `<subject>`.
    Subject,
    /// Inside `<predicate>`.
    Predicate,
    /// Inside `<object>`.
    Object,
}
601
/// Event-driven parser for the solutions part of the document.
struct XmlInnerSolutionsParser {
    /// Decoder obtained from the XML reader, used to turn raw bytes into strings.
    decoder: Decoder,
    /// Variable name -> index of its value inside each solution vector.
    mapping: HashMap<String, usize>,
    /// Stack of the currently open elements.
    state_stack: Vec<State>,
    /// Solution being built; reset when a `<result>` tag opens.
    new_bindings: Vec<Option<Term>>,
    /// `name` attribute of the most recently opened `<binding>`.
    current_var: Option<String>,
    /// Most recently parsed term that has not been consumed by an enclosing element yet.
    term: Option<Term>,
    /// `xml:lang` attribute of the `<literal>` being read, if any.
    lang: Option<String>,
    /// `datatype` attribute of the `<literal>` being read, if any.
    datatype: Option<NamedNode>,
    /// Terms read from `<subject>` elements, waiting for their `<triple>` to close.
    subject_stack: Vec<Term>,
    /// Terms read from `<predicate>` elements, waiting for their `<triple>` to close.
    predicate_stack: Vec<Term>,
    /// Terms read from `<object>` elements, waiting for their `<triple>` to close.
    object_stack: Vec<Term>,
}
615
616impl XmlInnerSolutionsParser {
617 #[allow(clippy::unwrap_in_result)]
618 pub fn read_event(
619 &mut self,
620 event: Event<'_>,
621 ) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
622 match event {
623 Event::Start(event) => match self.state_stack.last().ok_or_else(|| {
624 QueryResultsSyntaxError::msg("Extra XML is not allowed at the end of the document")
625 })? {
626 State::Start => {
627 if event.local_name().as_ref() == b"result" {
628 self.new_bindings = vec![None; self.mapping.len()];
629 self.state_stack.push(State::Result);
630 Ok(None)
631 } else {
632 Err(QueryResultsSyntaxError::msg(format!(
633 "Expecting <result>, found <{}>",
634 self.decoder.decode(event.name().as_ref())?
635 ))
636 .into())
637 }
638 }
639 State::Result => {
640 if event.local_name().as_ref() == b"binding" {
641 let Some(attr) = event
642 .attributes()
643 .filter_map(Result::ok)
644 .find(|attr| attr.key.local_name().as_ref() == b"name")
645 else {
646 return Err(QueryResultsSyntaxError::msg(
647 "No name attribute found for the <binding> tag",
648 )
649 .into());
650 };
651 self.current_var =
652 Some(unescape(&self.decoder.decode(&attr.value)?)?.into_owned());
653 self.state_stack.push(State::Binding);
654 Ok(None)
655 } else {
656 Err(QueryResultsSyntaxError::msg(format!(
657 "Expecting <binding>, found <{}>",
658 self.decoder.decode(event.name().as_ref())?
659 ))
660 .into())
661 }
662 }
663 State::Binding | State::Subject | State::Predicate | State::Object => {
664 if self.term.is_some() {
665 return Err(QueryResultsSyntaxError::msg(
666 "There is already a value for the current binding",
667 )
668 .into());
669 }
670 if event.local_name().as_ref() == b"uri" {
671 self.state_stack.push(State::Uri);
672 Ok(None)
673 } else if event.local_name().as_ref() == b"bnode" {
674 self.state_stack.push(State::BNode);
675 Ok(None)
676 } else if event.local_name().as_ref() == b"literal" {
677 for attr in event.attributes() {
678 let attr = attr.map_err(Error::from)?;
679 if attr.key.as_ref() == b"xml:lang" {
680 self.lang = Some(
681 unescape(&self.decoder.decode(&attr.value)?)?.into_owned(),
682 );
683 } else if attr.key.local_name().as_ref() == b"datatype" {
684 let iri = self.decoder.decode(&attr.value)?;
685 let iri = unescape(&iri)?;
686 self.datatype =
687 Some(NamedNode::new(iri.as_ref()).map_err(|e| {
688 QueryResultsSyntaxError::msg(format!(
689 "Invalid datatype IRI '{iri}': {e}"
690 ))
691 })?);
692 }
693 }
694 self.state_stack.push(State::Literal);
695 Ok(None)
696 } else if event.local_name().as_ref() == b"triple" {
697 self.state_stack.push(State::Triple);
698 Ok(None)
699 } else {
700 Err(QueryResultsSyntaxError::msg(format!(
701 "Expecting <uri>, <bnode> or <literal> found <{}>",
702 self.decoder.decode(event.name().as_ref())?
703 ))
704 .into())
705 }
706 }
707 State::Triple => {
708 if event.local_name().as_ref() == b"subject" {
709 self.state_stack.push(State::Subject);
710 Ok(None)
711 } else if event.local_name().as_ref() == b"predicate" {
712 self.state_stack.push(State::Predicate);
713 Ok(None)
714 } else if event.local_name().as_ref() == b"object" {
715 self.state_stack.push(State::Object);
716 Ok(None)
717 } else {
718 Err(QueryResultsSyntaxError::msg(format!(
719 "Expecting <subject>, <predicate> or <object> found <{}>",
720 self.decoder.decode(event.name().as_ref())?
721 ))
722 .into())
723 }
724 }
725 State::Uri => Err(QueryResultsSyntaxError::msg(format!(
726 "<uri> must only contain a string, found <{}>",
727 self.decoder.decode(event.name().as_ref())?
728 ))
729 .into()),
730 State::BNode => Err(QueryResultsSyntaxError::msg(format!(
731 "<uri> must only contain a string, found <{}>",
732 self.decoder.decode(event.name().as_ref())?
733 ))
734 .into()),
735 State::Literal => Err(QueryResultsSyntaxError::msg(format!(
736 "<uri> must only contain a string, found <{}>",
737 self.decoder.decode(event.name().as_ref())?
738 ))
739 .into()),
740 },
741 Event::Text(event) => {
742 let data = event.unescape()?;
743 match self.state_stack.last() {
744 Some(State::Uri) => {
745 self.term = Some(
746 NamedNode::new(data.to_string())
747 .map_err(|e| {
748 QueryResultsSyntaxError::msg(format!(
749 "Invalid IRI value '{data}': {e}"
750 ))
751 })?
752 .into(),
753 );
754 Ok(None)
755 }
756 Some(State::BNode) => {
757 self.term = Some(
758 BlankNode::new(data.to_string())
759 .map_err(|e| {
760 QueryResultsSyntaxError::msg(format!(
761 "Invalid blank node value '{data}': {e}"
762 ))
763 })?
764 .into(),
765 );
766 Ok(None)
767 }
768 Some(State::Literal) => {
769 self.term = Some(
770 build_literal(data, self.lang.take(), self.datatype.take())?.into(),
771 );
772 Ok(None)
773 }
774 _ => Err(QueryResultsSyntaxError::msg(format!(
775 "Unexpected textual value found: {data}"
776 ))
777 .into()),
778 }
779 }
780 Event::End(_) => match self.state_stack.pop().ok_or_else(|| {
781 QueryResultsSyntaxError::msg("Extra XML is not allowed at the end of the document")
782 })? {
783 State::Start | State::Uri => Ok(None),
784 State::Result => Ok(Some(take(&mut self.new_bindings))),
785 State::Binding => {
786 if let Some(var) = &self.current_var {
787 if let Some(var) = self.mapping.get(var) {
788 self.new_bindings[*var] = self.term.take()
789 } else {
790 return Err(
791 QueryResultsSyntaxError::msg(format!("The variable '{var}' is used in a binding but not declared in the variables list")).into()
792 );
793 }
794 } else {
795 return Err(QueryResultsSyntaxError::msg(
796 "No name found for <binding> tag",
797 )
798 .into());
799 }
800 Ok(None)
801 }
802 State::Subject => {
803 if let Some(subject) = self.term.take() {
804 self.subject_stack.push(subject)
805 }
806 Ok(None)
807 }
808 State::Predicate => {
809 if let Some(predicate) = self.term.take() {
810 self.predicate_stack.push(predicate)
811 }
812 Ok(None)
813 }
814 State::Object => {
815 if let Some(object) = self.term.take() {
816 self.object_stack.push(object)
817 }
818 Ok(None)
819 }
820 State::BNode => {
821 if self.term.is_none() {
822 self.term = Some(BlankNode::default().into())
824 }
825 Ok(None)
826 }
827 State::Literal => {
828 if self.term.is_none() {
829 self.term =
831 Some(build_literal("", self.lang.take(), self.datatype.take())?.into())
832 }
833 Ok(None)
834 }
835 State::Triple => {
836 #[cfg(feature = "rdf-star")]
837 if let (Some(subject), Some(predicate), Some(object)) = (
838 self.subject_stack.pop(),
839 self.predicate_stack.pop(),
840 self.object_stack.pop(),
841 ) {
842 self.term = Some(
843 Triple::new(
844 match subject {
845 Term::NamedNode(subject) => subject.into(),
846 Term::BlankNode(subject) => subject.into(),
847 Term::Triple(subject) => Subject::Triple(subject),
848 Term::Literal(_) => {
849 return Err(QueryResultsSyntaxError::msg(
850 "The <subject> value should not be a <literal>",
851 )
852 .into());
853 }
854 },
855 match predicate {
856 Term::NamedNode(predicate) => predicate,
857 _ => {
858 return Err(QueryResultsSyntaxError::msg(
859 "The <predicate> value should be an <uri>",
860 )
861 .into());
862 }
863 },
864 object,
865 )
866 .into(),
867 );
868 Ok(None)
869 } else {
870 Err(QueryResultsSyntaxError::msg(
871 "A <triple> should contain a <subject>, a <predicate> and an <object>",
872 )
873 .into())
874 }
875 #[cfg(not(feature = "rdf-star"))]
876 {
877 Err(QueryResultsSyntaxError::msg(
878 "The <triple> tag is only supported with RDF-star",
879 )
880 .into())
881 }
882 }
883 },
884 Event::Eof | Event::Comment(_) | Event::Decl(_) | Event::PI(_) | Event::DocType(_) => {
885 Ok(None)
886 }
887 Event::Empty(_) => unreachable!("Empty events are expended"),
888 Event::CData(_) => Err(QueryResultsSyntaxError::msg(
889 "<![CDATA[...]]> are not supported in SPARQL XML results",
890 )
891 .into()),
892 }
893 }
894}
895
896fn build_literal(
897 value: impl Into<String>,
898 lang: Option<String>,
899 datatype: Option<NamedNode>,
900) -> Result<Literal, QueryResultsParseError> {
901 match lang {
902 Some(lang) => {
903 if let Some(datatype) = datatype {
904 if datatype.as_ref() != rdf::LANG_STRING {
905 return Err(QueryResultsSyntaxError::msg(format!(
906 "xml:lang value '{lang}' provided with the datatype {datatype}"
907 ))
908 .into());
909 }
910 }
911 Literal::new_language_tagged_literal(value, &lang).map_err(|e| {
912 QueryResultsSyntaxError::msg(format!("Invalid xml:lang value '{lang}': {e}")).into()
913 })
914 }
915 None => Ok(if let Some(datatype) = datatype {
916 Literal::new_typed_literal(value, datatype)
917 } else {
918 Literal::new_simple_literal(value)
919 }),
920 }
921}
922
923fn escape_including_bound_whitespaces(value: &str) -> Cow<'_, str> {
925 let trimmed = value.trim_matches(|c| matches!(c, '\t' | '\n' | '\r' | ' '));
926 let trimmed_escaped = escape(trimmed);
927 if trimmed == value {
928 return trimmed_escaped;
929 }
930 let mut output =
931 String::with_capacity(trimmed_escaped.len() + (value.len() - trimmed.len()) * 5);
932 let mut prefix_len = 0;
933 for c in value.chars() {
934 match c {
935 '\t' => output.push_str("	"),
936 '\n' => output.push_str(" "),
937 '\r' => output.push_str(" "),
938 ' ' => output.push_str(" "),
939 _ => break,
940 }
941 prefix_len += 1;
942 }
943 output.push_str(&trimmed_escaped);
944 for c in value[prefix_len + trimmed.len()..].chars() {
945 match c {
946 '\t' => output.push_str("	"),
947 '\n' => output.push_str(" "),
948 '\r' => output.push_str(" "),
949 ' ' => output.push_str(" "),
950 _ => {
951 unreachable!("Unexpected {c} at the end of the string {value:?}")
952 }
953 }
954 }
955 output.into()
956}
957
/// Converts an XML writer error into an `io::Error`.
///
/// A wrapped I/O error is unwrapped from its `Arc` when this is the only reference,
/// otherwise it is re-wrapped under the same kind; any other error becomes `InvalidData`.
#[cfg(feature = "async-tokio")]
fn map_xml_error(error: Error) -> io::Error {
    match error {
        Error::Io(shared) => match Arc::try_unwrap(shared) {
            Ok(io_error) => io_error,
            Err(shared) => io::Error::new(shared.kind(), shared),
        },
        other => io::Error::new(io::ErrorKind::InvalidData, other),
    }
}