sparesults/
xml.rs

1//! Implementation of [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/)
2
3use crate::error::{QueryResultsParseError, QueryResultsSyntaxError};
4use oxrdf::vocab::rdf;
5use oxrdf::*;
6use quick_xml::escape::{escape, unescape};
7use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event};
8use quick_xml::reader::Config;
9use quick_xml::{Decoder, Error, Reader, Writer};
10use std::borrow::Cow;
11use std::collections::HashMap;
12use std::io::{self, BufReader, Read, Write};
13use std::mem::take;
14#[cfg(feature = "async-tokio")]
15use std::sync::Arc;
16#[cfg(feature = "async-tokio")]
17use tokio::io::{AsyncRead, AsyncWrite, BufReader as AsyncBufReader};
18
19pub fn write_boolean_xml_result<W: Write>(writer: W, value: bool) -> io::Result<W> {
20    let mut writer = Writer::new(writer);
21    for event in inner_write_boolean_xml_result(value) {
22        writer.write_event(event)?;
23    }
24    Ok(writer.into_inner())
25}
26
/// Asynchronous (Tokio) variant of the boolean result serialization.
///
/// Writes every event produced by `inner_write_boolean_xml_result` to `writer`
/// and returns the writer back on success.
///
/// # Errors
///
/// Stops at the first failing write; the XML error is converted with
/// `map_xml_error`.
#[cfg(feature = "async-tokio")]
pub async fn tokio_async_write_boolean_xml_result<W: AsyncWrite + Unpin>(
    writer: W,
    value: bool,
) -> io::Result<W> {
    let mut xml_writer = Writer::new(writer);
    for event in inner_write_boolean_xml_result(value) {
        let outcome = xml_writer.write_event_async(event).await;
        outcome.map_err(map_xml_error)?;
    }
    Ok(xml_writer.into_inner())
}
41
42fn inner_write_boolean_xml_result(value: bool) -> [Event<'static>; 8] {
43    [
44        Event::Decl(BytesDecl::new("1.0", None, None)),
45        Event::Start(
46            BytesStart::new("sparql")
47                .with_attributes([("xmlns", "http://www.w3.org/2005/sparql-results#")]),
48        ),
49        Event::Start(BytesStart::new("head")),
50        Event::End(BytesEnd::new("head")),
51        Event::Start(BytesStart::new("boolean")),
52        Event::Text(BytesText::new(if value { "true" } else { "false" })),
53        Event::End(BytesEnd::new("boolean")),
54        Event::End(BytesEnd::new("sparql")),
55    ]
56}
57
58pub struct WriterXmlSolutionsSerializer<W: Write> {
59    inner: InnerXmlSolutionsSerializer,
60    writer: Writer<W>,
61}
62
63impl<W: Write> WriterXmlSolutionsSerializer<W> {
64    pub fn start(writer: W, variables: &[Variable]) -> io::Result<Self> {
65        let mut writer = Writer::new(writer);
66        let mut buffer = Vec::with_capacity(48);
67        let inner = InnerXmlSolutionsSerializer::start(&mut buffer, variables);
68        Self::do_write(&mut writer, buffer)?;
69        Ok(Self { inner, writer })
70    }
71
72    pub fn serialize<'a>(
73        &mut self,
74        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
75    ) -> io::Result<()> {
76        let mut buffer = Vec::with_capacity(48);
77        self.inner.write(&mut buffer, solution);
78        Self::do_write(&mut self.writer, buffer)
79    }
80
81    pub fn finish(mut self) -> io::Result<W> {
82        let mut buffer = Vec::with_capacity(4);
83        self.inner.finish(&mut buffer);
84        Self::do_write(&mut self.writer, buffer)?;
85        Ok(self.writer.into_inner())
86    }
87
88    fn do_write(writer: &mut Writer<W>, output: Vec<Event<'_>>) -> io::Result<()> {
89        for event in output {
90            writer.write_event(event)?;
91        }
92        Ok(())
93    }
94}
95
/// Tokio-based asynchronous serializer of SPARQL solutions to the XML results format.
///
/// The XML events are produced by the wrapped [`InnerXmlSolutionsSerializer`]
/// and written to the underlying [`AsyncWrite`] implementation after each call.
#[cfg(feature = "async-tokio")]
pub struct TokioAsyncWriterXmlSolutionsSerializer<W: AsyncWrite + Unpin> {
    inner: InnerXmlSolutionsSerializer,
    writer: Writer<W>,
}

#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> TokioAsyncWriterXmlSolutionsSerializer<W> {
    /// Writes the document prologue (declaration, `<head>` with the variables,
    /// opening `<results>`) and returns the serializer.
    pub async fn start(writer: W, variables: &[Variable]) -> io::Result<Self> {
        let mut xml_writer = Writer::new(writer);
        let mut events = Vec::with_capacity(48);
        let inner = InnerXmlSolutionsSerializer::start(&mut events, variables);
        Self::do_write(&mut xml_writer, events).await?;
        Ok(Self {
            inner,
            writer: xml_writer,
        })
    }

    /// Writes a single `<result>` element holding the given bindings.
    pub async fn serialize<'a>(
        &mut self,
        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
    ) -> io::Result<()> {
        let mut events = Vec::with_capacity(48);
        self.inner.write(&mut events, solution);
        Self::do_write(&mut self.writer, events).await
    }

    /// Closes the `<results>` and `<sparql>` elements and returns the underlying writer.
    pub async fn finish(mut self) -> io::Result<W> {
        let mut events = Vec::with_capacity(4);
        self.inner.finish(&mut events);
        Self::do_write(&mut self.writer, events).await?;
        Ok(self.writer.into_inner())
    }

    /// Writes the events in order, stopping at the first error
    /// (converted with `map_xml_error`).
    async fn do_write(writer: &mut Writer<W>, output: Vec<Event<'_>>) -> io::Result<()> {
        for event in output {
            let outcome = writer.write_event_async(event).await;
            outcome.map_err(map_xml_error)?;
        }
        Ok(())
    }
}
138
139struct InnerXmlSolutionsSerializer;
140
141impl InnerXmlSolutionsSerializer {
142    fn start<'a>(output: &mut Vec<Event<'a>>, variables: &'a [Variable]) -> Self {
143        output.push(Event::Decl(BytesDecl::new("1.0", None, None)));
144        output.push(Event::Start(BytesStart::new("sparql").with_attributes([(
145            "xmlns",
146            "http://www.w3.org/2005/sparql-results#",
147        )])));
148        output.push(Event::Start(BytesStart::new("head")));
149        for variable in variables {
150            output.push(Event::Empty(
151                BytesStart::new("variable").with_attributes([("name", variable.as_str())]),
152            ));
153        }
154        output.push(Event::End(BytesEnd::new("head")));
155        output.push(Event::Start(BytesStart::new("results")));
156        Self {}
157    }
158
159    #[allow(clippy::unused_self)]
160    fn write<'a>(
161        &self,
162        output: &mut Vec<Event<'a>>,
163        solution: impl IntoIterator<Item = (VariableRef<'a>, TermRef<'a>)>,
164    ) {
165        output.push(Event::Start(BytesStart::new("result")));
166        for (variable, value) in solution {
167            output.push(Event::Start(
168                BytesStart::new("binding").with_attributes([("name", variable.as_str())]),
169            ));
170            write_xml_term(output, value);
171            output.push(Event::End(BytesEnd::new("binding")));
172        }
173        output.push(Event::End(BytesEnd::new("result")));
174    }
175
176    #[allow(clippy::unused_self)]
177    fn finish(self, output: &mut Vec<Event<'_>>) {
178        output.push(Event::End(BytesEnd::new("results")));
179        output.push(Event::End(BytesEnd::new("sparql")));
180    }
181}
182
183fn write_xml_term<'a>(output: &mut Vec<Event<'a>>, term: TermRef<'a>) {
184    match term {
185        TermRef::NamedNode(uri) => {
186            output.push(Event::Start(BytesStart::new("uri")));
187            output.push(Event::Text(BytesText::new(uri.as_str())));
188            output.push(Event::End(BytesEnd::new("uri")));
189        }
190        TermRef::BlankNode(bnode) => {
191            output.push(Event::Start(BytesStart::new("bnode")));
192            output.push(Event::Text(BytesText::new(bnode.as_str())));
193            output.push(Event::End(BytesEnd::new("bnode")));
194        }
195        TermRef::Literal(literal) => {
196            let mut start = BytesStart::new("literal");
197            if let Some(language) = literal.language() {
198                start.push_attribute(("xml:lang", language));
199            } else if !literal.is_plain() {
200                start.push_attribute(("datatype", literal.datatype().as_str()))
201            }
202            output.push(Event::Start(start));
203            output.push(Event::Text(BytesText::from_escaped(
204                escape_including_bound_whitespaces(literal.value()),
205            )));
206            output.push(Event::End(BytesEnd::new("literal")));
207        }
208        #[cfg(feature = "rdf-star")]
209        TermRef::Triple(triple) => {
210            output.push(Event::Start(BytesStart::new("triple")));
211            output.push(Event::Start(BytesStart::new("subject")));
212            write_xml_term(output, triple.subject.as_ref().into());
213            output.push(Event::End(BytesEnd::new("subject")));
214            output.push(Event::Start(BytesStart::new("predicate")));
215            write_xml_term(output, triple.predicate.as_ref().into());
216            output.push(Event::End(BytesEnd::new("predicate")));
217            output.push(Event::Start(BytesStart::new("object")));
218            write_xml_term(output, triple.object.as_ref());
219            output.push(Event::End(BytesEnd::new("object")));
220            output.push(Event::End(BytesEnd::new("triple")));
221        }
222    }
223}
224
225#[allow(clippy::large_enum_variant)]
226pub enum ReaderXmlQueryResultsParserOutput<R: Read> {
227    Solutions {
228        variables: Vec<Variable>,
229        solutions: ReaderXmlSolutionsParser<R>,
230    },
231    Boolean(bool),
232}
233
234impl<R: Read> ReaderXmlQueryResultsParserOutput<R> {
235    pub fn read(reader: R) -> Result<Self, QueryResultsParseError> {
236        let mut reader = Reader::from_reader(BufReader::new(reader));
237        XmlInnerQueryResultsParser::set_options(reader.config_mut());
238        let mut reader_buffer = Vec::new();
239        let mut inner = XmlInnerQueryResultsParser {
240            state: ResultsState::Start,
241            variables: Vec::new(),
242            decoder: reader.decoder(),
243        };
244        loop {
245            reader_buffer.clear();
246            let event = reader.read_event_into(&mut reader_buffer)?;
247            if let Some(result) = inner.read_event(event)? {
248                return Ok(match result {
249                    XmlInnerQueryResults::Solutions {
250                        variables,
251                        solutions,
252                    } => Self::Solutions {
253                        variables,
254                        solutions: ReaderXmlSolutionsParser {
255                            reader,
256                            inner: solutions,
257                            reader_buffer,
258                        },
259                    },
260                    XmlInnerQueryResults::Boolean(value) => Self::Boolean(value),
261                });
262            }
263        }
264    }
265}
266
267pub struct ReaderXmlSolutionsParser<R: Read> {
268    reader: Reader<BufReader<R>>,
269    inner: XmlInnerSolutionsParser,
270    reader_buffer: Vec<u8>,
271}
272
273impl<R: Read> ReaderXmlSolutionsParser<R> {
274    pub fn parse_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
275        loop {
276            self.reader_buffer.clear();
277            let event = self.reader.read_event_into(&mut self.reader_buffer)?;
278            if event == Event::Eof {
279                return Ok(None);
280            }
281            if let Some(solution) = self.inner.read_event(event)? {
282                return Ok(Some(solution));
283            }
284        }
285    }
286}
287
/// Outcome of parsing the beginning of a SPARQL XML results document from an [`AsyncRead`].
///
/// Either the declared variables together with a parser yielding the solutions,
/// or the value of a boolean result.
#[cfg(feature = "async-tokio")]
#[allow(clippy::large_enum_variant)]
pub enum TokioAsyncReaderXmlQueryResultsParserOutput<R: AsyncRead + Unpin> {
    Solutions {
        variables: Vec<Variable>,
        solutions: TokioAsyncReaderXmlSolutionsParser<R>,
    },
    Boolean(bool),
}

#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> TokioAsyncReaderXmlQueryResultsParserOutput<R> {
    /// Reads events until the kind of result is known: either a boolean value
    /// has been read or the `<results>` element has been opened.
    pub async fn read(reader: R) -> Result<Self, QueryResultsParseError> {
        let mut reader = Reader::from_reader(AsyncBufReader::new(reader));
        XmlInnerQueryResultsParser::set_options(reader.config_mut());
        let mut inner = XmlInnerQueryResultsParser {
            state: ResultsState::Start,
            variables: Vec::new(),
            decoder: reader.decoder(),
        };
        let mut reader_buffer = Vec::new();
        let parsed = loop {
            reader_buffer.clear();
            let event = reader.read_event_into_async(&mut reader_buffer).await?;
            if let Some(parsed) = inner.read_event(event)? {
                break parsed;
            }
        };
        Ok(match parsed {
            XmlInnerQueryResults::Boolean(value) => Self::Boolean(value),
            XmlInnerQueryResults::Solutions {
                variables,
                solutions,
            } => Self::Solutions {
                variables,
                // The reader and its buffer are handed over to keep parsing the solutions
                solutions: TokioAsyncReaderXmlSolutionsParser {
                    reader,
                    inner: solutions,
                    reader_buffer,
                },
            },
        })
    }
}
331
/// Parser of the solutions of a SPARQL XML results document read from an [`AsyncRead`].
#[cfg(feature = "async-tokio")]
pub struct TokioAsyncReaderXmlSolutionsParser<R: AsyncRead + Unpin> {
    reader: Reader<AsyncBufReader<R>>,
    inner: XmlInnerSolutionsParser,
    reader_buffer: Vec<u8>,
}

#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> TokioAsyncReaderXmlSolutionsParser<R> {
    /// Reads the next solution, or returns `Ok(None)` once the document is finished.
    ///
    /// # Errors
    ///
    /// Returns an error if reading fails, if the content is invalid, or if the
    /// input ends while some XML elements are still open (truncated document).
    pub async fn parse_next(
        &mut self,
    ) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
        loop {
            self.reader_buffer.clear();
            let event = self
                .reader
                .read_event_into_async(&mut self.reader_buffer)
                .await?;
            if matches!(event, Event::Eof) {
                // Every opened element pushes a state that is popped by its end tag:
                // a non-empty stack at the end of the input means that the document
                // has been cut before `</results>` or `</sparql>`.
                if self.inner.state_stack.is_empty() {
                    return Ok(None);
                }
                return Err(QueryResultsSyntaxError::msg(
                    "Unexpected early file end. All the opened XML tags should be closed before the end of the document",
                )
                .into());
            }
            if let Some(solution) = self.inner.read_event(event)? {
                return Ok(Some(solution));
            }
        }
    }
}
359
360#[allow(clippy::large_enum_variant)]
361pub enum SliceXmlQueryResultsParserOutput<'a> {
362    Solutions {
363        variables: Vec<Variable>,
364        solutions: SliceXmlSolutionsParser<'a>,
365    },
366    Boolean(bool),
367}
368
369impl<'a> SliceXmlQueryResultsParserOutput<'a> {
370    pub fn read(slice: &'a [u8]) -> Result<Self, QueryResultsSyntaxError> {
371        Self::do_read(slice).map_err(|e| match e {
372            QueryResultsParseError::Syntax(e) => e,
373            QueryResultsParseError::Io(e) => {
374                unreachable!("I/O error are not possible for slice but found {e}")
375            }
376        })
377    }
378
379    fn do_read(slice: &'a [u8]) -> Result<Self, QueryResultsParseError> {
380        let mut reader = Reader::from_reader(slice);
381        XmlInnerQueryResultsParser::set_options(reader.config_mut());
382        let mut reader_buffer = Vec::new();
383        let mut inner = XmlInnerQueryResultsParser {
384            state: ResultsState::Start,
385            variables: Vec::new(),
386            decoder: reader.decoder(),
387        };
388        loop {
389            reader_buffer.clear();
390            let event = reader.read_event_into(&mut reader_buffer)?;
391            if let Some(result) = inner.read_event(event)? {
392                return Ok(match result {
393                    XmlInnerQueryResults::Solutions {
394                        variables,
395                        solutions,
396                    } => Self::Solutions {
397                        variables,
398                        solutions: SliceXmlSolutionsParser {
399                            reader,
400                            inner: solutions,
401                            reader_buffer,
402                        },
403                    },
404                    XmlInnerQueryResults::Boolean(value) => Self::Boolean(value),
405                });
406            }
407        }
408    }
409}
410
411pub struct SliceXmlSolutionsParser<'a> {
412    reader: Reader<&'a [u8]>,
413    inner: XmlInnerSolutionsParser,
414    reader_buffer: Vec<u8>,
415}
416
417impl SliceXmlSolutionsParser<'_> {
418    pub fn parse_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsSyntaxError> {
419        self.do_parse_next().map_err(|e| match e {
420            QueryResultsParseError::Syntax(e) => e,
421            QueryResultsParseError::Io(e) => {
422                unreachable!("I/O error are not possible for slice but found {e}")
423            }
424        })
425    }
426
427    fn do_parse_next(&mut self) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
428        loop {
429            self.reader_buffer.clear();
430            let event = self.reader.read_event_into(&mut self.reader_buffer)?;
431            if event == Event::Eof {
432                return Ok(None);
433            }
434            if let Some(solution) = self.inner.read_event(event)? {
435                return Ok(Some(solution));
436            }
437        }
438    }
439}
440
/// Result of parsing the beginning of a document, independently of the way the
/// XML events are read.
#[allow(clippy::large_enum_variant)]
enum XmlInnerQueryResults {
    /// The `<results>` element has been opened: the declared variables and the
    /// parser to feed with the following events to get the solutions.
    Solutions {
        variables: Vec<Variable>,
        solutions: XmlInnerSolutionsParser,
    },
    /// The textual content of the `<boolean>` element has been read.
    Boolean(bool),
}
449
/// Position of [`XmlInnerQueryResultsParser`] in the document.
#[derive(Clone, Copy)]
enum ResultsState {
    /// Nothing read yet: the `<sparql>` root element is expected.
    Start,
    /// Inside `<sparql>`: the `<head>` element is expected.
    Sparql,
    /// Inside `<head>`: `<variable>` and `<link>` elements are accepted.
    Head,
    /// After `</head>`: `<boolean>`, `<results>` or `<link>` are accepted.
    AfterHead,
    /// Inside `<boolean>`: the `true` or `false` text is expected.
    Boolean,
}
458
459struct XmlInnerQueryResultsParser {
460    state: ResultsState,
461    variables: Vec<Variable>,
462    decoder: Decoder,
463}
464
465impl XmlInnerQueryResultsParser {
466    fn set_options(config: &mut Config) {
467        config.trim_text(true);
468        config.expand_empty_elements = true;
469    }
470
471    pub fn read_event(
472        &mut self,
473        event: Event<'_>,
474    ) -> Result<Option<XmlInnerQueryResults>, QueryResultsParseError> {
475        match event {
476            Event::Start(event) => match self.state {
477                ResultsState::Start => {
478                    if event.local_name().as_ref() == b"sparql" {
479                        self.state = ResultsState::Sparql;
480                        Ok(None)
481                    } else {
482                        Err(QueryResultsSyntaxError::msg(format!("Expecting <sparql> tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into())
483                    }
484                }
485                ResultsState::Sparql => {
486                    if event.local_name().as_ref() == b"head" {
487                        self.state = ResultsState::Head;
488                        Ok(None)
489                    } else {
490                        Err(QueryResultsSyntaxError::msg(format!("Expecting <head> tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into())
491                    }
492                }
493                ResultsState::Head => {
494                    if event.local_name().as_ref() == b"variable" {
495                        let name = event.attributes()
496                            .filter_map(Result::ok)
497                            .find(|attr| attr.key.local_name().as_ref() == b"name")
498                            .ok_or_else(|| QueryResultsSyntaxError::msg("No name attribute found for the <variable> tag"))?;
499                        let name = unescape(&self.decoder.decode(&name.value)?)?.into_owned();
500                        let variable = Variable::new(name).map_err(|e| QueryResultsSyntaxError::msg(format!("Invalid variable name: {e}")))?;
501                        if self.variables.contains(&variable) {
502                            return Err(QueryResultsSyntaxError::msg(format!(
503                                "The variable {variable} is declared twice"
504                            ))
505                                .into());
506                        }
507                        self.variables.push(variable);
508                        Ok(None)
509                    } else if event.local_name().as_ref() == b"link" {
510                        // no op
511                        Ok(None)
512                    } else {
513                        Err(QueryResultsSyntaxError::msg(format!("Expecting <variable> or <link> tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into())
514                    }
515                }
516                ResultsState::AfterHead => {
517                    if event.local_name().as_ref() == b"boolean" {
518                        self.state = ResultsState::Boolean;
519                        Ok(None)
520                    } else if event.local_name().as_ref() == b"results" {
521                        let mut mapping = HashMap::new();
522                        for (i, var) in self.variables.iter().enumerate() {
523                            mapping.insert(var.clone().into_string(), i);
524                        }
525                        Ok(Some(XmlInnerQueryResults::Solutions {
526                            variables: take(&mut self.variables),
527                            solutions: XmlInnerSolutionsParser {
528                                decoder: self.decoder,
529                                mapping,
530                                state_stack: vec![State::Start, State::Start],
531                                new_bindings: Vec::new(),
532                                current_var: None,
533                                term: None,
534                                lang: None,
535                                datatype: None,
536                                subject_stack: Vec::new(),
537                                predicate_stack: Vec::new(),
538                                object_stack: Vec::new(),
539                            },
540                        }))
541                    } else if event.local_name().as_ref() != b"link" && event.local_name().as_ref() != b"results" && event.local_name().as_ref() != b"boolean" {
542                        Err(QueryResultsSyntaxError::msg(format!("Expecting sparql tag, found <{}>", self.decoder.decode(event.name().as_ref())?)).into())
543                    } else {
544                        Ok(None)
545                    }
546                }
547                ResultsState::Boolean => Err(QueryResultsSyntaxError::msg(format!("Unexpected tag inside of <boolean> tag: <{}>", self.decoder.decode(event.name().as_ref())?)).into())
548            },
549            Event::Text(event) => {
550                let value = event.unescape()?;
551                match self.state {
552                    ResultsState::Boolean => {
553                        if value == "true" {
554                            Ok(Some(XmlInnerQueryResults::Boolean(true)))
555                        } else if value == "false" {
556                            Ok(Some(XmlInnerQueryResults::Boolean(false)))
557                        } else {
558                            Err(QueryResultsSyntaxError::msg(format!("Unexpected boolean value. Found '{value}'")).into())
559                        }
560                    }
561                    _ => Err(QueryResultsSyntaxError::msg(format!("Unexpected textual value found: '{value}'")).into())
562                }
563            }
564            Event::End(event) => {
565                if let ResultsState::Head = self.state {
566                    if event.local_name().as_ref() == b"head" {
567                        self.state = ResultsState::AfterHead
568                    }
569                    Ok(None)
570                } else {
571                    Err(QueryResultsSyntaxError::msg("Unexpected early file end. All results file should have a <head> and a <result> or <boolean> tag").into())
572                }
573            }
574            Event::Eof => Err(QueryResultsSyntaxError::msg("Unexpected early file end. All results file should have a <head> and a <result> or <boolean> tag").into()),
575            Event::Comment(_) | Event::Decl(_) | Event::PI(_) | Event::DocType(_) => {
576                Ok(None)
577            }
578            Event::Empty(_) => unreachable!("Empty events are expended"),
579            Event::CData(_) => {
580                Err(QueryResultsSyntaxError::msg(
581                    "<![CDATA[...]]> are not supported in SPARQL XML results",
582                )
583                    .into())
584            }
585        }
586    }
587}
588
/// Kind of XML element currently opened, as tracked by the state stack of
/// [`XmlInnerSolutionsParser`].
enum State {
    /// An element in which `<result>` elements are expected. The stack initially
    /// holds two of them.
    Start,
    /// Inside `<result>`: `<binding>` elements are expected.
    Result,
    /// Inside `<binding>`: a term element is expected.
    Binding,
    /// Inside `<uri>`: the IRI text is expected.
    Uri,
    /// Inside `<bnode>`: the blank node identifier text is expected.
    BNode,
    /// Inside `<literal>`: the literal value text is expected.
    Literal,
    /// Inside `<triple>`: `<subject>`, `<predicate>` or `<object>` are expected.
    Triple,
    /// Inside `<subject>`: a term element is expected.
    Subject,
    /// Inside `<predicate>`: a term element is expected.
    Predicate,
    /// Inside `<object>`: a term element is expected.
    Object,
}
601
/// Event-driven parser of the `<result>` elements of a document, independently
/// of the way the XML events are read.
struct XmlInnerSolutionsParser {
    /// Decoder used to turn the raw bytes of names and attributes into strings
    decoder: Decoder,
    /// Position of each declared variable in the solution vectors
    mapping: HashMap<String, usize>,
    /// Stack of the currently opened elements: pushed on start tags, popped on end tags
    state_stack: Vec<State>,
    /// Solution being built: reset when a `<result>` starts, returned when it ends
    new_bindings: Vec<Option<Term>>,
    /// Value of the `name` attribute of the current `<binding>`
    current_var: Option<String>,
    /// Last term parsed and not consumed yet
    term: Option<Term>,
    /// Value of the `xml:lang` attribute of the current `<literal>`, if any
    lang: Option<String>,
    /// Value of the `datatype` attribute of the current `<literal>`, if any
    datatype: Option<NamedNode>,
    // NOTE(review): the three stacks below presumably hold the already parsed
    // components of the (possibly nested) `<triple>` elements being read; their
    // usage is not visible from this part of the file, to be confirmed.
    subject_stack: Vec<Term>,
    predicate_stack: Vec<Term>,
    object_stack: Vec<Term>,
}
615
616impl XmlInnerSolutionsParser {
617    #[allow(clippy::unwrap_in_result)]
618    pub fn read_event(
619        &mut self,
620        event: Event<'_>,
621    ) -> Result<Option<Vec<Option<Term>>>, QueryResultsParseError> {
622        match event {
623            Event::Start(event) => match self.state_stack.last().ok_or_else(|| {
624                QueryResultsSyntaxError::msg("Extra XML is not allowed at the end of the document")
625            })? {
626                State::Start => {
627                    if event.local_name().as_ref() == b"result" {
628                        self.new_bindings = vec![None; self.mapping.len()];
629                        self.state_stack.push(State::Result);
630                        Ok(None)
631                    } else {
632                        Err(QueryResultsSyntaxError::msg(format!(
633                            "Expecting <result>, found <{}>",
634                            self.decoder.decode(event.name().as_ref())?
635                        ))
636                        .into())
637                    }
638                }
639                State::Result => {
640                    if event.local_name().as_ref() == b"binding" {
641                        let Some(attr) = event
642                            .attributes()
643                            .filter_map(Result::ok)
644                            .find(|attr| attr.key.local_name().as_ref() == b"name")
645                        else {
646                            return Err(QueryResultsSyntaxError::msg(
647                                "No name attribute found for the <binding> tag",
648                            )
649                            .into());
650                        };
651                        self.current_var =
652                            Some(unescape(&self.decoder.decode(&attr.value)?)?.into_owned());
653                        self.state_stack.push(State::Binding);
654                        Ok(None)
655                    } else {
656                        Err(QueryResultsSyntaxError::msg(format!(
657                            "Expecting <binding>, found <{}>",
658                            self.decoder.decode(event.name().as_ref())?
659                        ))
660                        .into())
661                    }
662                }
663                State::Binding | State::Subject | State::Predicate | State::Object => {
664                    if self.term.is_some() {
665                        return Err(QueryResultsSyntaxError::msg(
666                            "There is already a value for the current binding",
667                        )
668                        .into());
669                    }
670                    if event.local_name().as_ref() == b"uri" {
671                        self.state_stack.push(State::Uri);
672                        Ok(None)
673                    } else if event.local_name().as_ref() == b"bnode" {
674                        self.state_stack.push(State::BNode);
675                        Ok(None)
676                    } else if event.local_name().as_ref() == b"literal" {
677                        for attr in event.attributes() {
678                            let attr = attr.map_err(Error::from)?;
679                            if attr.key.as_ref() == b"xml:lang" {
680                                self.lang = Some(
681                                    unescape(&self.decoder.decode(&attr.value)?)?.into_owned(),
682                                );
683                            } else if attr.key.local_name().as_ref() == b"datatype" {
684                                let iri = self.decoder.decode(&attr.value)?;
685                                let iri = unescape(&iri)?;
686                                self.datatype =
687                                    Some(NamedNode::new(iri.as_ref()).map_err(|e| {
688                                        QueryResultsSyntaxError::msg(format!(
689                                            "Invalid datatype IRI '{iri}': {e}"
690                                        ))
691                                    })?);
692                            }
693                        }
694                        self.state_stack.push(State::Literal);
695                        Ok(None)
696                    } else if event.local_name().as_ref() == b"triple" {
697                        self.state_stack.push(State::Triple);
698                        Ok(None)
699                    } else {
700                        Err(QueryResultsSyntaxError::msg(format!(
701                            "Expecting <uri>, <bnode> or <literal> found <{}>",
702                            self.decoder.decode(event.name().as_ref())?
703                        ))
704                        .into())
705                    }
706                }
707                State::Triple => {
708                    if event.local_name().as_ref() == b"subject" {
709                        self.state_stack.push(State::Subject);
710                        Ok(None)
711                    } else if event.local_name().as_ref() == b"predicate" {
712                        self.state_stack.push(State::Predicate);
713                        Ok(None)
714                    } else if event.local_name().as_ref() == b"object" {
715                        self.state_stack.push(State::Object);
716                        Ok(None)
717                    } else {
718                        Err(QueryResultsSyntaxError::msg(format!(
719                            "Expecting <subject>, <predicate> or <object> found <{}>",
720                            self.decoder.decode(event.name().as_ref())?
721                        ))
722                        .into())
723                    }
724                }
725                State::Uri => Err(QueryResultsSyntaxError::msg(format!(
726                    "<uri> must only contain a string, found <{}>",
727                    self.decoder.decode(event.name().as_ref())?
728                ))
729                .into()),
730                State::BNode => Err(QueryResultsSyntaxError::msg(format!(
731                    "<uri> must only contain a string, found <{}>",
732                    self.decoder.decode(event.name().as_ref())?
733                ))
734                .into()),
735                State::Literal => Err(QueryResultsSyntaxError::msg(format!(
736                    "<uri> must only contain a string, found <{}>",
737                    self.decoder.decode(event.name().as_ref())?
738                ))
739                .into()),
740            },
741            Event::Text(event) => {
742                let data = event.unescape()?;
743                match self.state_stack.last() {
744                    Some(State::Uri) => {
745                        self.term = Some(
746                            NamedNode::new(data.to_string())
747                                .map_err(|e| {
748                                    QueryResultsSyntaxError::msg(format!(
749                                        "Invalid IRI value '{data}': {e}"
750                                    ))
751                                })?
752                                .into(),
753                        );
754                        Ok(None)
755                    }
756                    Some(State::BNode) => {
757                        self.term = Some(
758                            BlankNode::new(data.to_string())
759                                .map_err(|e| {
760                                    QueryResultsSyntaxError::msg(format!(
761                                        "Invalid blank node value '{data}': {e}"
762                                    ))
763                                })?
764                                .into(),
765                        );
766                        Ok(None)
767                    }
768                    Some(State::Literal) => {
769                        self.term = Some(
770                            build_literal(data, self.lang.take(), self.datatype.take())?.into(),
771                        );
772                        Ok(None)
773                    }
774                    _ => Err(QueryResultsSyntaxError::msg(format!(
775                        "Unexpected textual value found: {data}"
776                    ))
777                    .into()),
778                }
779            }
780            Event::End(_) => match self.state_stack.pop().ok_or_else(|| {
781                QueryResultsSyntaxError::msg("Extra XML is not allowed at the end of the document")
782            })? {
783                State::Start | State::Uri => Ok(None),
784                State::Result => Ok(Some(take(&mut self.new_bindings))),
785                State::Binding => {
786                    if let Some(var) = &self.current_var {
787                        if let Some(var) = self.mapping.get(var) {
788                            self.new_bindings[*var] = self.term.take()
789                        } else {
790                            return Err(
791                                QueryResultsSyntaxError::msg(format!("The variable '{var}' is used in a binding but not declared in the variables list")).into()
792                            );
793                        }
794                    } else {
795                        return Err(QueryResultsSyntaxError::msg(
796                            "No name found for <binding> tag",
797                        )
798                        .into());
799                    }
800                    Ok(None)
801                }
802                State::Subject => {
803                    if let Some(subject) = self.term.take() {
804                        self.subject_stack.push(subject)
805                    }
806                    Ok(None)
807                }
808                State::Predicate => {
809                    if let Some(predicate) = self.term.take() {
810                        self.predicate_stack.push(predicate)
811                    }
812                    Ok(None)
813                }
814                State::Object => {
815                    if let Some(object) = self.term.take() {
816                        self.object_stack.push(object)
817                    }
818                    Ok(None)
819                }
820                State::BNode => {
821                    if self.term.is_none() {
822                        // We default to a random bnode
823                        self.term = Some(BlankNode::default().into())
824                    }
825                    Ok(None)
826                }
827                State::Literal => {
828                    if self.term.is_none() {
829                        // We default to the empty literal
830                        self.term =
831                            Some(build_literal("", self.lang.take(), self.datatype.take())?.into())
832                    }
833                    Ok(None)
834                }
835                State::Triple => {
836                    #[cfg(feature = "rdf-star")]
837                    if let (Some(subject), Some(predicate), Some(object)) = (
838                        self.subject_stack.pop(),
839                        self.predicate_stack.pop(),
840                        self.object_stack.pop(),
841                    ) {
842                        self.term = Some(
843                            Triple::new(
844                                match subject {
845                                    Term::NamedNode(subject) => subject.into(),
846                                    Term::BlankNode(subject) => subject.into(),
847                                    Term::Triple(subject) => Subject::Triple(subject),
848                                    Term::Literal(_) => {
849                                        return Err(QueryResultsSyntaxError::msg(
850                                            "The <subject> value should not be a <literal>",
851                                        )
852                                        .into());
853                                    }
854                                },
855                                match predicate {
856                                    Term::NamedNode(predicate) => predicate,
857                                    _ => {
858                                        return Err(QueryResultsSyntaxError::msg(
859                                            "The <predicate> value should be an <uri>",
860                                        )
861                                        .into());
862                                    }
863                                },
864                                object,
865                            )
866                            .into(),
867                        );
868                        Ok(None)
869                    } else {
870                        Err(QueryResultsSyntaxError::msg(
871                            "A <triple> should contain a <subject>, a <predicate> and an <object>",
872                        )
873                        .into())
874                    }
875                    #[cfg(not(feature = "rdf-star"))]
876                    {
877                        Err(QueryResultsSyntaxError::msg(
878                            "The <triple> tag is only supported with RDF-star",
879                        )
880                        .into())
881                    }
882                }
883            },
884            Event::Eof | Event::Comment(_) | Event::Decl(_) | Event::PI(_) | Event::DocType(_) => {
885                Ok(None)
886            }
887            Event::Empty(_) => unreachable!("Empty events are expended"),
888            Event::CData(_) => Err(QueryResultsSyntaxError::msg(
889                "<![CDATA[...]]> are not supported in SPARQL XML results",
890            )
891            .into()),
892        }
893    }
894}
895
896fn build_literal(
897    value: impl Into<String>,
898    lang: Option<String>,
899    datatype: Option<NamedNode>,
900) -> Result<Literal, QueryResultsParseError> {
901    match lang {
902        Some(lang) => {
903            if let Some(datatype) = datatype {
904                if datatype.as_ref() != rdf::LANG_STRING {
905                    return Err(QueryResultsSyntaxError::msg(format!(
906                        "xml:lang value '{lang}' provided with the datatype {datatype}"
907                    ))
908                    .into());
909                }
910            }
911            Literal::new_language_tagged_literal(value, &lang).map_err(|e| {
912                QueryResultsSyntaxError::msg(format!("Invalid xml:lang value '{lang}': {e}")).into()
913            })
914        }
915        None => Ok(if let Some(datatype) = datatype {
916            Literal::new_typed_literal(value, datatype)
917        } else {
918            Literal::new_simple_literal(value)
919        }),
920    }
921}
922
923/// Escapes whitespaces at the beginning and the end to make sure they are not removed by parsers trimming text events.
924fn escape_including_bound_whitespaces(value: &str) -> Cow<'_, str> {
925    let trimmed = value.trim_matches(|c| matches!(c, '\t' | '\n' | '\r' | ' '));
926    let trimmed_escaped = escape(trimmed);
927    if trimmed == value {
928        return trimmed_escaped;
929    }
930    let mut output =
931        String::with_capacity(trimmed_escaped.len() + (value.len() - trimmed.len()) * 5);
932    let mut prefix_len = 0;
933    for c in value.chars() {
934        match c {
935            '\t' => output.push_str("&#9;"),
936            '\n' => output.push_str("&#10;"),
937            '\r' => output.push_str("&#13;"),
938            ' ' => output.push_str("&#32;"),
939            _ => break,
940        }
941        prefix_len += 1;
942    }
943    output.push_str(&trimmed_escaped);
944    for c in value[prefix_len + trimmed.len()..].chars() {
945        match c {
946            '\t' => output.push_str("&#9;"),
947            '\n' => output.push_str("&#10;"),
948            '\r' => output.push_str("&#13;"),
949            ' ' => output.push_str("&#32;"),
950            _ => {
951                unreachable!("Unexpected {c} at the end of the string {value:?}")
952            }
953        }
954    }
955    output.into()
956}
957
/// Converts a `quick_xml` error into an [`io::Error`].
///
/// An I/O error is unwrapped from its `Arc` when this is the only reference to it; otherwise it
/// is wrapped in a new [`io::Error`] of the same kind. Any other error is reported as
/// [`io::ErrorKind::InvalidData`].
#[cfg(feature = "async-tokio")]
fn map_xml_error(error: Error) -> io::Error {
    match error {
        Error::Io(shared) => match Arc::try_unwrap(shared) {
            Ok(owned) => owned,
            // Still shared elsewhere: keep the kind and carry the `Arc` as the source.
            Err(shared) => io::Error::new(shared.kind(), shared),
        },
        other => io::Error::new(io::ErrorKind::InvalidData, other),
    }
}