1use crate::utils::*;
2use oxiri::{Iri, IriParseError};
3use oxrdf::vocab::rdf;
4use oxrdf::{NamedNodeRef, Subject, SubjectRef, TermRef, TripleRef};
5use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event};
6use quick_xml::Writer;
7use std::borrow::Cow;
8use std::collections::BTreeMap;
9use std::io;
10use std::io::Write;
11#[cfg(feature = "async-tokio")]
12use std::sync::Arc;
13#[cfg(feature = "async-tokio")]
14use tokio::io::AsyncWrite;
15
16#[derive(Default, Clone)]
41#[must_use]
42pub struct RdfXmlSerializer {
43 prefixes: BTreeMap<String, String>,
44 base_iri: Option<Iri<String>>,
45}
46
47impl RdfXmlSerializer {
48 #[inline]
50 pub fn new() -> Self {
51 Self {
52 prefixes: BTreeMap::new(),
53 base_iri: None,
54 }
55 }
56
57 #[inline]
58 pub fn with_prefix(
59 mut self,
60 prefix_name: impl Into<String>,
61 prefix_iri: impl Into<String>,
62 ) -> Result<Self, IriParseError> {
63 let prefix_name = prefix_name.into();
64 if prefix_name == "oxprefix" {
65 return Ok(self); }
67 self.prefixes
68 .insert(prefix_name, Iri::parse(prefix_iri.into())?.into_inner());
69 Ok(self)
70 }
71
72 #[inline]
97 pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
98 self.base_iri = Some(Iri::parse(base_iri.into())?);
99 Ok(self)
100 }
101
102 #[allow(clippy::unused_self)]
129 pub fn for_writer<W: Write>(self, writer: W) -> WriterRdfXmlSerializer<W> {
130 WriterRdfXmlSerializer {
131 writer: Writer::new_with_indent(writer, b'\t', 1),
132 inner: self.inner_writer(),
133 }
134 }
135
136 #[allow(clippy::unused_self)]
166 #[cfg(feature = "async-tokio")]
167 pub fn for_tokio_async_writer<W: AsyncWrite + Unpin>(
168 self,
169 writer: W,
170 ) -> TokioAsyncWriterRdfXmlSerializer<W> {
171 TokioAsyncWriterRdfXmlSerializer {
172 writer: Writer::new_with_indent(writer, b'\t', 1),
173 inner: self.inner_writer(),
174 }
175 }
176
177 fn inner_writer(mut self) -> InnerRdfXmlWriter {
178 self.prefixes.remove("rdf");
180 let custom_default_prefix = self.prefixes.contains_key("");
181 let mut prefixes = self
183 .prefixes
184 .into_iter()
185 .map(|(key, value)| (value, key))
186 .collect::<BTreeMap<_, _>>();
187 prefixes.insert(
188 "http://www.w3.org/1999/02/22-rdf-syntax-ns#".into(),
189 "rdf".into(),
190 );
191 InnerRdfXmlWriter {
192 current_subject: None,
193 current_resource_tag: None,
194 custom_default_prefix,
195 prefixes_by_iri: prefixes,
196 base_iri: self.base_iri,
197 }
198 }
199}
200
201#[must_use]
228pub struct WriterRdfXmlSerializer<W: Write> {
229 writer: Writer<W>,
230 inner: InnerRdfXmlWriter,
231}
232
233impl<W: Write> WriterRdfXmlSerializer<W> {
234 pub fn serialize_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
236 let mut buffer = Vec::new();
237 self.inner.serialize_triple(t, &mut buffer)?;
238 self.flush_buffer(&mut buffer)
239 }
240
241 pub fn finish(mut self) -> io::Result<W> {
243 let mut buffer = Vec::new();
244 self.inner.finish(&mut buffer);
245 self.flush_buffer(&mut buffer)?;
246 Ok(self.writer.into_inner())
247 }
248
249 fn flush_buffer(&mut self, buffer: &mut Vec<Event<'_>>) -> io::Result<()> {
250 for event in buffer.drain(0..) {
251 self.writer.write_event(event)?;
252 }
253 Ok(())
254 }
255}
256
/// Serializes triples as RDF/XML to a Tokio [`AsyncWrite`] implementation.
///
/// [`TokioAsyncWriterRdfXmlSerializer::finish`] writes the closing tags of the document
/// and hands the underlying writer back.
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct TokioAsyncWriterRdfXmlSerializer<W: AsyncWrite + Unpin> {
    /// XML event writer wrapping the asynchronous output.
    writer: Writer<W>,
    /// Serialization state, independent of how the events are written.
    inner: InnerRdfXmlWriter,
}

#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> TokioAsyncWriterRdfXmlSerializer<W> {
    /// Serializes one triple and writes the resulting XML events.
    ///
    /// # Errors
    ///
    /// Returns an error if the triple is rejected by the serializer or if writing fails.
    pub async fn serialize_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        let mut events = Vec::new();
        self.inner.serialize_triple(t, &mut events)?;
        self.flush_buffer(&mut events).await
    }

    /// Writes the end of the document and returns the underlying writer.
    ///
    /// # Errors
    ///
    /// Returns an error if writing fails.
    pub async fn finish(mut self) -> io::Result<W> {
        let mut events = Vec::new();
        self.inner.finish(&mut events);
        self.flush_buffer(&mut events).await?;
        Ok(self.writer.into_inner())
    }

    /// Writes every event of `buffer` in order, leaving `buffer` empty.
    async fn flush_buffer(&mut self, buffer: &mut Vec<Event<'_>>) -> io::Result<()> {
        for event in buffer.drain(..) {
            if let Err(error) = self.writer.write_event_async(event).await {
                return Err(map_err(error));
            }
        }
        Ok(())
    }
}
320
/// IRIs from the RDF namespace that have a special meaning in the RDF/XML grammar and
/// must therefore never be written as element names for user data.
///
/// The list is made of the grammar's `syntaxTerms` (the core syntax terms plus
/// `rdf:Description` and `rdf:li`) and of its `oldTerms` (`rdf:aboutEach`,
/// `rdf:aboutEachPrefix` and `rdf:bagID`). The latter have been withdrawn from the
/// language and are excluded from both node element and property element names, so a
/// document using them as element names is rejected by conformant parsers.
const RESERVED_SYNTAX_TERMS: [&str; 12] = [
    // syntaxTerms
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#Description",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#li",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#RDF",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#ID",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#about",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#parseType",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#resource",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#nodeID",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#datatype",
    // oldTerms
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefix",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#bagID",
];
332
333pub struct InnerRdfXmlWriter {
334 current_subject: Option<Subject>,
335 current_resource_tag: Option<String>,
336 custom_default_prefix: bool,
337 prefixes_by_iri: BTreeMap<String, String>,
338 base_iri: Option<Iri<String>>,
339}
340
341impl InnerRdfXmlWriter {
342 fn serialize_triple<'a>(
343 &mut self,
344 t: impl Into<TripleRef<'a>>,
345 output: &mut Vec<Event<'a>>,
346 ) -> io::Result<()> {
347 if self.current_subject.is_none() {
348 self.write_start(output);
349 }
350
351 let triple = t.into();
352 if self.current_subject.as_ref().map(Subject::as_ref) != Some(triple.subject) {
354 if self.current_subject.is_some() {
355 output.push(Event::End(
356 self.current_resource_tag
357 .take()
358 .map_or_else(|| BytesEnd::new("rdf:Description"), BytesEnd::new),
359 ));
360 }
361 self.current_subject = Some(triple.subject.into_owned());
362
363 let (mut description_open, with_type_tag) = if triple.predicate == rdf::TYPE {
364 if let TermRef::NamedNode(t) = triple.object {
365 if RESERVED_SYNTAX_TERMS.contains(&t.as_str()) {
366 (BytesStart::new("rdf:Description"), false)
367 } else {
368 let (prop_qname, prop_xmlns) = self.uri_to_qname_and_xmlns(t);
369 let mut description_open = BytesStart::new(prop_qname.clone());
370 if let Some(prop_xmlns) = prop_xmlns {
371 description_open.push_attribute(prop_xmlns);
372 }
373 self.current_resource_tag = Some(prop_qname.into_owned());
374 (description_open, true)
375 }
376 } else {
377 (BytesStart::new("rdf:Description"), false)
378 }
379 } else {
380 (BytesStart::new("rdf:Description"), false)
381 };
382 #[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)]
383 match triple.subject {
384 SubjectRef::NamedNode(node) => description_open
385 .push_attribute(("rdf:about", relative_iri(node.as_str(), &self.base_iri))),
386 SubjectRef::BlankNode(node) => {
387 description_open.push_attribute(("rdf:nodeID", node.as_str()))
388 }
389 _ => {
390 return Err(io::Error::new(
391 io::ErrorKind::InvalidInput,
392 "RDF/XML only supports named or blank subject",
393 ))
394 }
395 }
396 output.push(Event::Start(description_open));
397 if with_type_tag {
398 return Ok(()); }
400 }
401
402 if RESERVED_SYNTAX_TERMS.contains(&triple.predicate.as_str()) {
403 return Err(io::Error::new(
404 io::ErrorKind::InvalidInput,
405 "RDF/XML reserved syntax term is not allowed as a predicate",
406 ));
407 }
408 let (prop_qname, prop_xmlns) = self.uri_to_qname_and_xmlns(triple.predicate);
409 let mut property_open = BytesStart::new(prop_qname.clone());
410 if let Some(prop_xmlns) = prop_xmlns {
411 property_open.push_attribute(prop_xmlns);
412 }
413 #[allow(clippy::match_wildcard_for_single_variants, unreachable_patterns)]
414 let content = match triple.object {
415 TermRef::NamedNode(node) => {
416 property_open
417 .push_attribute(("rdf:resource", relative_iri(node.as_str(), &self.base_iri)));
418 None
419 }
420 TermRef::BlankNode(node) => {
421 property_open.push_attribute(("rdf:nodeID", node.as_str()));
422 None
423 }
424 TermRef::Literal(literal) => {
425 if let Some(language) = literal.language() {
426 property_open.push_attribute(("xml:lang", language));
427 } else if !literal.is_plain() {
428 property_open.push_attribute((
429 "rdf:datatype",
430 relative_iri(literal.datatype().as_str(), &self.base_iri),
431 ));
432 }
433 Some(literal.value())
434 }
435 _ => {
436 return Err(io::Error::new(
437 io::ErrorKind::InvalidInput,
438 "RDF/XML only supports named, blank or literal object",
439 ))
440 }
441 };
442 if let Some(content) = content {
443 output.push(Event::Start(property_open));
444 output.push(Event::Text(BytesText::new(content)));
445 output.push(Event::End(BytesEnd::new(prop_qname)));
446 } else {
447 output.push(Event::Empty(property_open));
448 }
449 Ok(())
450 }
451
452 fn write_start(&self, output: &mut Vec<Event<'_>>) {
453 output.push(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None)));
454 let mut rdf_open = BytesStart::new("rdf:RDF");
455 if let Some(base_iri) = &self.base_iri {
456 rdf_open.push_attribute(("xml:base", base_iri.as_str()));
457 }
458 for (prefix_value, prefix_name) in &self.prefixes_by_iri {
459 rdf_open.push_attribute((
460 if prefix_name.is_empty() {
461 "xmlns".into()
462 } else {
463 format!("xmlns:{prefix_name}")
464 }
465 .as_str(),
466 prefix_value.as_str(),
467 ));
468 }
469 output.push(Event::Start(rdf_open))
470 }
471
472 fn finish(&mut self, output: &mut Vec<Event<'static>>) {
473 if self.current_subject.is_some() {
474 output.push(Event::End(
475 self.current_resource_tag
476 .take()
477 .map_or_else(|| BytesEnd::new("rdf:Description"), BytesEnd::new),
478 ));
479 } else {
480 self.write_start(output);
481 }
482 output.push(Event::End(BytesEnd::new("rdf:RDF")));
483 }
484
485 fn uri_to_qname_and_xmlns<'a>(
486 &self,
487 uri: NamedNodeRef<'a>,
488 ) -> (Cow<'a, str>, Option<(&'a str, &'a str)>) {
489 let (prop_prefix, prop_value) = split_iri(uri.as_str());
490 if let Some(prop_prefix) = self.prefixes_by_iri.get(prop_prefix) {
491 (
492 if prop_prefix.is_empty() {
493 Cow::Borrowed(prop_value)
494 } else {
495 Cow::Owned(format!("{prop_prefix}:{prop_value}"))
496 },
497 None,
498 )
499 } else if prop_prefix == "http://www.w3.org/2000/xmlns/" {
500 (Cow::Owned(format!("xmlns:{prop_value}")), None)
501 } else if !prop_value.is_empty() && !self.custom_default_prefix {
502 (Cow::Borrowed(prop_value), Some(("xmlns", prop_prefix)))
503 } else {
504 (
506 Cow::Owned(format!("oxprefix:{prop_value}")),
507 Some(("xmlns:oxprefix", prop_prefix)),
508 )
509 }
510 }
511}
512
/// Converts a `quick_xml` error into an [`io::Error`].
///
/// An I/O error is unwrapped from its [`Arc`] when this is the only reference to it.
/// If it is still shared, a new error of the same kind wrapping the shared one is
/// built. Any other error is wrapped with [`io::Error::other`].
#[cfg(feature = "async-tokio")]
fn map_err(error: quick_xml::Error) -> io::Error {
    match error {
        quick_xml::Error::Io(shared) => Arc::try_unwrap(shared)
            .unwrap_or_else(|still_shared| io::Error::new(still_shared.kind(), still_shared)),
        other => io::Error::other(other),
    }
}
521
522fn split_iri(iri: &str) -> (&str, &str) {
523 if let Some(position_base) = iri.rfind(|c| !is_name_char(c) || c == ':') {
524 if let Some(position_add) = iri[position_base..].find(|c| is_name_start_char(c) && c != ':')
525 {
526 (
527 &iri[..position_base + position_add],
528 &iri[position_base + position_add..],
529 )
530 } else {
531 (iri, "")
532 }
533 } else {
534 (iri, "")
535 }
536}
537
538fn relative_iri<'a>(iri: &'a str, base_iri: &Option<Iri<String>>) -> Cow<'a, str> {
539 if let Some(base_iri) = base_iri {
540 if let Ok(relative) = base_iri.relativize(&Iri::parse_unchecked(iri)) {
541 return relative.into_inner().into();
542 }
543 }
544 iri.into()
545}
546
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
    use super::*;
    use std::error::Error;

    /// IRIs are split right before their trailing local name, if they have one.
    #[test]
    fn test_split_iri() {
        let cases = [
            ("http://schema.org/Person", ("http://schema.org/", "Person")),
            ("http://schema.org/", ("http://schema.org/", "")),
            ("http://schema.org#foo", ("http://schema.org#", "foo")),
            ("urn:isbn:foo", ("urn:isbn:", "foo")),
        ];
        for (iri, expected) in cases {
            assert_eq!(split_iri(iri), expected);
        }
    }

    /// A user-provided `rdf` prefix does not replace the RDF namespace binding.
    #[test]
    fn test_custom_rdf_ns() -> Result<(), Box<dyn Error>> {
        let serializer = RdfXmlSerializer::new()
            .with_prefix("rdf", "http://example.com/")?
            .for_writer(Vec::new());
        let output = serializer.finish()?;
        assert_eq!(output, b"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n</rdf:RDF>");
        Ok(())
    }

    /// With a user-declared default namespace, undeclared namespaces use `oxprefix`.
    #[test]
    fn test_custom_empty_ns() -> Result<(), Box<dyn Error>> {
        let subject = NamedNodeRef::new("http://example.com/s")?;
        let mut serializer = RdfXmlSerializer::new()
            .with_prefix("", "http://example.com/")?
            .for_writer(Vec::new());
        serializer.serialize_triple(TripleRef::new(
            subject,
            rdf::TYPE,
            NamedNodeRef::new("http://example.org/o")?,
        ))?;
        serializer.serialize_triple(TripleRef::new(
            subject,
            NamedNodeRef::new("http://example.com/p")?,
            NamedNodeRef::new("http://example.com/o2")?,
        ))?;
        let output = serializer.finish()?;
        let expected = concat!(
            "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
            "<rdf:RDF xmlns=\"http://example.com/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n",
            "\t<oxprefix:o xmlns:oxprefix=\"http://example.org/\" rdf:about=\"http://example.com/s\">\n",
            "\t\t<p rdf:resource=\"http://example.com/o2\"/>\n",
            "\t</oxprefix:o>\n",
            "</rdf:RDF>",
        );
        assert_eq!(String::from_utf8_lossy(&output), expected);
        Ok(())
    }
}