1use crate::JsonEvent;
2use std::borrow::Cow;
3use std::cmp::{max, min};
4use std::error::Error;
5use std::io::{self, Read};
6use std::ops::Range;
7use std::{fmt, str};
8#[cfg(feature = "async-tokio")]
9use tokio::io::{AsyncRead, AsyncReadExt};
10
/// Default limit on the depth of the parser state stack.
const MAX_STATE_STACK_SIZE: usize = 65_536;
/// Number of spare bytes the reader buffer is grown by (at least) before each read.
const MIN_BUFFER_SIZE: usize = 4096;
/// Default maximal size, in bytes, of the reader input buffer (16 MiB).
const MAX_BUFFER_SIZE: usize = 4096 * 4096;
14
/// JSON event parser pulling its input from a [`Read`] implementation.
///
/// Bytes are accumulated in an internal buffer and fed to a
/// [`LowLevelJsonParser`] until it is able to produce an event.
pub struct ReaderJsonParser<R: Read> {
    /// Backing storage; only `input_buffer_start..input_buffer_end` holds pending data.
    input_buffer: Vec<u8>,
    /// Index of the first byte not yet consumed by the parser.
    input_buffer_start: usize,
    /// Index one past the last byte read from `read`.
    input_buffer_end: usize,
    /// Upper bound on the buffer size, in bytes.
    max_buffer_size: usize,
    /// Set when the last call to `read` returned zero bytes.
    is_ending: bool,
    /// Source of the JSON bytes.
    read: R,
    /// Parser state machine the buffered bytes are handed to.
    parser: LowLevelJsonParser,
}
38
39impl<R: Read> ReaderJsonParser<R> {
40 pub const fn new(read: R) -> Self {
41 Self {
42 input_buffer: Vec::new(),
43 input_buffer_start: 0,
44 input_buffer_end: 0,
45 max_buffer_size: MAX_BUFFER_SIZE,
46 is_ending: false,
47 read,
48 parser: LowLevelJsonParser::new(),
49 }
50 }
51
52 pub fn with_max_buffer_size(mut self, size: usize) -> Self {
54 self.max_buffer_size = size;
55 self
56 }
57
58 pub fn parse_next(&mut self) -> Result<JsonEvent<'_>, JsonParseError> {
59 loop {
60 {
61 let LowLevelJsonParserResult {
62 event,
63 consumed_bytes,
64 } = self.parser.parse_next(
65 #[allow(unsafe_code)]
66 unsafe {
67 let input_buffer_ptr: *const [u8] =
68 &self.input_buffer[self.input_buffer_start..self.input_buffer_end];
69 &*input_buffer_ptr
70 }, self.is_ending,
72 );
73 self.input_buffer_start += consumed_bytes;
74 if let Some(event) = event {
75 return Ok(event?);
76 }
77 }
78 if self.input_buffer_start > 0 {
79 self.input_buffer
80 .copy_within(self.input_buffer_start..self.input_buffer_end, 0);
81 self.input_buffer_end -= self.input_buffer_start;
82 self.input_buffer_start = 0;
83 }
84 if self.input_buffer.len() == self.max_buffer_size {
85 return Err(io::Error::new(
86 io::ErrorKind::OutOfMemory,
87 format!(
88 "Reached the buffer maximal size of {}",
89 self.max_buffer_size
90 ),
91 )
92 .into());
93 }
94 let min_end = min(
95 self.input_buffer_end + MIN_BUFFER_SIZE,
96 self.max_buffer_size,
97 );
98 if self.input_buffer.len() < min_end {
99 self.input_buffer.resize(min_end, 0);
100 }
101 if self.input_buffer.len() < self.input_buffer.capacity() {
102 self.input_buffer.resize(self.input_buffer.capacity(), 0);
104 }
105 let read = self
106 .read
107 .read(&mut self.input_buffer[self.input_buffer_end..])?;
108 self.input_buffer_end += read;
109 self.is_ending = read == 0;
110 }
111 }
112
113 #[deprecated(note = "Use parse_next() instead")]
114 pub fn read_next_event(&mut self) -> Result<JsonEvent<'_>, JsonParseError> {
115 self.parse_next()
116 }
117}
118
/// JSON event parser pulling its input from a Tokio [`AsyncRead`] implementation.
///
/// Bytes are accumulated in an internal buffer and fed to a
/// [`LowLevelJsonParser`] until it is able to produce an event.
#[cfg(feature = "async-tokio")]
pub struct TokioAsyncReaderJsonParser<R: AsyncRead + Unpin> {
    /// Backing storage; only `input_buffer_start..input_buffer_end` holds pending data.
    input_buffer: Vec<u8>,
    /// Index of the first byte not yet consumed by the parser.
    input_buffer_start: usize,
    /// Index one past the last byte read from `read`.
    input_buffer_end: usize,
    /// Upper bound on the buffer size, in bytes.
    max_buffer_size: usize,
    /// Set when the last call to `read` returned zero bytes.
    is_ending: bool,
    /// Source of the JSON bytes.
    read: R,
    /// Parser state machine the buffered bytes are handed to.
    parser: LowLevelJsonParser,
}
148
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> TokioAsyncReaderJsonParser<R> {
    /// Builds a parser reading from `read` with the default maximal buffer size.
    pub const fn new(read: R) -> Self {
        Self {
            input_buffer: Vec::new(),
            input_buffer_start: 0,
            input_buffer_end: 0,
            max_buffer_size: MAX_BUFFER_SIZE,
            is_ending: false,
            read,
            parser: LowLevelJsonParser::new(),
        }
    }

    /// Sets the maximal size, in bytes, of the internal input buffer.
    pub fn with_max_buffer_size(mut self, size: usize) -> Self {
        self.max_buffer_size = size;
        self
    }

    /// Returns the next JSON event, awaiting more bytes from the source when needed.
    ///
    /// # Errors
    ///
    /// Returns a syntax error when the input is not valid JSON, the I/O error
    /// raised by the underlying reader, or an [`io::ErrorKind::OutOfMemory`]
    /// error when a single token does not fit in `max_buffer_size` bytes.
    pub async fn parse_next(&mut self) -> Result<JsonEvent<'_>, JsonParseError> {
        loop {
            {
                let LowLevelJsonParserResult {
                    event,
                    consumed_bytes,
                } = self.parser.parse_next(
                    // SAFETY: the raw pointer only detaches the slice lifetime from
                    // `self` (borrow checker workaround). If an event is produced we
                    // return immediately and the returned event keeps `self` mutably
                    // borrowed, so the buffer cannot change while it is alive. If no
                    // event is produced nothing borrowed from the slice survives
                    // this block, so the buffer may be modified below.
                    #[allow(unsafe_code)]
                    unsafe {
                        let input_buffer_ptr: *const [u8] =
                            &self.input_buffer[self.input_buffer_start..self.input_buffer_end];
                        &*input_buffer_ptr
                    },
                    self.is_ending,
                );
                self.input_buffer_start += consumed_bytes;
                if let Some(event) = event {
                    return Ok(event?);
                }
            }
            // Move the pending bytes to the front so the whole tail is free for reading.
            if self.input_buffer_start > 0 {
                self.input_buffer
                    .copy_within(self.input_buffer_start..self.input_buffer_end, 0);
                self.input_buffer_end -= self.input_buffer_start;
                self.input_buffer_start = 0;
            }
            // The limit applies to the pending data: a buffer that was grown by a
            // previous large token but is now mostly empty must remain usable.
            if self.input_buffer_end >= self.max_buffer_size {
                return Err(io::Error::new(
                    io::ErrorKind::OutOfMemory,
                    format!(
                        "Reached the buffer maximal size of {}",
                        self.max_buffer_size
                    ),
                )
                .into());
            }
            let min_end = min(
                self.input_buffer_end + MIN_BUFFER_SIZE,
                self.max_buffer_size,
            );
            if self.input_buffer.len() < min_end {
                self.input_buffer.resize(min_end, 0);
            }
            // Use the capacity that is already allocated, but never beyond the
            // configured limit; otherwise the limit would not be enforced and the
            // read slice below could end up empty, which looks like an end of file.
            let spare_end = min(self.input_buffer.capacity(), self.max_buffer_size);
            if self.input_buffer.len() < spare_end {
                self.input_buffer.resize(spare_end, 0);
            }
            let read = self
                .read
                .read(&mut self.input_buffer[self.input_buffer_end..])
                .await?;
            self.input_buffer_end += read;
            // A read of zero bytes into a non-empty slice signals the end of the input.
            self.is_ending = read == 0;
        }
    }

    /// Deprecated alias of [`Self::parse_next`].
    #[deprecated(note = "Use parse_next() instead")]
    pub async fn read_next_event(&mut self) -> Result<JsonEvent<'_>, JsonParseError> {
        self.parse_next().await
    }
}
230
/// JSON event parser working on a byte slice that holds the complete input.
pub struct SliceJsonParser<'a> {
    /// Part of the input that has not been consumed yet.
    input_buffer: &'a [u8],
    /// Parser state machine the bytes are handed to.
    parser: LowLevelJsonParser,
}
248
249impl<'a> SliceJsonParser<'a> {
250 pub const fn new(buffer: &'a [u8]) -> Self {
251 Self {
252 input_buffer: buffer,
253 parser: LowLevelJsonParser::new(),
254 }
255 }
256
257 pub fn parse_next(&mut self) -> Result<JsonEvent<'a>, JsonSyntaxError> {
258 loop {
259 let LowLevelJsonParserResult {
260 event,
261 consumed_bytes,
262 } = self.parser.parse_next(self.input_buffer, true);
263 self.input_buffer = &self.input_buffer[consumed_bytes..];
264 if let Some(event) = event {
265 return event;
266 }
267 }
268 }
269
270 #[deprecated(note = "Use parse_next() instead")]
271 pub fn read_next_event(&mut self) -> Result<JsonEvent<'_>, JsonSyntaxError> {
272 self.parse_next()
273 }
274}
275
/// Push-style JSON parser: the caller supplies byte buffers and gets back
/// events together with the number of bytes that were consumed.
pub struct LowLevelJsonParser {
    /// Tokenizer state (offsets and line tracking).
    lexer: JsonLexer,
    /// Stack of grammar states describing what is expected next in each open container.
    state_stack: Vec<JsonState>,
    /// Limit checked before pushing onto `state_stack`.
    max_state_stack_size: usize,
    /// Set once the root value has started being read.
    element_read: bool,
    /// Event to hand out on the next call, stored when an error had to be reported first.
    buffered_event: Option<JsonEvent<'static>>,
}
337
338impl LowLevelJsonParser {
339 pub const fn new() -> Self {
340 Self {
341 lexer: JsonLexer {
342 file_offset: 0,
343 file_line: 0,
344 file_start_of_last_line: 0,
345 file_start_of_last_token: 0,
346 is_start: true,
347 },
348 state_stack: Vec::new(),
349 max_state_stack_size: MAX_STATE_STACK_SIZE,
350 element_read: false,
351 buffered_event: None,
352 }
353 }
354
355 pub fn with_max_stack_size(mut self, size: usize) -> Self {
357 self.max_state_stack_size = size;
358 self
359 }
360
361 pub fn parse_next<'a>(
365 &mut self,
366 input_buffer: &'a [u8],
367 is_ending: bool,
368 ) -> LowLevelJsonParserResult<'a> {
369 if let Some(event) = self.buffered_event.take() {
370 return LowLevelJsonParserResult {
371 consumed_bytes: 0,
372 event: Some(Ok(event)),
373 };
374 }
375 let start_file_offset = self.lexer.file_offset;
376 while let Some(token) = self.lexer.read_next_token(
377 &input_buffer[usize::try_from(self.lexer.file_offset - start_file_offset).unwrap()..],
378 is_ending,
379 ) {
380 let consumed_bytes = (self.lexer.file_offset - start_file_offset)
381 .try_into()
382 .unwrap();
383 match token {
384 Ok(token) => {
385 let (event, error) = self.apply_new_token(token);
386 let error = error.map(|e| {
387 self.lexer.syntax_error(
388 self.lexer.file_start_of_last_token..self.lexer.file_offset,
389 e,
390 )
391 });
392 if let Some(error) = error {
393 self.buffered_event = event.map(owned_event);
394 return LowLevelJsonParserResult {
395 consumed_bytes,
396 event: Some(Err(error)),
397 };
398 }
399 if let Some(event) = event {
400 return LowLevelJsonParserResult {
401 consumed_bytes,
402 event: Some(Ok(event)),
403 };
404 }
405 }
406 Err(error) => {
407 return LowLevelJsonParserResult {
408 consumed_bytes,
409 event: Some(Err(error)),
410 }
411 }
412 }
413 }
414 LowLevelJsonParserResult {
415 consumed_bytes: (self.lexer.file_offset - start_file_offset)
416 .try_into()
417 .unwrap(),
418 event: if is_ending {
419 self.buffered_event = Some(JsonEvent::Eof);
420 Some(Err(self.lexer.syntax_error(
421 self.lexer.file_offset..self.lexer.file_offset + 1,
422 "Unexpected end of file",
423 )))
424 } else {
425 None
426 },
427 }
428 }
429
    /// Deprecated alias of `parse_next`: forwards both arguments unchanged.
    #[deprecated(note = "Use parse_next() instead")]
    pub fn read_next_event<'a>(
        &mut self,
        input_buffer: &'a [u8],
        is_ending: bool,
    ) -> LowLevelJsonParserResult<'a> {
        self.parse_next(input_buffer, is_ending)
    }
438
439 fn apply_new_token<'a>(
440 &mut self,
441 token: JsonToken<'a>,
442 ) -> (Option<JsonEvent<'a>>, Option<String>) {
443 match self.state_stack.pop() {
444 Some(JsonState::ObjectKeyOrEnd) => {
445 if token == JsonToken::ClosingCurlyBracket {
446 (Some(JsonEvent::EndObject), None)
447 } else {
448 if let Err(e) = self.push_state_stack(JsonState::ObjectKey) {
449 return (None, Some(e));
450 }
451 self.apply_new_token(token)
452 }
453 }
454 Some(JsonState::ObjectKey) => {
455 if token == JsonToken::ClosingCurlyBracket {
456 return (Some(JsonEvent::EndObject), Some("Trailing commas are not allowed".into()));
457 }
458 if let Err(e) = self.push_state_stack(JsonState::ObjectColon) {
459 return (None, Some(e));
460 }
461 if let JsonToken::String(key) = token {
462 (Some(JsonEvent::ObjectKey(key)), None)
463 } else {
464 (None, Some("Object keys must be strings".into()))
465 }
466 }
467 Some(JsonState::ObjectColon) => {
468 if let Err(e) = self.push_state_stack(JsonState::ObjectValue) {
469 return (None, Some(e));
470 }
471 if token == JsonToken::Colon {
472 (None, None)
473 } else {
474 let (event, _) = self.apply_new_token(token);
475 (event, Some("Object keys must be strings".into()))
476 }
477 }
478 Some(JsonState::ObjectValue) => {
479 if let Err(e) = self.push_state_stack(JsonState::ObjectCommaOrEnd) {
480 return (None, Some(e));
481 }
482 self.apply_new_token_for_value(token)
483 }
484 Some(JsonState::ObjectCommaOrEnd) => match token {
485 JsonToken::Comma => {
486 (None, self.push_state_stack(JsonState::ObjectKey).err())
487 }
488 JsonToken::ClosingCurlyBracket => (Some(JsonEvent::EndObject), None),
489 _ => (None, Some("Object values must be followed by a comma to add a new value or a curly bracket to end the object".into())),
490 },
491 Some(JsonState::ArrayValueOrEnd) =>{
492 if token == JsonToken::ClosingSquareBracket {
493 return (Some(JsonEvent::EndArray), None);
494 }
495 if let Err(e) = self.push_state_stack(JsonState::ArrayValue) {
496 return (None, Some(e));
497 }
498 self.apply_new_token(token)
499 }
500 Some(JsonState::ArrayValue) => {
501 if token == JsonToken::ClosingSquareBracket {
502 return (Some(JsonEvent::EndArray), Some("Trailing commas are not allowed".into()));
503 }
504 if let Err(e) = self.push_state_stack(JsonState::ArrayCommaOrEnd) {
505 return (None, Some(e));
506 }
507 self.apply_new_token_for_value(token)
508 }
509 Some(JsonState::ArrayCommaOrEnd) => match token {
510 JsonToken::Comma => {
511 (None, self.push_state_stack(JsonState::ArrayValue).err())
512 }
513 JsonToken::ClosingSquareBracket => (Some(JsonEvent::EndArray), None),
514 _ => {
515 let _ = self.push_state_stack(JsonState::ArrayValue); let (event, _) = self.apply_new_token(token);
517 (event, Some("Array values must be followed by a comma to add a new value or a squared bracket to end the array".into()))
518 }
519 }
520 None => if self.element_read {
521 if token == JsonToken::Eof {
522 (Some(JsonEvent::Eof), None)
523 } else {
524 (None, Some("The JSON already contains one root element".into()))
525 }
526 } else {
527 self.element_read = true;
528 self.apply_new_token_for_value(token)
529 }
530 }
531 }
532
533 fn apply_new_token_for_value<'a>(
534 &mut self,
535 token: JsonToken<'a>,
536 ) -> (Option<JsonEvent<'a>>, Option<String>) {
537 match token {
538 JsonToken::OpeningSquareBracket => (
539 Some(JsonEvent::StartArray),
540 self.push_state_stack(JsonState::ArrayValueOrEnd).err(),
541 ),
542 JsonToken::ClosingSquareBracket => (
543 None,
544 Some("Unexpected closing square bracket, no array to close".into()),
545 ),
546 JsonToken::OpeningCurlyBracket => (
547 Some(JsonEvent::StartObject),
548 self.push_state_stack(JsonState::ObjectKeyOrEnd).err(),
549 ),
550 JsonToken::ClosingCurlyBracket => (
551 None,
552 Some("Unexpected closing curly bracket, no array to close".into()),
553 ),
554 JsonToken::Comma => (None, Some("Unexpected comma, no values to separate".into())),
555 JsonToken::Colon => (None, Some("Unexpected colon, no key to follow".into())),
556 JsonToken::String(string) => (Some(JsonEvent::String(string)), None),
557 JsonToken::Number(number) => (Some(JsonEvent::Number(number)), None),
558 JsonToken::True => (Some(JsonEvent::Boolean(true)), None),
559 JsonToken::False => (Some(JsonEvent::Boolean(false)), None),
560 JsonToken::Null => (Some(JsonEvent::Null), None),
561 JsonToken::Eof => (
562 Some(JsonEvent::Eof),
563 Some("Unexpected end of file, a value was expected".into()),
564 ),
565 }
566 }
567
568 fn push_state_stack(&mut self, state: JsonState) -> Result<(), String> {
569 self.check_stack_size()?;
570 self.state_stack.push(state);
571 Ok(())
572 }
573
574 fn check_stack_size(&self) -> Result<(), String> {
575 if self.state_stack.len() > self.max_state_stack_size {
576 Err(format!(
577 "Max stack size of {} reached on an object opening",
578 self.max_state_stack_size
579 ))
580 } else {
581 Ok(())
582 }
583 }
584}
585
/// The default parser is the one returned by `LowLevelJsonParser::new`.
impl Default for LowLevelJsonParser {
    fn default() -> Self {
        Self::new()
    }
}
591
/// What the grammar expects next inside the innermost open container.
#[derive(Eq, PartialEq, Copy, Clone, Debug)]
enum JsonState {
    /// An object key is required (reached after a comma).
    ObjectKey,
    /// An object key or `}` (right after `{`).
    ObjectKeyOrEnd,
    /// The `:` following an object key.
    ObjectColon,
    /// The value following `:`.
    ObjectValue,
    /// `,` or `}` after an object value.
    ObjectCommaOrEnd,
    /// An array value is required (reached after a comma).
    ArrayValue,
    /// An array value or `]` (right after `[`).
    ArrayValueOrEnd,
    /// `,` or `]` after an array value.
    ArrayCommaOrEnd,
}
603
/// Lexical token produced by `JsonLexer`.
#[derive(Eq, PartialEq, Clone, Debug)]
enum JsonToken<'a> {
    /// `[`
    OpeningSquareBracket,
    /// `]`
    ClosingSquareBracket,
    /// `{`
    OpeningCurlyBracket,
    /// `}`
    ClosingCurlyBracket,
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// String content with escape sequences already decoded.
    String(Cow<'a, str>),
    /// Number, kept in its textual form.
    Number(Cow<'a, str>),
    /// `true`
    True,
    /// `false`
    False,
    /// `null`
    Null,
    /// End of the input.
    Eof,
}
619
/// Tokenizer state: position bookkeeping used to build error locations.
struct JsonLexer {
    /// Number of input bytes consumed so far.
    file_offset: u64,
    /// Zero-based number of the current line.
    file_line: u64,
    /// Offset of the first byte of the current line.
    file_start_of_last_line: u64,
    /// Offset at which the most recently read token starts.
    file_start_of_last_token: u64,
    /// True until the check for a leading byte order mark has been done.
    is_start: bool,
}
627
628impl JsonLexer {
    /// Reads the next token found at the start of `input_buffer`.
    ///
    /// A UTF-8 byte order mark is skipped on the first call, then whitespace
    /// is skipped while the line counters are kept up to date.
    ///
    /// Returns `None` when the buffer does not hold enough bytes to decide
    /// (more input is required), `Some(Ok(_))` for a token and `Some(Err(_))`
    /// for a lexical error. `self.file_offset` is advanced past everything
    /// that was consumed; skipped whitespace counts as consumed even when
    /// `None` is returned.
    fn read_next_token<'a>(
        &mut self,
        mut input_buffer: &'a [u8],
        is_ending: bool,
    ) -> Option<Result<JsonToken<'a>, JsonSyntaxError>> {
        // Byte order mark: three bytes are needed to decide, unless the input is over.
        if self.is_start {
            if input_buffer.len() < 3 && !is_ending {
                return None;
            }
            self.is_start = false;
            if input_buffer.starts_with(&[0xEF, 0xBB, 0xBF]) {
                input_buffer = &input_buffer[3..];
                self.file_offset += 3;
            }
        }

        // Skip whitespace; `i` counts the bytes skipped in this call.
        let mut i = 0;
        while let Some(c) = input_buffer.get(i) {
            match *c {
                b' ' | b'\t' => {
                    i += 1;
                }
                b'\n' => {
                    i += 1;
                    self.file_line += 1;
                    self.file_start_of_last_line = self.file_offset + u64::try_from(i).unwrap();
                }
                b'\r' => {
                    i += 1;
                    if let Some(c) = input_buffer.get(i) {
                        // "\r\n" counts as a single line break.
                        if *c == b'\n' {
                            i += 1;
                        }
                    } else if !is_ending {
                        // The next byte is needed to know whether this is "\r\n":
                        // leave the '\r' unconsumed and wait for more input.
                        i -= 1;
                        self.file_offset += u64::try_from(i).unwrap();
                        return None;
                    }
                    self.file_line += 1;
                    self.file_start_of_last_line = self.file_offset + u64::try_from(i).unwrap();
                }
                _ => {
                    break;
                }
            }
        }
        self.file_offset += u64::try_from(i).unwrap();
        input_buffer = &input_buffer[i..];
        self.file_start_of_last_token = self.file_offset;

        if is_ending && input_buffer.is_empty() {
            return Some(Ok(JsonToken::Eof));
        }

        // Dispatch on the first byte of the token (`None` if the buffer is exhausted).
        match *input_buffer.first()? {
            b'{' => {
                self.file_offset += 1;
                Some(Ok(JsonToken::OpeningCurlyBracket))
            }
            b'}' => {
                self.file_offset += 1;
                Some(Ok(JsonToken::ClosingCurlyBracket))
            }
            b'[' => {
                self.file_offset += 1;
                Some(Ok(JsonToken::OpeningSquareBracket))
            }
            b']' => {
                self.file_offset += 1;
                Some(Ok(JsonToken::ClosingSquareBracket))
            }
            b',' => {
                self.file_offset += 1;
                Some(Ok(JsonToken::Comma))
            }
            b':' => {
                self.file_offset += 1;
                Some(Ok(JsonToken::Colon))
            }
            b'"' => self.read_string(input_buffer),
            b't' => self.read_constant(input_buffer, is_ending, "true", JsonToken::True),
            b'f' => self.read_constant(input_buffer, is_ending, "false", JsonToken::False),
            b'n' => self.read_constant(input_buffer, is_ending, "null", JsonToken::Null),
            b'-' | b'0'..=b'9' => self.read_number(input_buffer, is_ending),
            c => {
                // The unexpected byte is consumed so that the caller can make progress.
                self.file_offset += 1;
                Some(Err(self.syntax_error(
                    self.file_offset - 1..self.file_offset,
                    if c < 128 {
                        format!("Unexpected char: '{}'", char::from(c))
                    } else {
                        format!("Unexpected byte: \\x{c:X}")
                    },
                )))
            }
        }
    }
730
    /// Reads a string token; `input_buffer` must start with the opening `"`.
    ///
    /// Returns `None` (without consuming anything) when the closing quote or
    /// the end of an escape sequence is not in the buffer yet. Otherwise the
    /// whole string including both quotes is consumed and the result is either
    /// the decoded string or the first error met while reading it.
    ///
    /// The string is borrowed from the buffer when it contains no escape
    /// sequence and is rebuilt into an owned `String` otherwise.
    fn read_string<'a>(
        &mut self,
        input_buffer: &'a [u8],
    ) -> Option<Result<JsonToken<'a>, JsonSyntaxError>> {
        // First error found; reading continues up to the closing quote anyway.
        let mut error = None;
        // Decoded content and the buffer position up to which it has been
        // decoded; only allocated once an escape sequence is seen.
        let mut string: Option<(String, usize)> = None;
        // Position of the next byte to look at (0 is the opening quote).
        let mut next_byte_offset = 1;
        loop {
            match *input_buffer.get(next_byte_offset)? {
                b'"' => {
                    // Closing quote: build the token.
                    let result = Some(if let Some(error) = error {
                        Err(error)
                    } else if let Some((mut string, read_until)) = string {
                        // Append the raw bytes that follow the last escape sequence.
                        if read_until < next_byte_offset {
                            let (str, e) = self.decode_utf8(
                                &input_buffer[read_until..next_byte_offset],
                                self.file_offset + u64::try_from(read_until).unwrap(),
                            );
                            error = error.or(e);
                            string.push_str(&str);
                        }
                        if let Some(error) = error {
                            Err(error)
                        } else {
                            Ok(JsonToken::String(Cow::Owned(string)))
                        }
                    } else {
                        // No escape sequence: the content can be borrowed as is.
                        let (string, error) = self
                            .decode_utf8(&input_buffer[1..next_byte_offset], self.file_offset + 1);
                        if let Some(error) = error {
                            Err(error)
                        } else {
                            Ok(JsonToken::String(string))
                        }
                    });
                    self.file_offset += u64::try_from(next_byte_offset).unwrap() + 1;
                    return result;
                }
                b'\\' => {
                    // Escape sequence: switch to the owned representation.
                    if string.is_none() {
                        string = Some((String::new(), 1))
                    }
                    let (string, read_until) = string.as_mut().unwrap();
                    // Copy the raw bytes located before the backslash.
                    if *read_until < next_byte_offset {
                        let (str, e) = self.decode_utf8(
                            &input_buffer[*read_until..next_byte_offset],
                            self.file_offset + u64::try_from(*read_until).unwrap(),
                        );
                        error = error.or(e);
                        string.push_str(&str);
                    }
                    next_byte_offset += 1;
                    match *input_buffer.get(next_byte_offset)? {
                        b'"' => {
                            string.push('"');
                            next_byte_offset += 1;
                        }
                        b'\\' => {
                            string.push('\\');
                            next_byte_offset += 1;
                        }
                        b'/' => {
                            string.push('/');
                            next_byte_offset += 1;
                        }
                        b'b' => {
                            string.push('\u{8}');
                            next_byte_offset += 1;
                        }
                        b'f' => {
                            string.push('\u{C}');
                            next_byte_offset += 1;
                        }
                        b'n' => {
                            string.push('\n');
                            next_byte_offset += 1;
                        }
                        b'r' => {
                            string.push('\r');
                            next_byte_offset += 1;
                        }
                        b't' => {
                            string.push('\t');
                            next_byte_offset += 1;
                        }
                        b'u' => {
                            // \uXXXX: four hexadecimal digits.
                            next_byte_offset += 1;
                            let val = input_buffer.get(next_byte_offset..next_byte_offset + 4)?;
                            next_byte_offset += 4;
                            let code_point = match read_hexa_char(val) {
                                Ok(cp) => cp,
                                Err(e) => {
                                    error = error.or_else(|| {
                                        let pos = self.file_offset
                                            + u64::try_from(next_byte_offset).unwrap();
                                        Some(self.syntax_error(pos - 4..pos, e))
                                    });
                                    char::REPLACEMENT_CHARACTER.into()
                                }
                            };
                            if let Some(c) = char::from_u32(code_point) {
                                string.push(c);
                            } else {
                                // Not a scalar value: it has to be the first half of a
                                // surrogate pair followed by a second \uXXXX escape.
                                // NOTE(review): the following 6 bytes are read even when
                                // the value is not a high surrogate (an error has been
                                // recorded by then) - confirm this is intended.
                                let high_surrogate = code_point;
                                if !(0xD800..=0xDBFF).contains(&high_surrogate) {
                                    error = error.or_else(|| {
                                        let pos = self.file_offset
                                            + u64::try_from(next_byte_offset).unwrap();
                                        Some(self.syntax_error(
                                            pos - 6..pos,
                                            format!(
                                                "\\u{:X} is not a valid high surrogate",
                                                high_surrogate
                                            ),
                                        ))
                                    });
                                }
                                let val =
                                    input_buffer.get(next_byte_offset..next_byte_offset + 6)?;
                                next_byte_offset += 6;
                                if !val.starts_with(b"\\u") {
                                    error = error.or_else(|| {
                                        let pos = self.file_offset + u64::try_from(next_byte_offset).unwrap();
                                        Some(self.syntax_error(
                                            pos - 6..pos,
                                            format!(
                                                "\\u{:X} is a high surrogate and should be followed by a low surrogate \\uXXXX",
                                                high_surrogate
                                            )
                                        ))
                                    });
                                }
                                let low_surrogate = match read_hexa_char(&val[2..]) {
                                    Ok(cp) => cp,
                                    Err(e) => {
                                        error = error.or_else(|| {
                                            let pos = self.file_offset
                                                + u64::try_from(next_byte_offset).unwrap();
                                            Some(self.syntax_error(pos - 6..pos, e))
                                        });
                                        char::REPLACEMENT_CHARACTER.into()
                                    }
                                };
                                if !(0xDC00..=0xDFFF).contains(&low_surrogate) {
                                    error = error.or_else(|| {
                                        let pos = self.file_offset
                                            + u64::try_from(next_byte_offset).unwrap();
                                        Some(self.syntax_error(
                                            pos - 6..pos,
                                            format!(
                                                "\\u{:X} is not a valid low surrogate",
                                                low_surrogate
                                            ),
                                        ))
                                    });
                                }
                                // Combine the two halves into a supplementary code point.
                                let code_point = 0x10000
                                    + ((high_surrogate & 0x03FF) << 10)
                                    + (low_surrogate & 0x03FF);
                                if let Some(c) = char::from_u32(code_point) {
                                    string.push(c)
                                } else {
                                    string.push(char::REPLACEMENT_CHARACTER);
                                    error = error.or_else(|| {
                                        let pos = self.file_offset
                                            + u64::try_from(next_byte_offset).unwrap();
                                        Some(self.syntax_error(
                                            pos - 12..pos,
                                            format!(
                                                "\\u{:X}\\u{:X} is an invalid surrogate pair",
                                                high_surrogate, low_surrogate
                                            ),
                                        ))
                                    });
                                }
                            }
                        }
                        c => {
                            // Unknown escape: record the error and substitute U+FFFD.
                            next_byte_offset += 1;
                            error = error.or_else(|| {
                                let pos =
                                    self.file_offset + u64::try_from(next_byte_offset).unwrap();
                                Some(self.syntax_error(
                                    pos - 2..pos,
                                    format!("'\\{}' is not a valid escape sequence", char::from(c)),
                                ))
                            });
                            string.push(char::REPLACEMENT_CHARACTER);
                        }
                    }
                    // Everything up to here is now part of the decoded string.
                    *read_until = next_byte_offset;
                }
                c @ (0..=0x1F) => {
                    // Unescaped control characters are rejected; reading still goes on.
                    error = error.or_else(|| {
                        let pos = self.file_offset + u64::try_from(next_byte_offset).unwrap();
                        Some(self.syntax_error(
                            pos..pos + 1,
                            format!("'{}' is not allowed in JSON strings", char::from(c)),
                        ))
                    });
                    next_byte_offset += 1;
                }
                _ => {
                    next_byte_offset += 1;
                }
            }
        }
    }
941
942 fn read_constant(
943 &mut self,
944 input_buffer: &[u8],
945 is_ending: bool,
946 expected: &str,
947 value: JsonToken<'static>,
948 ) -> Option<Result<JsonToken<'static>, JsonSyntaxError>> {
949 if input_buffer.get(..expected.len())? == expected.as_bytes() {
950 self.file_offset += u64::try_from(expected.len()).unwrap();
951 return Some(Ok(value));
952 }
953 let ascii_chars = input_buffer
954 .iter()
955 .take_while(|c| c.is_ascii_alphabetic())
956 .count();
957 if ascii_chars == input_buffer.len() && !is_ending {
958 return None; }
960 let read = max(1, ascii_chars); let start_offset = self.file_offset;
962 self.file_offset += u64::try_from(read).unwrap();
963 Some(Err(self.syntax_error(
964 start_offset..self.file_offset,
965 format!("{} expected", expected),
966 )))
967 }
968
969 fn read_number<'a>(
970 &mut self,
971 input_buffer: &'a [u8],
972 is_ending: bool,
973 ) -> Option<Result<JsonToken<'a>, JsonSyntaxError>> {
974 let mut next_byte_offset = 0;
975 if *input_buffer.get(next_byte_offset)? == b'-' {
976 next_byte_offset += 1;
977 }
978 match *input_buffer.get(next_byte_offset)? {
980 b'0' => {
981 next_byte_offset += 1;
982 }
983 b'1'..=b'9' => {
984 next_byte_offset += 1;
985 next_byte_offset += read_digits(&input_buffer[next_byte_offset..], is_ending)?;
986 }
987 c => {
988 next_byte_offset += 1;
989 self.file_offset += u64::try_from(next_byte_offset).unwrap();
990 return Some(Err(self.syntax_error(
991 self.file_offset - 1..self.file_offset,
992 format!("A number is not allowed to start with '{}'", char::from(c)),
993 )));
994 }
995 }
996
997 if input_buffer.get(next_byte_offset).map_or_else(
999 || if is_ending { Some(None) } else { None },
1000 |c| Some(Some(*c)),
1001 )? == Some(b'.')
1002 {
1003 next_byte_offset += 1;
1004 let c = *input_buffer.get(next_byte_offset)?;
1005 next_byte_offset += 1;
1006 if !c.is_ascii_digit() {
1007 self.file_offset += u64::try_from(next_byte_offset).unwrap();
1008 return Some(Err(self.syntax_error(
1009 self.file_offset - 1..self.file_offset,
1010 format!(
1011 "A number fractional part must start with a digit and not '{}'",
1012 char::from(c)
1013 ),
1014 )));
1015 }
1016 next_byte_offset += read_digits(&input_buffer[next_byte_offset..], is_ending)?;
1017 }
1018
1019 let c = input_buffer.get(next_byte_offset).map_or_else(
1021 || if is_ending { Some(None) } else { None },
1022 |c| Some(Some(*c)),
1023 )?;
1024 if c == Some(b'e') || c == Some(b'E') {
1025 next_byte_offset += 1;
1026 match *input_buffer.get(next_byte_offset)? {
1027 b'-' | b'+' => {
1028 next_byte_offset += 1;
1029 let c = *input_buffer.get(next_byte_offset)?;
1030 next_byte_offset += 1;
1031 if !c.is_ascii_digit() {
1032 self.file_offset += u64::try_from(next_byte_offset).unwrap();
1033 return Some(Err(self.syntax_error(
1034 self.file_offset - 1..self.file_offset,
1035 format!(
1036 "A number exponential part must contain at least a digit, '{}' found",
1037 char::from(c)
1038 ),
1039 )));
1040 }
1041 }
1042 b'0'..=b'9' => {
1043 next_byte_offset += 1;
1044 }
1045 c => {
1046 next_byte_offset += 1;
1047 self.file_offset += u64::try_from(next_byte_offset).unwrap();
1048 return Some(Err(self.syntax_error(
1049 self.file_offset - 1..self.file_offset,
1050 format!(
1051 "A number exponential part must start with +, - or a digit, '{}' found",
1052 char::from(c)
1053 ),
1054 )));
1055 }
1056 }
1057 next_byte_offset += read_digits(&input_buffer[next_byte_offset..], is_ending)?;
1058 }
1059 self.file_offset += u64::try_from(next_byte_offset).unwrap();
1060 Some(Ok(JsonToken::Number(Cow::Borrowed(
1061 str::from_utf8(&input_buffer[..next_byte_offset]).unwrap(),
1062 ))))
1063 }
1064
1065 fn decode_utf8<'a>(
1066 &self,
1067 input_buffer: &'a [u8],
1068 start_position: u64,
1069 ) -> (Cow<'a, str>, Option<JsonSyntaxError>) {
1070 match str::from_utf8(input_buffer) {
1071 Ok(str) => (Cow::Borrowed(str), None),
1072 Err(e) => (
1073 String::from_utf8_lossy(input_buffer),
1074 Some({
1075 let pos = start_position + u64::try_from(e.valid_up_to()).unwrap();
1076 self.syntax_error(pos..pos + 1, format!("Invalid UTF-8: {e}"))
1077 }),
1078 ),
1079 }
1080 }
1081
1082 fn syntax_error(&self, file_offset: Range<u64>, message: impl Into<String>) -> JsonSyntaxError {
1083 let start_file_offset = max(file_offset.start, self.file_start_of_last_line);
1084 JsonSyntaxError {
1085 location: TextPosition {
1086 line: self.file_line,
1087 column: start_file_offset - self.file_start_of_last_line, offset: start_file_offset,
1089 }..TextPosition {
1090 line: self.file_line,
1091 column: file_offset.end - self.file_start_of_last_line, offset: file_offset.end,
1093 },
1094 message: message.into(),
1095 }
1096 }
1097}
1098
/// Parses `input` as a big-endian sequence of hexadecimal digits.
///
/// Both lower and upper case digits are accepted; an empty input gives 0.
/// Fails with a message naming the first byte that is not a hexadecimal digit.
fn read_hexa_char(input: &[u8]) -> Result<u32, String> {
    input.iter().copied().try_fold(0_u32, |value, byte| {
        let shifted = value * 16;
        match char::from(byte).to_digit(16) {
            Some(digit) => Ok(shifted + digit),
            None => Err(format!(
                "Unexpected character in a unicode escape: '{}'",
                char::from(byte)
            )),
        }
    })
}
1117
/// Counts the ASCII digits at the start of `input_buffer`.
///
/// Returns `None` when the digits run up to the end of the buffer and more
/// input may follow (`is_ending` is false), as the run could then continue.
fn read_digits(input_buffer: &[u8], is_ending: bool) -> Option<usize> {
    match input_buffer.iter().position(|byte| !byte.is_ascii_digit()) {
        Some(first_non_digit) => Some(first_non_digit),
        None if is_ending => Some(input_buffer.len()),
        None => None,
    }
}
1128
1129fn owned_event(event: JsonEvent<'_>) -> JsonEvent<'static> {
1130 match event {
1131 JsonEvent::String(s) => JsonEvent::String(s.into_owned().into()),
1132 JsonEvent::Number(n) => JsonEvent::Number(n.into_owned().into()),
1133 JsonEvent::Boolean(b) => JsonEvent::Boolean(b),
1134 JsonEvent::Null => JsonEvent::Null,
1135 JsonEvent::StartArray => JsonEvent::StartArray,
1136 JsonEvent::EndArray => JsonEvent::EndArray,
1137 JsonEvent::StartObject => JsonEvent::StartObject,
1138 JsonEvent::EndObject => JsonEvent::EndObject,
1139 JsonEvent::ObjectKey(k) => JsonEvent::ObjectKey(k.into_owned().into()),
1140 JsonEvent::Eof => JsonEvent::Eof,
1141 }
1142}
1143
/// Outcome of one call to `LowLevelJsonParser::parse_next`.
#[derive(Debug)]
pub struct LowLevelJsonParserResult<'a> {
    /// Number of bytes of the input buffer that were consumed and must not be passed again.
    pub consumed_bytes: usize,
    /// Event or syntax error produced, or `None` when more input is required.
    pub event: Option<Result<JsonEvent<'a>, JsonSyntaxError>>,
}
1152
/// Position in the input. All values are zero-based.
#[derive(Eq, PartialEq, Debug, Clone, Copy)]
pub struct TextPosition {
    /// Line number.
    pub line: u64,
    /// Distance in bytes from the start of the line.
    pub column: u64,
    /// Distance in bytes from the start of the input.
    pub offset: u64,
}
1160
/// Error raised when the input is not valid JSON.
#[derive(Debug)]
pub struct JsonSyntaxError {
    /// Range of the input the error applies to.
    location: Range<TextPosition>,
    /// Human-readable description of the problem.
    message: String,
}
1169
1170impl JsonSyntaxError {
1171 #[inline]
1173 pub fn location(&self) -> Range<TextPosition> {
1174 self.location.clone()
1175 }
1176
1177 #[inline]
1179 pub fn message(&self) -> &str {
1180 &self.message
1181 }
1182}
1183
1184impl fmt::Display for JsonSyntaxError {
1185 #[inline]
1186 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1187 if self.location.start.offset + 1 >= self.location.end.offset {
1188 write!(
1189 f,
1190 "Parser error at line {} column {}: {}",
1191 self.location.start.line + 1,
1192 self.location.start.column + 1,
1193 self.message
1194 )
1195 } else if self.location.start.line == self.location.end.line {
1196 write!(
1197 f,
1198 "Parser error at line {} between columns {} and column {}: {}",
1199 self.location.start.line + 1,
1200 self.location.start.column + 1,
1201 self.location.end.column + 1,
1202 self.message
1203 )
1204 } else {
1205 write!(
1206 f,
1207 "Parser error between line {} column {} and line {} column {}: {}",
1208 self.location.start.line + 1,
1209 self.location.start.column + 1,
1210 self.location.end.line + 1,
1211 self.location.end.column + 1,
1212 self.message
1213 )
1214 }
1215 }
1216}
1217
/// Marks `JsonSyntaxError` as a standard error; no method is overridden.
impl Error for JsonSyntaxError {}
1219
1220impl From<JsonSyntaxError> for io::Error {
1221 #[inline]
1222 fn from(error: JsonSyntaxError) -> Self {
1223 io::Error::new(io::ErrorKind::InvalidData, error)
1224 }
1225}
1226
/// Error returned by the parsers that read their input from an I/O source.
#[derive(Debug)]
pub enum JsonParseError {
    /// Error raised while reading the input (or when the buffer limit is reached).
    Io(io::Error),
    /// The input is not valid JSON.
    Syntax(JsonSyntaxError),
}
1237
1238impl fmt::Display for JsonParseError {
1239 #[inline]
1240 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1241 match self {
1242 Self::Io(e) => e.fmt(f),
1243 Self::Syntax(e) => e.fmt(f),
1244 }
1245 }
1246}
1247
1248impl Error for JsonParseError {
1249 #[inline]
1250 fn source(&self) -> Option<&(dyn Error + 'static)> {
1251 Some(match self {
1252 Self::Io(e) => e,
1253 Self::Syntax(e) => e,
1254 })
1255 }
1256}
1257
/// Wraps a syntax error into the `Syntax` variant.
impl From<JsonSyntaxError> for JsonParseError {
    #[inline]
    fn from(error: JsonSyntaxError) -> Self {
        Self::Syntax(error)
    }
}
1264
/// Wraps an I/O error into the `Io` variant.
impl From<io::Error> for JsonParseError {
    #[inline]
    fn from(error: io::Error) -> Self {
        Self::Io(error)
    }
}
1271
1272impl From<JsonParseError> for io::Error {
1273 #[inline]
1274 fn from(error: JsonParseError) -> Self {
1275 match error {
1276 JsonParseError::Syntax(e) => e.into(),
1277 JsonParseError::Io(e) => e,
1278 }
1279 }
1280}