1use crate::error::*;
2use crate::MAX_STACK_SIZE;
3use rio_api::parser::LineBytePosition;
4use std::collections::VecDeque;
5use std::io::{BufRead, ErrorKind, Read};
6use std::str;
7
/// Byte reader that keeps already-read input in a queue so that callers can
/// inspect bytes ahead of the current position before consuming them.
pub struct LookAheadByteReader<R>
where
    R: Read,
{
    /// Underlying byte source.
    inner: R,
    /// Bytes pulled from `inner` that have not been consumed yet.
    buffer: VecDeque<u8>,
    /// Copy of the byte at the front of `buffer`; `None` once the input is exhausted.
    current: Option<u8>,
    /// Incremented every time a `\n` byte is consumed.
    line_number: u64,
    /// Reset to 1 when a `\n` is consumed, incremented for every other consumed byte.
    byte_number: u64,
    /// Nesting depth counter checked against `MAX_STACK_SIZE`.
    stack_size: usize,
}
17
/// Initial capacity of the look-ahead buffer and size of one refill chunk, in bytes (8 KiB).
const DEFAULT_CAPACITY: usize = 8 << 10;
19
20impl<R: BufRead> LookAheadByteReader<R> {
21 pub fn new(inner: R) -> Self {
22 let mut buffer = VecDeque::with_capacity(DEFAULT_CAPACITY);
23 buffer.push_back(b'\n');
24 Self {
25 inner,
26 buffer,
27 current: Some(b'\n'),
28 line_number: 0,
29 byte_number: 1,
30 stack_size: 0,
31 }
32 }
33
34 pub fn current(&self) -> Option<u8> {
36 self.current
37 }
38 pub fn required_current(&self) -> Result<u8, TurtleError> {
40 self.current()
41 .ok_or_else(|| self.parse_error(TurtleErrorKind::PrematureEof))
42 }
43
44 pub fn next(&mut self) -> Result<Option<u8>, TurtleError> {
46 self.ahead(1)
47 }
48
49 pub fn required_next(&mut self) -> Result<u8, TurtleError> {
51 self.ahead(1)?
52 .ok_or_else(|| self.parse_error(TurtleErrorKind::PrematureEof))
53 }
54
55 pub fn ahead(&mut self, count: usize) -> Result<Option<u8>, TurtleError> {
57 loop {
58 let mut position = count;
59 let (first, second) = self.buffer.as_slices();
60 if position < first.len() {
61 return Ok(Some(first[position]));
62 }
63 position -= first.len();
64 if position < second.len() {
65 return Ok(Some(second[position]));
66 }
67 if self.fill_and_is_end()? {
68 return Ok(None);
69 }
70 }
71 }
72
73 pub fn consume(&mut self) -> Result<(), TurtleError> {
75 self.consume_many(1)
76 }
77
78 pub fn consume_many(&mut self, count: usize) -> Result<(), TurtleError> {
80 for _ in 0..count {
81 if self.buffer.is_empty() {
82 self.fill_and_is_end()?;
83 }
84 if let Some(c) = self.buffer.pop_front() {
85 if c == b'\n' {
86 self.line_number += 1;
87 self.byte_number = 1;
88 } else {
89 self.byte_number += 1;
90 }
91 } else {
92 return Err(self.parse_error(TurtleErrorKind::PrematureEof));
93 }
94 }
95 if self.buffer.is_empty() {
96 self.fill_and_is_end()?;
97 }
98 self.current = self.buffer.front().cloned();
99 Ok(())
100 }
101
102 pub fn line_number(&self) -> u64 {
104 self.line_number
105 }
106 pub fn byte_number(&self) -> u64 {
108 self.byte_number
109 }
110
111 pub fn starts_with(&mut self, prefix: &[u8]) -> bool {
113 self.starts_with_with_eq(prefix, |a, b| a == b)
114 }
115
116 pub fn starts_with_ignore_ascii_case(&mut self, prefix: &[u8]) -> bool {
119 self.starts_with_with_eq(prefix, |a, b| a.eq_ignore_ascii_case(b))
120 }
121
122 pub fn unexpected_char_error<T>(&self) -> Result<T, TurtleError> {
123 Err(self.parse_error(match self.current() {
124 Some(c) => TurtleErrorKind::UnexpectedByte(c),
125 None => TurtleErrorKind::PrematureEof,
126 }))
127 }
128
129 pub fn check_is_current(&self, expected: u8) -> Result<(), TurtleError> {
130 if self.current() == Some(expected) {
131 Ok(())
132 } else {
133 self.unexpected_char_error()
134 }
135 }
136
137 pub fn check_is_next(&mut self, expected: u8) -> Result<(), TurtleError> {
138 if self.next()? == Some(expected) {
139 Ok(())
140 } else {
141 self.unexpected_char_error()
142 }
143 }
144
145 pub fn parse_error(&self, kind: TurtleErrorKind) -> TurtleError {
146 TurtleError {
147 kind,
148 position: Some(LineBytePosition::new(
149 self.line_number(),
150 self.byte_number(),
151 )),
152 }
153 }
154
155 pub fn consume_line_end(&mut self) -> Result<(), TurtleError> {
156 loop {
157 match self.current() {
158 None => return Ok(()),
159 Some(b'\n') => return self.consume(),
160 _ => self.consume()?,
161 }
162 }
163 }
164
165 fn fill_and_is_end(&mut self) -> Result<bool, TurtleError> {
166 loop {
167 let mut buf = [0; DEFAULT_CAPACITY]; match self.inner.read(&mut buf) {
169 Ok(0) => return Ok(true),
170 Ok(read) => {
171 self.buffer.extend(buf[..read].iter());
172 return Ok(false);
173 }
174 Err(e) if e.kind() == ErrorKind::Interrupted => {}
175 Err(e) => return Err(e.into()),
176 }
177 }
178 }
179
180 fn starts_with_with_eq(&mut self, prefix: &[u8], eq: impl Fn(&[u8], &[u8]) -> bool) -> bool {
181 loop {
182 let (first, second) = self.buffer.as_slices();
183 if prefix.len() <= first.len() {
184 return eq(&first[..prefix.len()], prefix);
185 } else if prefix.len() <= first.len() + second.len() {
186 return eq(first, &prefix[..first.len()])
187 && eq(
188 &second[..prefix.len() - first.len()],
189 &prefix[first.len()..],
190 );
191 }
192 if let Ok(true) | Err(_) = self.fill_and_is_end() {
193 return false;
194 }
195 }
196 }
197
198 pub fn increment_stack_size(&mut self) -> Result<(), TurtleError> {
199 self.stack_size += 1;
200 if self.stack_size > MAX_STACK_SIZE {
201 Err(self.parse_error(TurtleErrorKind::StackOverflow))
202 } else {
203 Ok(())
204 }
205 }
206
207 pub fn decrement_stack_size(&mut self) {
208 self.stack_size -= 1;
209 }
210}
211
/// A stack of reusable `String` buffers.
///
/// Popped buffers are emptied but kept in the backing vector, so a later
/// `push` hands them out again together with their allocation.
#[derive(Default)]
pub struct StringBufferStack {
    /// Backing storage; entries at index `len` and above are empty spares.
    inner: Vec<String>,
    /// Number of buffers currently on the stack.
    len: usize,
}

impl StringBufferStack {
    /// Creates an empty stack with room for `cap` buffers in the backing vector.
    pub fn with_capacity(cap: usize) -> Self {
        Self {
            inner: Vec::with_capacity(cap),
            len: 0,
        }
    }

    /// Pushes an empty buffer and returns it, reusing a spare one if available.
    pub fn push(&mut self) -> &mut String {
        if self.len == self.inner.len() {
            self.inner.push(String::new());
        }
        self.len += 1;
        &mut self.inner[self.len - 1]
    }

    /// Pushes two empty buffers and returns them as `(lower, top)`.
    pub fn push2(&mut self) -> (&mut String, &mut String) {
        self.push();
        self.push();
        let top = self.len - 1;
        // Splitting at the top index yields two disjoint mutable borrows.
        let (below, above) = self.inner.split_at_mut(top);
        (&mut below[top - 1], &mut above[0])
    }

    /// Pops the top buffer, emptying it but keeping it as a spare.
    ///
    /// # Panics
    /// Panics if the stack is empty.
    pub fn pop(&mut self) {
        self.inner[self.len - 1].clear();
        self.len -= 1;
    }

    /// Removes every buffer from the stack.
    ///
    /// The buffers are emptied in place rather than dropped, so their
    /// allocations stay available to later `push` calls, as with `pop`.
    pub fn clear(&mut self) {
        for buffer in &mut self.inner[..self.len] {
            buffer.clear();
        }
        self.len = 0;
    }
}
250
251#[derive(Default)]
252pub struct BlankNodeIdGenerator {
253 counter: u64,
255}
256
257impl BlankNodeIdGenerator {
258 pub fn generate(&mut self) -> BlankNodeId {
259 let mut id: [u8; 12] = [
260 b'r', b'i', b'o', b'g', b'0', b'0', b'0', b'0', b'0', b'0', b'0', b'0',
262 ];
263 self.counter += 1;
264 write_u64_to_slice(self.counter, &mut id[4..]);
265 BlankNodeId { id }
266 }
267
268 pub fn disambiguate(&self, label: &mut String) {
270 const SUFFIX: u8 = b'd';
271 let bytes = label.as_bytes();
272 if bytes.len() >= 12
273 && &bytes[..4] == b"riog"
274 && bytes[4..12].iter().all(u8::is_ascii_digit)
275 && bytes[12..].iter().all(|b| b == &SUFFIX)
276 {
277 label.push(SUFFIX as char)
278 }
279 }
280}
281
/// Fills `s` with the decimal digits of `v`, right-aligned and padded with
/// ASCII `0` on the left.
///
/// If `v` has more digits than `s` has bytes, only the lowest digits are
/// written.
fn write_u64_to_slice(mut v: u64, s: &mut [u8]) {
    // Walk from the last byte to the first, emitting the lowest digit each time.
    for slot in s.iter_mut().rev() {
        *slot = b'0' + (v % 10) as u8;
        v /= 10;
    }
}
288
/// Fixed-size, copyable blank node identifier.
#[derive(Eq, PartialEq, Copy, Clone, Hash)]
pub struct BlankNodeId {
    /// Raw identifier bytes.
    id: [u8; 12],
}

impl AsRef<str> for BlankNodeId {
    /// Views the identifier bytes as a string slice.
    ///
    /// # Panics
    /// Panics if the stored bytes are not valid UTF-8.
    fn as_ref(&self) -> &str {
        let bytes: &[u8] = &self.id;
        str::from_utf8(bytes).unwrap()
    }
}