use std::io::{Read, Seek, SeekFrom};
use memchr::memmem::{Finder, FinderRev};
use crate::result::ZipResult;
pub trait FinderDirection<'a> {
fn new(needle: &'a [u8]) -> Self;
fn reset_cursor(bounds: (u64, u64), window_size: usize) -> u64;
fn scope_window(window: &[u8], mid_window_offset: usize) -> (&[u8], usize);
fn needle(&self) -> &[u8];
fn find(&self, haystack: &[u8]) -> Option<usize>;
fn move_cursor(&self, cursor: u64, bounds: (u64, u64), window_size: usize) -> Option<u64>;
fn move_scope(&self, offset: usize) -> usize;
}
pub struct Forward<'a>(Finder<'a>);
impl<'a> FinderDirection<'a> for Forward<'a> {
fn new(needle: &'a [u8]) -> Self {
Self(Finder::new(needle))
}
fn reset_cursor((start_inclusive, _): (u64, u64), _: usize) -> u64 {
start_inclusive
}
fn scope_window(window: &[u8], mid_window_offset: usize) -> (&[u8], usize) {
(&window[mid_window_offset..], mid_window_offset)
}
fn find(&self, haystack: &[u8]) -> Option<usize> {
self.0.find(haystack)
}
fn needle(&self) -> &[u8] {
self.0.needle()
}
fn move_cursor(&self, cursor: u64, bounds: (u64, u64), window_size: usize) -> Option<u64> {
let magic_overlap = self.needle().len().saturating_sub(1) as u64;
let next = cursor.saturating_add(window_size as u64 - magic_overlap);
if next >= bounds.1 {
None
} else {
Some(next)
}
}
fn move_scope(&self, offset: usize) -> usize {
offset + self.needle().len()
}
}
pub struct Backwards<'a>(FinderRev<'a>);
impl<'a> FinderDirection<'a> for Backwards<'a> {
fn new(needle: &'a [u8]) -> Self {
Self(FinderRev::new(needle))
}
fn reset_cursor(bounds: (u64, u64), window_size: usize) -> u64 {
bounds
.1
.saturating_sub(window_size as u64)
.clamp(bounds.0, bounds.1)
}
fn scope_window(window: &[u8], mid_window_offset: usize) -> (&[u8], usize) {
(&window[..mid_window_offset], 0)
}
fn find(&self, haystack: &[u8]) -> Option<usize> {
self.0.rfind(haystack)
}
fn needle(&self) -> &[u8] {
self.0.needle()
}
fn move_cursor(&self, cursor: u64, bounds: (u64, u64), window_size: usize) -> Option<u64> {
let magic_overlap = self.needle().len().saturating_sub(1) as u64;
if cursor <= bounds.0 {
None
} else {
Some(
cursor
.saturating_add(magic_overlap)
.saturating_sub(window_size as u64)
.clamp(bounds.0, bounds.1),
)
}
}
fn move_scope(&self, offset: usize) -> usize {
offset
}
}
pub struct MagicFinder<Direction> {
buffer: Box<[u8]>,
pub(self) finder: Direction,
cursor: u64,
mid_buffer_offset: Option<usize>,
bounds: (u64, u64),
}
impl<'a, T: FinderDirection<'a>> MagicFinder<T> {
pub fn new(magic_bytes: &'a [u8], start_inclusive: u64, end_exclusive: u64) -> Self {
const BUFFER_SIZE: usize = 2048;
debug_assert!(BUFFER_SIZE >= magic_bytes.len());
Self {
buffer: vec![0; BUFFER_SIZE].into_boxed_slice(),
finder: T::new(magic_bytes),
cursor: T::reset_cursor((start_inclusive, end_exclusive), BUFFER_SIZE),
mid_buffer_offset: None,
bounds: (start_inclusive, end_exclusive),
}
}
pub fn repurpose(&mut self, magic_bytes: &'a [u8], bounds: (u64, u64)) -> &mut Self {
debug_assert!(self.buffer.len() >= magic_bytes.len());
self.finder = T::new(magic_bytes);
self.cursor = T::reset_cursor(bounds, self.buffer.len());
self.bounds = bounds;
self.mid_buffer_offset = None;
self
}
pub fn next<R: Read + Seek>(&mut self, reader: &mut R) -> ZipResult<Option<u64>> {
loop {
if self.cursor < self.bounds.0 || self.cursor >= self.bounds.1 {
break;
}
let window_start = self.cursor;
let window_end = self
.cursor
.saturating_add(self.buffer.len() as u64)
.min(self.bounds.1);
if window_end <= window_start {
break;
}
let window = &mut self.buffer[..(window_end - window_start) as usize];
if self.mid_buffer_offset.is_none() {
reader.seek(SeekFrom::Start(window_start))?;
reader.read_exact(window)?;
}
let (window, window_start_offset) = match self.mid_buffer_offset {
Some(mid_buffer_offset) => T::scope_window(window, mid_buffer_offset),
None => (&*window, 0usize),
};
if let Some(offset) = self.finder.find(window) {
let magic_pos = window_start + window_start_offset as u64 + offset as u64;
reader.seek(SeekFrom::Start(magic_pos))?;
self.mid_buffer_offset = Some(self.finder.move_scope(window_start_offset + offset));
return Ok(Some(magic_pos));
}
self.mid_buffer_offset = None;
match self
.finder
.move_cursor(self.cursor, self.bounds, self.buffer.len())
{
Some(new_cursor) => {
self.cursor = new_cursor;
}
None => {
self.bounds.0 = self.bounds.1;
break;
}
}
}
Ok(None)
}
}
pub struct OptimisticMagicFinder<Direction> {
inner: MagicFinder<Direction>,
initial_guess: Option<(u64, bool)>,
}
const STACK_BUFFER_SIZE: usize = 8;
impl<'a, Direction: FinderDirection<'a>> OptimisticMagicFinder<Direction> {
pub fn new_empty() -> Self {
Self {
inner: MagicFinder::new(&[], 0, 0),
initial_guess: None,
}
}
pub fn repurpose(
&mut self,
magic_bytes: &'a [u8],
bounds: (u64, u64),
initial_guess: Option<(u64, bool)>,
) -> &mut Self {
debug_assert!(magic_bytes.len() <= STACK_BUFFER_SIZE);
self.inner.repurpose(magic_bytes, bounds);
self.initial_guess = initial_guess;
self
}
pub fn next<R: Read + Seek>(&mut self, reader: &mut R) -> ZipResult<Option<u64>> {
if let Some((v, mandatory)) = self.initial_guess {
reader.seek(SeekFrom::Start(v))?;
let mut buffer = [0; STACK_BUFFER_SIZE];
let buffer = &mut buffer[..self.inner.finder.needle().len()];
if v.saturating_add(buffer.len() as u64) <= self.inner.bounds.1 {
reader.read_exact(buffer)?;
if self.inner.finder.needle() == buffer {
self.initial_guess.take();
reader.seek(SeekFrom::Start(v))?;
return Ok(Some(v));
}
}
if mandatory {
return Ok(None);
}
self.initial_guess.take();
}
self.inner.next(reader)
}
}