zopfli/
cache.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
use alloc::vec::Vec;
use core::cmp;

use crate::{
    lz77::LongestMatch,
    util::{ZOPFLI_CACHE_LENGTH, ZOPFLI_MAX_MATCH, ZOPFLI_MIN_MATCH},
};

// Cache used by ZopfliFindLongestMatch to remember previously found length/dist
// values.
// This is needed because the squeeze runs will ask these values multiple times for
// the same position.
// Uses large amounts of memory, since it has to remember the distance belonging
// to every possible shorter-than-the-best length (the so called "sublen" array).
pub struct ZopfliLongestMatchCache {
    length: Vec<u16>,
    dist: Vec<u16>,
    sublen: Vec<u8>,
}

impl ZopfliLongestMatchCache {
    pub fn new(blocksize: usize) -> ZopfliLongestMatchCache {
        ZopfliLongestMatchCache {
            /* length > 0 and dist 0 is invalid combination, which indicates on purpose
            that this cache value is not filled in yet. */
            length: vec![1; blocksize],
            dist: vec![0; blocksize],
            /* Rather large amount of memory. */
            sublen: vec![0; ZOPFLI_CACHE_LENGTH * blocksize * 3],
        }
    }

    fn length_at(&self, pos: usize) -> u16 {
        self.length[pos]
    }

    fn dist_at(&self, pos: usize) -> u16 {
        self.dist[pos]
    }

    fn store_length_at(&mut self, pos: usize, val: u16) {
        self.length[pos] = val;
    }

    fn store_dist_at(&mut self, pos: usize, val: u16) {
        self.dist[pos] = val;
    }

    /// Returns the length up to which could be stored in the cache.
    fn max_sublen(&self, pos: usize) -> u32 {
        let start = ZOPFLI_CACHE_LENGTH * pos * 3;
        if self.sublen[start + 1] == 0 && self.sublen[start + 2] == 0 {
            return 0; // No sublen cached.
        }
        self.sublen[start + ((ZOPFLI_CACHE_LENGTH - 1) * 3)] as u32 + 3
    }

    /// Stores sublen array in the cache.
    fn store_sublen(&mut self, sublen: &[u16], pos: usize, length: usize) {
        if length < 3 {
            return;
        }

        let start = ZOPFLI_CACHE_LENGTH * pos * 3;
        let mut i = 3;
        let mut j = 0;
        let mut bestlength = 0;
        while i <= length {
            if i == length || sublen[i] != sublen[i + 1] {
                self.sublen[start + (j * 3)] = (i - 3) as u8;
                self.sublen[start + (j * 3 + 1)] = sublen[i].wrapping_rem(256) as u8;
                self.sublen[start + (j * 3 + 2)] = (sublen[i] >> 8).wrapping_rem(256) as u8;
                bestlength = i as u32;
                j += 1;
                if j >= ZOPFLI_CACHE_LENGTH {
                    break;
                }
            }
            i += 1;
        }

        if j < ZOPFLI_CACHE_LENGTH {
            debug_assert_eq!(bestlength, length as u32);
            self.sublen[start + ((ZOPFLI_CACHE_LENGTH - 1) * 3)] = (bestlength - 3) as u8;
        } else {
            debug_assert!(bestlength <= length as u32);
        }
        debug_assert_eq!(bestlength, self.max_sublen(pos));
    }

    /// Extracts sublen array from the cache.
    fn fetch_sublen(&self, pos: usize, length: usize, sublen: &mut [u16]) {
        if length < 3 {
            return;
        }

        let start = ZOPFLI_CACHE_LENGTH * pos * 3;
        let maxlength = self.max_sublen(pos) as usize;
        let mut prevlength = 0;

        for j in 0..ZOPFLI_CACHE_LENGTH {
            let length = self.sublen[start + (j * 3)] as usize + 3;
            let dist = self.sublen[start + (j * 3 + 1)] as u16
                + 256 * self.sublen[start + (j * 3 + 2)] as u16;

            let mut i = prevlength;
            while i <= length {
                sublen[i] = dist;
                i += 1;
            }
            if length == maxlength {
                break;
            }
            prevlength = length + 1;
        }
    }
}

pub trait Cache {
    fn try_get(
        &self,
        pos: usize,
        limit: usize,
        sublen: &mut Option<&mut [u16]>,
        blockstart: usize,
    ) -> LongestMatch;
    fn store(
        &mut self,
        pos: usize,
        limit: usize,
        sublen: &mut Option<&mut [u16]>,
        distance: u16,
        length: u16,
        blockstart: usize,
    );
}

pub struct NoCache;

impl Cache for NoCache {
    fn try_get(
        &self,
        _: usize,
        limit: usize,
        _: &mut Option<&mut [u16]>,
        _: usize,
    ) -> LongestMatch {
        LongestMatch::new(limit)
    }

    fn store(&mut self, _: usize, _: usize, _: &mut Option<&mut [u16]>, _: u16, _: u16, _: usize) {
        // Nowhere to store
    }
}

impl Cache for ZopfliLongestMatchCache {
    fn try_get(
        &self,
        pos: usize,
        mut limit: usize,
        sublen: &mut Option<&mut [u16]>,
        blockstart: usize,
    ) -> LongestMatch {
        let mut longest_match = LongestMatch::new(limit);

        /* The LMC cache starts at the beginning of the block rather than the
        beginning of the whole array. */
        let lmcpos = pos - blockstart;

        /* Length > 0 and dist 0 is invalid combination, which indicates on purpose
        that this cache value is not filled in yet. */
        let length_lmcpos = self.length_at(lmcpos);
        let dist_lmcpos = self.dist_at(lmcpos);
        let cache_available = length_lmcpos == 0 || dist_lmcpos != 0;
        let max_sublen = self.max_sublen(lmcpos);
        let limit_ok_for_cache = limit == ZOPFLI_MAX_MATCH
            || length_lmcpos <= limit as u16
            || (sublen.is_some() && max_sublen >= limit as u32);

        if limit_ok_for_cache && cache_available {
            if sublen.is_none() || length_lmcpos as u32 <= max_sublen {
                let length = cmp::min(length_lmcpos, limit as u16);
                let distance;
                if let Some(ref mut subl) = *sublen {
                    self.fetch_sublen(lmcpos, length as usize, subl);
                    distance = subl[length as usize];

                    if limit == ZOPFLI_MAX_MATCH && length >= ZOPFLI_MIN_MATCH as u16 {
                        debug_assert_eq!(subl[length as usize], dist_lmcpos);
                    }
                } else {
                    distance = dist_lmcpos;
                }
                longest_match.distance = distance;
                longest_match.length = length;
                longest_match.from_cache = true;
                return longest_match;
            }
            /* Can't use much of the cache, since the "sublens" need to be calculated,
            but at least we already know when to stop. */
            limit = length_lmcpos as usize;
            longest_match.limit = limit;
        }

        longest_match
    }

    fn store(
        &mut self,
        pos: usize,
        limit: usize,
        sublen: &mut Option<&mut [u16]>,
        distance: u16,
        length: u16,
        blockstart: usize,
    ) {
        if let Some(ref mut subl) = *sublen {
            /* Length > 0 and dist 0 is invalid combination, which indicates on purpose
            that this cache value is not filled in yet. */
            let lmcpos = pos - blockstart;
            let cache_available = self.length_at(lmcpos) == 0 || self.dist_at(lmcpos) != 0;

            if limit == ZOPFLI_MAX_MATCH && !cache_available {
                debug_assert!(self.length_at(lmcpos) == 1 && self.dist_at(lmcpos) == 0);
                if length < ZOPFLI_MIN_MATCH as u16 {
                    self.store_dist_at(lmcpos, 0);
                    self.store_length_at(lmcpos, 0);
                } else {
                    self.store_dist_at(lmcpos, distance);
                    self.store_length_at(lmcpos, length);
                }
                debug_assert!(!(self.length_at(lmcpos) == 1 && self.dist_at(lmcpos) == 0));
                self.store_sublen(subl, lmcpos, length as usize);
            }
        }
    }
}