Vendor dependencies for 0.3.0 release

This commit is contained in:
2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

366
vendor/ruzstd/src/fse/fse_decoder.rs vendored Normal file
View File

@@ -0,0 +1,366 @@
use crate::bit_io::{BitReader, BitReaderReversed};
use crate::decoding::errors::{FSEDecoderError, FSETableError};
use alloc::vec::Vec;
/// Decoder for a single FSE-encoded stream, driven by a prebuilt [`FSETable`].
pub struct FSEDecoder<'table> {
/// The table entry the decoder is currently positioned on. It carries the
/// symbol to emit as well as the data needed to compute the next state.
pub state: Entry,
/// A reference to the table used for decoding.
table: &'table FSETable,
}
impl<'t> FSEDecoder<'t> {
    /// Create a decoder bound to `table`.
    ///
    /// The state starts out as the first entry of the table, or as an all-zero
    /// entry when the table holds no entries yet. Call [`Self::init_state`]
    /// before decoding.
    pub fn new(table: &'t FSETable) -> FSEDecoder<'t> {
        let state = match table.decode.first() {
            Some(entry) => *entry,
            None => Entry {
                base_line: 0,
                num_bits: 0,
                symbol: 0,
            },
        };
        FSEDecoder { state, table }
    }

    /// Returns the symbol stored in the entry the decoder is currently positioned on.
    pub fn decode_symbol(&self) -> u8 {
        self.state.symbol
    }

    /// Read the initial state from `bits`.
    ///
    /// Afterwards `decode_symbol` yields the first symbol and `update_state`
    /// moves on to the next one.
    ///
    /// # Errors
    /// Returns `FSEDecoderError::TableIsUninitialized` if the table's accuracy log is zero.
    pub fn init_state(&mut self, bits: &mut BitReaderReversed<'_>) -> Result<(), FSEDecoderError> {
        let acc_log = self.table.accuracy_log;
        if acc_log == 0 {
            return Err(FSEDecoderError::TableIsUninitialized);
        }
        // The initial state is stored as a plain index of `accuracy_log` bits.
        let start = bits.get_bits(acc_log) as usize;
        self.state = self.table.decode[start];
        Ok(())
    }

    /// Move to the next state: the current entry's baseline plus `num_bits`
    /// freshly read bits selects the next table entry.
    pub fn update_state(&mut self, bits: &mut BitReaderReversed<'_>) {
        let Entry {
            base_line,
            num_bits,
            ..
        } = self.state;
        let offset = bits.get_bits(num_bits) as u32;
        self.state = self.table.decode[(base_line + offset) as usize];
    }
}
/// FSE decoding involves a decoding table that describes the probabilities of
/// all literals from 0 to the highest present one
///
/// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#fse-table-description>
#[derive(Debug, Clone)]
pub struct FSETable {
/// The maximum symbol in the table (inclusive). Limits the probabilities length to max_symbol + 1.
max_symbol: u8,
/// The actual table containing the decoded symbol and the compression data
/// connected to that symbol.
pub decode: Vec<Entry>, //used to decode symbols, and calculate the next state
/// The size of the table is stored in logarithm base 2 format,
/// with the **size of the table** being equal to `(1 << accuracy_log)`.
/// This value is used so that the decoder knows how many bits to read from the bitstream.
/// A value of `0` marks a table that has not been built yet.
pub accuracy_log: u8,
/// In this context, probability refers to the likelihood that a symbol occurs in the given data.
/// Given this info, the encoder can assign shorter codes to symbols that appear more often,
/// and longer codes that appear less often, then the decoder can use the probability
/// to determine what code was assigned to what symbol.
///
/// The probability of a single symbol is a value representing the proportion of times the symbol
/// would fall within the data.
///
/// If a symbol probability is set to `-1`, it means that the probability of a symbol
/// occurring in the data is less than one. Such a symbol still occupies exactly one
/// entry of the decoding table.
pub symbol_probabilities: Vec<i32>, //used while building the decode Vector
/// Scratch space used while building the decoding table: for every symbol, the number of
/// its table entries that have already been assigned a baseline and a bit count.
symbol_counter: Vec<u32>,
}
impl FSETable {
/// Create an empty table that accepts symbols up to and including `max_symbol`.
///
/// The table holds no decoding entries until it is filled by `build_decoder`
/// or `build_from_probabilities`.
pub fn new(max_symbol: u8) -> FSETable {
    // Symbols are `u8`, so a valid table never describes more than 256 of them.
    const MAX_SYMBOLS: usize = 256;
    FSETable {
        max_symbol,
        accuracy_log: 0,
        // Sized once the accuracy log is known.
        decode: Vec::new(),
        symbol_probabilities: Vec::with_capacity(MAX_SYMBOLS),
        symbol_counter: Vec::with_capacity(MAX_SYMBOLS),
    }
}
/// Discard the current contents of `self` and copy the accuracy log, decoding
/// entries, probabilities and counters over from `other`.
///
/// `max_symbol` of `self` is left untouched.
pub fn reinit_from(&mut self, other: &Self) {
    self.reset();
    self.accuracy_log = other.accuracy_log;
    self.decode.extend_from_slice(&other.decode);
    self.symbol_probabilities
        .extend_from_slice(&other.symbol_probabilities);
    self.symbol_counter.extend_from_slice(&other.symbol_counter);
}
/// Return the table to its unbuilt state: the accuracy log becomes zero and
/// all vectors are emptied (their allocations are kept).
pub fn reset(&mut self) {
    self.accuracy_log = 0;
    self.decode.clear();
    self.symbol_probabilities.clear();
    self.symbol_counter.clear();
}
/// Read a table description from `source` and build the decoding table from it.
///
/// Returns how many BYTEs (not bits) of `source` were consumed by the description.
///
/// # Errors
/// Fails if the description is malformed or its accuracy log exceeds `max_log`.
pub fn build_decoder(&mut self, source: &[u8], max_log: u8) -> Result<usize, FSETableError> {
    self.accuracy_log = 0;
    let consumed = self.read_probabilities(source, max_log)?;
    self.build_decoding_table().map(|()| consumed)
}
/// Build the decoding table from an explicit accuracy log and probability distribution.
///
/// `probs[i]` is the probability of symbol `i`: a positive value is the number of
/// table entries the symbol owns, `-1` marks a "less than one" symbol that owns
/// exactly one entry, and `0` (or any other negative value) owns none.
///
/// # Errors
/// * `FSETableError::AccLogIsZero` if `acc_log` is zero.
/// * `FSETableError::AccLogTooBig` if `acc_log` is 32 or larger; table states are
///   stored as `u32`, so such a table cannot be represented.
/// * `FSETableError::ProbabilityCounterMismatch` if the entries owned by all symbols
///   do not add up to exactly `1 << acc_log`. Without this check a malformed
///   distribution could make the table construction loop forever or panic.
/// * `FSETableError::TooManySymbols` if `probs` has more entries than the table allows.
pub fn build_from_probabilities(
    &mut self,
    acc_log: u8,
    probs: &[i32],
) -> Result<(), FSETableError> {
    /// Largest accuracy log whose table size still fits into a `u32`.
    const MAX_ACC_LOG: u8 = 31;

    if acc_log == 0 {
        return Err(FSETableError::AccLogIsZero);
    }
    let expected_sum = match 1u32.checked_shl(u32::from(acc_log)) {
        Some(size) => size,
        None => {
            return Err(FSETableError::AccLogTooBig {
                got: acc_log,
                max: MAX_ACC_LOG,
            })
        }
    };

    // Count the table entries the distribution asks for, using the same rules the
    // table construction applies. Accumulate in u64 so the sum cannot overflow.
    let slots: u64 = probs
        .iter()
        .map(|&prob| match prob {
            -1 => 1,
            p if p > 0 => p as u64,
            _ => 0,
        })
        .sum();
    if slots != u64::from(expected_sum) {
        return Err(FSETableError::ProbabilityCounterMismatch {
            got: u32::try_from(slots).unwrap_or(u32::MAX),
            expected_sum,
            symbol_probabilities: probs.to_vec(),
        });
    }

    // Reuse the existing allocation instead of replacing the vector.
    self.symbol_probabilities.clear();
    self.symbol_probabilities.extend_from_slice(probs);
    self.accuracy_log = acc_log;
    self.build_decoding_table()
}
/// Turn `symbol_probabilities` and `accuracy_log` into the final `decode` table.
/// Once this has succeeded the table can be used for decoding.
///
/// # Errors
/// Returns `FSETableError::TooManySymbols` if more than `max_symbol + 1`
/// probabilities are present.
fn build_decoding_table(&mut self) -> Result<(), FSETableError> {
    let symbol_count = self.symbol_probabilities.len();
    if symbol_count > self.max_symbol as usize + 1 {
        return Err(FSETableError::TooManySymbols { got: symbol_count });
    }

    // Start from a table of placeholder entries.
    self.decode.clear();
    let table_size: usize = 1 << self.accuracy_log;
    self.decode.resize(
        table_size,
        Entry {
            base_line: 0,
            num_bits: 0,
            symbol: 0,
        },
    );

    // Step 1: every symbol with probability -1 takes one entry, handed out from the
    // top of the table downwards. `negative_idx` is the lowest entry taken this way.
    let mut negative_idx = table_size;
    for (symbol, &prob) in self.symbol_probabilities.iter().enumerate() {
        if prob == -1 {
            negative_idx -= 1;
            self.decode[negative_idx] = Entry {
                base_line: 0,
                num_bits: self.accuracy_log,
                symbol: symbol as u8,
            };
        }
    }

    // Step 2: spread the remaining symbols over the free entries, one entry per
    // probability point, stepping through the table in a scattered order.
    let mut position = 0;
    for (symbol, &prob) in self.symbol_probabilities.iter().enumerate() {
        if prob <= 0 {
            continue;
        }
        for _ in 0..prob {
            self.decode[position].symbol = symbol as u8;
            // Advance, skipping everything already occupied from step 1.
            loop {
                position = next_position(position, table_size);
                if position < negative_idx {
                    break;
                }
            }
        }
    }

    // Step 3: baselines and bit counts can only be computed once all symbols have
    // been spread. Entries of one symbol are numbered in table order.
    self.symbol_counter.clear();
    self.symbol_counter.resize(symbol_count, 0);
    for entry in self.decode[..negative_idx].iter_mut() {
        let symbol = usize::from(entry.symbol);
        let prob = self.symbol_probabilities[symbol];
        let state_number = self.symbol_counter[symbol];
        let (bl, nb) = calc_baseline_and_numbits(table_size as u32, prob as u32, state_number);
        assert!(nb <= self.accuracy_log);
        self.symbol_counter[symbol] += 1;
        entry.base_line = bl;
        entry.num_bits = nb;
    }
    Ok(())
}
/// Parse the accuracy log and the probability of every symbol from `source` and
/// return the number of bytes the description occupies.
///
/// # Errors
/// Fails if the accuracy log is zero or larger than `max_log`, if `source` runs
/// out of bits, if the probabilities do not add up to the table size, or if more
/// symbols than `max_symbol + 1` are described.
fn read_probabilities(&mut self, source: &[u8], max_log: u8) -> Result<usize, FSETableError> {
    // Clearing keeps the allocation around; every probability is pushed again below.
    self.symbol_probabilities.clear();
    let mut br = BitReader::new(source);

    // The accuracy log is stored in four bits, biased by `ACC_LOG_OFFSET`.
    self.accuracy_log = ACC_LOG_OFFSET + (br.get_bits(4)? as u8);
    if self.accuracy_log > max_log {
        return Err(FSETableError::AccLogTooBig {
            got: self.accuracy_log,
            max: max_log,
        });
    }
    if self.accuracy_log == 0 {
        return Err(FSETableError::AccLogIsZero);
    }

    let probability_sum: u32 = 1 << self.accuracy_log;
    let mut probability_counter: u32 = 0;
    while probability_counter < probability_sum {
        // The width of a value depends on how much probability is still unassigned.
        let max_remaining_value = probability_sum - probability_counter + 1;
        let bits_to_read = highest_bit_set(max_remaining_value);
        let unchecked_value = br.get_bits(bits_to_read as usize)? as u32;

        // Values below `low_threshold` are stored with one bit less; in that case
        // the extra bit that was read is handed back to the reader.
        let low_threshold = ((1 << bits_to_read) - 1) - max_remaining_value;
        let mask = (1 << (bits_to_read - 1)) - 1;
        let small_value = unchecked_value & mask;
        let value = if small_value < low_threshold {
            br.return_bits(1);
            small_value
        } else if unchecked_value > mask {
            unchecked_value - low_threshold
        } else {
            unchecked_value
        };

        // The stored value is the probability plus one, so that -1 can be expressed.
        let prob = (value as i32) - 1;
        self.symbol_probabilities.push(prob);
        match prob {
            // A zero is followed by 2-bit repeat counts of further zero
            // probabilities; a count of 3 means another count follows.
            0 => loop {
                let skip_amount = br.get_bits(2)? as usize;
                let new_len = self.symbol_probabilities.len() + skip_amount;
                self.symbol_probabilities.resize(new_len, 0);
                if skip_amount != 3 {
                    break;
                }
            },
            p if p > 0 => probability_counter += p as u32,
            _ => {
                // probability -1 counts as 1
                assert!(prob == -1);
                probability_counter += 1;
            }
        }
    }

    if probability_counter != probability_sum {
        return Err(FSETableError::ProbabilityCounterMismatch {
            got: probability_counter,
            expected_sum: probability_sum,
            symbol_probabilities: self.symbol_probabilities.clone(),
        });
    }
    if self.symbol_probabilities.len() > self.max_symbol as usize + 1 {
        return Err(FSETableError::TooManySymbols {
            got: self.symbol_probabilities.len(),
        });
    }

    // Round the consumed bits up to whole bytes.
    let bytes_read = match br.bits_read() % 8 {
        0 => br.bits_read() / 8,
        _ => (br.bits_read() / 8) + 1,
    };
    Ok(bytes_read)
}
}
/// A single entry in an FSE table.
///
/// The decoder keeps a copy of the entry it is currently positioned on: `symbol` is
/// the output for that state, while `base_line` and `num_bits` select the next entry.
#[derive(Copy, Clone, Debug)]
pub struct Entry {
/// This value is used as an offset value, and it is added
/// to a value read from the stream to determine the next state value.
pub base_line: u32,
/// How many bits should be read from the stream when decoding this entry.
pub num_bits: u8,
/// The byte that should be put in the decode output when encountering this state.
pub symbol: u8,
}
/// Bias of the stored accuracy log: this value is added to the 4-bit number read
/// from the start of a table description to obtain the `Accuracy_Log`.
const ACC_LOG_OFFSET: u8 = 5;
/// Returns the 1-based position of the most significant set bit of `x`,
/// i.e. the number of bits needed to represent `x`.
///
/// # Panics
/// Panics if `x` is zero.
fn highest_bit_set(x: u32) -> u32 {
    assert!(x > 0);
    x.ilog2() + 1
}
//utility functions for building the decoding table from probabilities
/// Returns the table position that follows `p` when spreading symbols over a
/// table of `table_size` entries (`table_size` must be a power of two).
fn next_position(p: usize, table_size: usize) -> usize {
    let step = (table_size >> 1) + (table_size >> 3) + 3;
    // The table size is a power of two, so masking wraps the position around.
    (p + step) & (table_size - 1)
}
/// Compute the baseline and the number of bits for one state of a symbol.
///
/// * `num_states_total` - size of the whole table.
/// * `num_states_symbol` - number of states (probability) of the symbol.
/// * `state_number` - index of this state among the symbol's states, in table order.
///
/// Returns `(baseline, num_bits)`; a symbol without states yields `(0, 0)`.
fn calc_baseline_and_numbits(
    num_states_total: u32,
    num_states_symbol: u32,
    state_number: u32,
) -> (u32, u8) {
    if num_states_symbol == 0 {
        return (0, 0);
    }

    // Round the number of states up to a power of two.
    let num_state_slices = if num_states_symbol.is_power_of_two() {
        num_states_symbol
    } else {
        1 << highest_bit_set(num_states_symbol)
    };
    // The gap up to that power of two is closed by giving the first states a
    // slice of double width; the remaining states keep a single-width slice.
    let double_width_states = num_state_slices - num_states_symbol;
    let single_width_states = num_states_symbol - double_width_states;
    // Size of a single-width slice and the bits needed to address within it.
    let slice_width = num_states_total / num_state_slices;
    let single_bits = (highest_bit_set(slice_width) - 1) as u8;

    match state_number.checked_sub(double_width_states) {
        // Double-width states are placed after all single-width slices.
        None => (
            single_width_states * slice_width + state_number * slice_width * 2,
            single_bits + 1,
        ),
        Some(index_shifted) => (index_shifted * slice_width, single_bits),
    }
}

445
vendor/ruzstd/src/fse/fse_encoder.rs vendored Normal file
View File

@@ -0,0 +1,445 @@
use crate::bit_io::BitWriter;
use alloc::vec::Vec;
/// Encoder that serializes data with a fixed [`FSETable`] into a borrowed [`BitWriter`].
pub(crate) struct FSEEncoder<'output, V: AsMut<Vec<u8>>> {
/// The table used to encode symbols; its description is written ahead of the data.
pub(super) table: FSETable,
/// Destination of the table description and the encoded bits.
writer: &'output mut BitWriter<V>,
}
impl<V: AsMut<Vec<u8>>> FSEEncoder<'_, V> {
/// Create an encoder that encodes with `table` and writes its output to `writer`.
pub fn new(table: FSETable, writer: &mut BitWriter<V>) -> FSEEncoder<'_, V> {
FSEEncoder { table, writer }
}
/// Consume the encoder and hand back the table it was created with.
/// Only available in tests and with the `fuzz_exports` feature.
#[cfg(any(test, feature = "fuzz_exports"))]
pub fn into_table(self) -> FSETable {
self.table
}
/// Encode `data` as a single FSE stream using the encoder's table.
///
/// Written to the output, in this order:
/// * Table description
/// * Encoded data (symbols are processed back to front)
/// * Last state index
/// * Padding bits to fill up last byte
///
/// # Panics
/// Panics if `data` is empty.
#[cfg(any(test, feature = "fuzz_exports"))]
pub fn encode(&mut self, data: &[u8]) {
    self.write_table();

    // The last symbol is represented by the start state; every earlier symbol is
    // encoded as the transition into its state.
    let mut state = self.table.start_state(data[data.len() - 1]);
    for symbol in data.iter().rev().skip(1).copied() {
        let next = self.table.next_state(symbol, state.index);
        self.writer
            .write_bits((state.index - next.baseline) as u64, next.num_bits as usize);
        state = next;
    }
    self.writer
        .write_bits(state.index as u64, self.acc_log() as usize);

    // Finish with the value 1, written in as many bits as it takes to complete the
    // current byte, or in a whole byte when the output is already aligned.
    let padding = match self.writer.misaligned() {
        0 => 8,
        bits => bits,
    };
    self.writer.write_bits(1u32, padding);
}
/// Encode `data` using the encoder's table with two interleaved states.
///
/// Written to the output, in this order:
/// * Table description
/// * Encoded data with two interleaved states
/// * Both last state indexes
/// * Padding bits to fill up last byte
///
/// Inputs of two or three symbols are supported: they consist of the two start
/// states plus at most one transition.
///
/// # Panics
/// Panics if `data` holds fewer than two symbols, because each of the two states
/// needs a symbol to start from.
pub fn encode_interleaved(&mut self, data: &[u8]) {
    assert!(
        data.len() >= 2,
        "interleaved FSE encoding needs at least two symbols"
    );
    self.write_table();

    // The last two symbols are represented by the start states.
    let mut state_1 = self.table.start_state(data[data.len() - 1]);
    let mut state_2 = self.table.start_state(data[data.len() - 2]);

    // `data[..remaining]` still has to be encoded. Walk it back to front, two
    // symbols at a time, alternating between the two states.
    let mut remaining = data.len() - 2;
    while remaining >= 2 {
        let next = self.table.next_state(data[remaining - 1], state_1.index);
        self.writer
            .write_bits((state_1.index - next.baseline) as u64, next.num_bits as usize);
        state_1 = next;

        let next = self.table.next_state(data[remaining - 2], state_2.index);
        self.writer
            .write_bits((state_2.index - next.baseline) as u64, next.num_bits as usize);
        state_2 = next;

        remaining -= 2;
    }

    let acc_log = self.acc_log() as usize;
    if remaining == 1 {
        // Odd number of symbols: the first symbol still needs a transition on
        // state 1, and the final states are written in the flipped order.
        let next = self.table.next_state(data[0], state_1.index);
        self.writer
            .write_bits((state_1.index - next.baseline) as u64, next.num_bits as usize);
        state_1 = next;
        self.writer.write_bits(state_2.index as u64, acc_log);
        self.writer.write_bits(state_1.index as u64, acc_log);
    } else {
        self.writer.write_bits(state_1.index as u64, acc_log);
        self.writer.write_bits(state_2.index as u64, acc_log);
    }

    // Finish with the value 1, written in as many bits as it takes to complete the
    // current byte, or in a whole byte when the output is already aligned.
    let bits_to_fill = self.writer.misaligned();
    if bits_to_fill == 0 {
        self.writer.write_bits(1u32, 8);
    } else {
        self.writer.write_bits(1u32, bits_to_fill);
    }
}
/// Write the description of the encoder's table to the output.
fn write_table(&mut self) {
self.table.write_table(self.writer);
}
/// Accuracy log of the encoder's table.
pub(super) fn acc_log(&self) -> u8 {
self.table.acc_log()
}
}
/// Encoding-side FSE table: for every symbol, the states that emit it.
#[derive(Debug, Clone)]
pub struct FSETable {
/// Indexed by symbol
pub(super) states: [SymbolStates; 256],
/// Sum of all states.states.len(), i.e. the number of entries of the
/// corresponding decoding table.
pub(crate) table_size: usize,
}
impl FSETable {
/// Returns the state of `symbol` whose target range contains the table index `idx`.
///
/// # Panics
/// Panics if no state of `symbol` covers `idx`.
pub(crate) fn next_state(&self, symbol: u8, idx: usize) -> &State {
    self.states[symbol as usize].get(idx, self.table_size)
}
/// Returns the first stored state of `symbol`, used to start an encoding run.
///
/// # Panics
/// Panics if `symbol` has no states in this table.
pub(crate) fn start_state(&self, symbol: u8) -> &State {
    &self.states[symbol as usize].states[0]
}
/// Accuracy log of the table, i.e. the base-2 logarithm of `table_size`.
///
/// # Panics
/// Panics if `table_size` is zero.
pub fn acc_log(&self) -> u8 {
self.table_size.ilog2() as u8
}
/// Serialize the description of this table (accuracy log followed by the
/// probability of each symbol) into `writer`, then pad with zero bits by the
/// amount `writer.misaligned()` reports.
pub fn write_table<V: AsMut<Vec<u8>>>(&self, writer: &mut BitWriter<V>) {
// The accuracy log is stored in four bits, biased by 5.
writer.write_bits(self.acc_log() - 5, 4);
let mut probability_counter = 0usize;
let probability_sum = 1 << self.acc_log();
let mut prob_idx = 0;
// Emit probabilities until they account for every entry of the table.
while probability_counter < probability_sum {
// The width of a value depends on how much probability is still unassigned.
let max_remaining_value = probability_sum - probability_counter + 1;
let bits_to_write = max_remaining_value.ilog2() + 1;
let low_threshold = ((1 << bits_to_write) - 1) - (max_remaining_value);
let mask = (1 << (bits_to_write - 1)) - 1;
let prob = self.states[prob_idx].probability;
prob_idx += 1;
// The stored value is the probability plus one, so that -1 can be expressed.
let value = (prob + 1) as u32;
if value < low_threshold as u32 {
// Small values are written with one bit less.
writer.write_bits(value, bits_to_write as usize - 1);
} else if value > mask {
writer.write_bits(value + low_threshold as u32, bits_to_write as usize);
} else {
writer.write_bits(value, bits_to_write as usize);
}
if prob == -1 {
// A "less than one" probability still accounts for one table entry.
probability_counter += 1;
} else if prob > 0 {
probability_counter += prob as usize;
} else {
// A zero probability is followed by 2-bit counts of further zero
// probabilities; a count of 3 signals that another count follows.
let mut zeros = 0u8;
while self.states[prob_idx].probability == 0 {
zeros += 1;
prob_idx += 1;
if zeros == 3 {
writer.write_bits(3u8, 2);
zeros = 0;
}
}
writer.write_bits(zeros, 2);
}
}
writer.write_bits(0u8, writer.misaligned());
}
}
/// All states of one symbol together with the symbol's probability.
#[derive(Debug, Clone)]
pub(super) struct SymbolStates {
/// Sorted by baseline to allow easy lookup using an index
pub(super) states: Vec<State>,
/// Probability of the symbol: the number of its states, `-1` for a
/// "less than one" symbol, or `0` if the symbol is not part of the table.
pub(super) probability: i32,
}
impl SymbolStates {
    /// Find the state whose target range contains `idx`.
    ///
    /// `max_idx` is the table size; it is used to estimate where in the
    /// baseline-sorted list the search can start.
    ///
    /// # Panics
    /// Panics if none of the states from the estimated start onwards contains `idx`.
    fn get(&self, idx: usize, max_idx: usize) -> &State {
        let first_candidate = idx * self.states.len() / max_idx;
        let candidates = &self.states[first_candidate..];
        candidates
            .iter()
            .find(|candidate| candidate.contains(idx))
            .unwrap()
    }
}
/// One state of a symbol as seen by the encoder: the entry of the decoding table
/// it corresponds to and the range of table indexes it targets.
#[derive(Debug, Clone)]
pub(crate) struct State {
/// How many bits the range of this state needs to be encoded as
pub(crate) num_bits: u8,
/// The first index targeted by this state
pub(crate) baseline: usize,
/// The last index targeted by this state (baseline + the maximum number with numbits bits allows)
pub(crate) last_index: usize,
/// Index of this state in the decoding table
pub(crate) index: usize,
}
impl State {
    /// Whether `idx` lies in the inclusive range `baseline..=last_index` targeted by this state.
    fn contains(&self, idx: usize) -> bool {
        (self.baseline..=self.last_index).contains(&idx)
    }
}
/// Build an encoding table from the symbol statistics of `data`.
///
/// Occurrences of every byte value are counted; the counts up to and including the
/// highest symbol that occurs are then turned into a table by
/// `build_table_from_counts`, to which `max_log` and `avoid_0_numbit` are passed on.
pub fn build_table_from_data(
    data: impl Iterator<Item = u8>,
    max_log: u8,
    avoid_0_numbit: bool,
) -> FSETable {
    let mut counts = [0usize; 256];
    data.for_each(|symbol| counts[usize::from(symbol)] += 1);

    // Highest symbol that occurs at all; 0 when there is no data.
    let max_symbol = counts.iter().rposition(|&count| count > 0).unwrap_or(0);
    build_table_from_counts(&counts[..=max_symbol], max_log, avoid_0_numbit)
}
/// Derive a normalized probability distribution from raw symbol counts and build
/// the encoding table for it.
///
/// `counts[i]` is the number of occurrences of symbol `i`. The accuracy log of the
/// result is at least 5 unless `max_log` caps it lower. With `avoid_0_numbit` set,
/// a largest probability above half of the table size is cut down to exactly half
/// and the excess is handed to the first symbol holding the next largest probability.
///
/// # Panics
/// Panics if every count is zero.
fn build_table_from_counts(counts: &[usize], max_log: u8, avoid_0_numbit: bool) -> FSETable {
let mut probs = [0; 256];
let probs = &mut probs[..counts.len()];
// Smallest count that is not zero (stays 0 if there is none).
let mut min_count = 0;
for (idx, count) in counts.iter().copied().enumerate() {
probs[idx] = count as i32;
if count > 0 && (count < min_count || min_count == 0) {
min_count = count;
}
}
// shift all probabilities down so that the lowest are 1
min_count -= 1;
let mut max_prob = 0i32;
for prob in probs.iter_mut() {
if *prob > 0 {
*prob -= min_count as i32;
}
max_prob = max_prob.max(*prob);
}
// Scale the probabilities down if the largest one exceeds the number of symbols;
// symbols that occur keep a probability of at least 1.
if max_prob > 0 && max_prob as usize > probs.len() {
let divisor = max_prob / (probs.len() as i32);
for prob in probs.iter_mut() {
if *prob > 0 {
*prob = (*prob / divisor).max(1)
}
}
}
// normalize probabilities to a 2^x
let sum = probs.iter().sum::<i32>();
assert!(sum > 0);
let sum = sum as usize;
let acc_log = (sum.ilog2() as u8 + 1).max(5);
let acc_log = u8::min(acc_log, max_log);
if sum < 1 << acc_log {
// just raise the maximum probability as much as possible
// TODO is this optimal?
let diff = (1 << acc_log) - sum;
let max = probs.iter_mut().max().unwrap();
*max += diff as i32;
} else {
// decrease the smallest ones to 1 first
let mut diff = sum - (1 << acc_log);
while diff > 0 {
let min = probs.iter_mut().filter(|prob| **prob > 1).min().unwrap();
let decrease = usize::min(*min as usize - 1, diff);
diff -= decrease;
*min -= decrease as i32;
}
}
let max = probs.iter_mut().max().unwrap();
if avoid_0_numbit && *max > 1 << (acc_log - 1) {
// Cap the largest probability at half of the table size.
let redistribute = *max - (1 << (acc_log - 1));
*max -= redistribute;
let max = *max;
// find first occurrence of the second_max to avoid lifting the last zero
let second_max = *probs.iter_mut().filter(|x| **x != max).max().unwrap();
let second_max = probs.iter_mut().find(|x| **x == second_max).unwrap();
*second_max += redistribute;
assert!(*second_max <= max);
}
build_table_from_probabilities(probs, acc_log)
}
/// Build the encoding table for a probability distribution.
///
/// `probs[i]` is the probability of symbol `i` (`-1` marks a "less than one"
/// symbol) and `acc_log` is the accuracy log, so the table has `1 << acc_log`
/// entries. The states are laid out in three steps: `-1` symbols are placed from
/// the top of the table downwards, all other symbols are spread with
/// `next_position`, and finally every state gets its bit count and baseline.
pub(super) fn build_table_from_probabilities(probs: &[i32], acc_log: u8) -> FSETable {
let mut states = core::array::from_fn::<SymbolStates, 256, _>(|_| SymbolStates {
states: Vec::new(),
probability: 0,
});
// distribute -1 symbols
// `negative_idx` is the highest table index that has not been handed out yet.
let mut negative_idx = (1 << acc_log) - 1;
for (symbol, _prob) in probs
.iter()
.copied()
.enumerate()
.filter(|prob| prob.1 == -1)
{
// A -1 symbol has a single state that targets the whole table.
states[symbol].states.push(State {
num_bits: acc_log,
baseline: 0,
last_index: (1 << acc_log) - 1,
index: negative_idx,
});
states[symbol].probability = -1;
negative_idx -= 1;
}
// distribute other symbols
// Setup all needed states per symbol with their respective index
let mut idx = 0;
for (symbol, prob) in probs.iter().copied().enumerate() {
if prob <= 0 {
continue;
}
states[symbol].probability = prob;
let states = &mut states[symbol].states;
for _ in 0..prob {
// num_bits, baseline and last_index are filled in by the next pass.
states.push(State {
num_bits: 0,
baseline: 0,
last_index: 0,
index: idx,
});
idx = next_position(idx, 1 << acc_log);
// Skip the indexes already taken by -1 symbols.
while idx > negative_idx {
idx = next_position(idx, 1 << acc_log);
}
}
assert_eq!(states.len(), prob as usize);
}
// After all states know their index we can determine the numbits and baselines
for (symbol, prob) in probs.iter().copied().enumerate() {
if prob <= 0 {
continue;
}
let prob = prob as u32;
let state = &mut states[symbol];
// We process the states in their order in the table
state.states.sort_by(|l, r| l.index.cmp(&r.index));
// Base-2 logarithm of the probability, rounded up.
let prob_log = if prob.is_power_of_two() {
prob.ilog2()
} else {
prob.ilog2() + 1
};
let rounded_up = 1u32 << prob_log;
// The lower states target double the amount of indexes -> numbits + 1
let double_states = rounded_up - prob;
let single_states = prob - double_states;
let num_bits = acc_log - prob_log as u8;
// The double-width ranges start right behind all single-width ranges.
let mut baseline = (single_states as usize * (1 << (num_bits))) % (1 << acc_log);
for (idx, state) in state.states.iter_mut().enumerate() {
if (idx as u32) < double_states {
let num_bits = num_bits + 1;
state.baseline = baseline;
state.num_bits = num_bits;
state.last_index = baseline + ((1 << num_bits) - 1);
baseline += 1 << num_bits;
// Wrap around to the start of the table once its end is reached.
baseline %= 1 << acc_log;
} else {
state.baseline = baseline;
state.num_bits = num_bits;
state.last_index = baseline + ((1 << num_bits) - 1);
baseline += 1 << num_bits;
}
}
// For encoding we use the states ordered by the indexes they target
state.states.sort_by(|l, r| l.baseline.cmp(&r.baseline));
}
FSETable {
table_size: 1 << acc_log,
states,
}
}
/// Given the current position `p` in a table of `table_size` entries (a power of
/// two), return the position of the entry that is filled next.
fn next_position(p: usize, table_size: usize) -> usize {
    let half = table_size >> 1;
    let eighth = table_size >> 3;
    // Masking with `table_size - 1` wraps the position around the table.
    (p + half + eighth + 3) & (table_size - 1)
}
/// Probability distribution behind `default_ml_table` (accuracy log 6).
/// Counting every `-1` as one, the entries add up to 64 table slots.
const ML_DIST: &[i32] = &[
1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1,
];
/// Probability distribution behind `default_ll_table` (accuracy log 6).
/// Counting every `-1` as one, the entries add up to 64 table slots.
const LL_DIST: &[i32] = &[
4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
-1, -1, -1, -1,
];
/// Probability distribution behind `default_of_table` (accuracy log 5).
/// Counting every `-1` as one, the entries add up to 32 table slots.
const OF_DIST: &[i32] = &[
1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1,
];
/// Build the encoding table for the fixed `ML_DIST` distribution with accuracy log 6
/// (presumably the predefined match length table of the Zstandard format).
pub(crate) fn default_ml_table() -> FSETable {
build_table_from_probabilities(ML_DIST, 6)
}
/// Build the encoding table for the fixed `LL_DIST` distribution with accuracy log 6
/// (presumably the predefined literal length table of the Zstandard format).
pub(crate) fn default_ll_table() -> FSETable {
build_table_from_probabilities(LL_DIST, 6)
}
/// Build the encoding table for the fixed `OF_DIST` distribution with accuracy log 5
/// (presumably the predefined offset table of the Zstandard format).
pub(crate) fn default_of_table() -> FSETable {
build_table_from_probabilities(OF_DIST, 5)
}

139
vendor/ruzstd/src/fse/mod.rs vendored Normal file
View File

@@ -0,0 +1,139 @@
//! FSE, short for Finite State Entropy, is an encoding technique
//! that assigns shorter codes to symbols that appear more frequently in data,
//! and longer codes to less frequent symbols.
//!
//! FSE works by mutating a state and using that state to index into a table.
//!
//! Zstandard uses two different kinds of entropy encoding: FSE, and Huffman coding.
//! Huffman is used to compress literals,
//! while FSE is used for all other symbols (literal length code, match length code, offset code).
//!
//! <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#fse>
//!
//! <https://arxiv.org/pdf/1311.2540>
mod fse_decoder;
pub use fse_decoder::*;
pub mod fse_encoder;
/// The decoding and the encoding table built from the same distribution must
/// agree on the baseline and bit count of every state.
#[test]
fn tables_equal() {
// Covers zero, "less than one" (-1) and regular probabilities; sums up to 1 << 6.
let probs = &[0, 0, -1, 3, 2, 2, (1 << 6) - 8];
let mut dec_table = FSETable::new(255);
dec_table.build_from_probabilities(6, probs).unwrap();
let enc_table = fse_encoder::build_table_from_probabilities(probs, 6);
check_tables(&dec_table, &enc_table);
}
/// Assert that `enc_table` describes the same states as `dec_table`: for every
/// entry of the decoding table, the encoder state with the same table index must
/// exist for that entry's symbol and carry the same baseline and bit count.
#[cfg(any(test, feature = "fuzz_exports"))]
fn check_tables(dec_table: &fse_decoder::FSETable, enc_table: &fse_encoder::FSETable) {
    for (idx, dec_state) in dec_table.decode.iter().enumerate() {
        let symbol_states = &enc_table.states[usize::from(dec_state.symbol)].states;
        let enc_state = symbol_states
            .iter()
            .find(|candidate| candidate.index == idx)
            .unwrap();
        assert_eq!(enc_state.baseline, dec_state.base_line as usize);
        assert_eq!(enc_state.num_bits, dec_state.num_bits);
    }
}
/// Round-trips a plain ramp, a mix of repeated ranges and, when present, every
/// file in `fuzz/artifacts/fse`.
#[test]
fn roundtrip() {
    let ramp: alloc::vec::Vec<u8> = (0..64).collect();
    round_trip(&ramp);

    // Uneven symbol statistics: some ranges are repeated far more often than others.
    let mut data = alloc::vec![];
    for _ in 0..5 {
        data.extend(0..32);
    }
    for _ in 0..2 {
        data.extend(20..32);
    }
    data.extend(0..32);
    data.extend(20..32);
    data.extend(100..255);
    for _ in 0..2 {
        data.extend(20..32);
    }
    round_trip(&data);

    // Replay inputs previously collected by the fuzzer, if there are any.
    #[cfg(feature = "std")]
    if std::fs::exists("fuzz/artifacts/fse").unwrap_or(false) {
        for file in std::fs::read_dir("fuzz/artifacts/fse").unwrap() {
            let entry = file.unwrap();
            if entry.file_type().unwrap().is_file() {
                let data = std::fs::read(entry.path()).unwrap();
                round_trip(&data);
            }
        }
    }
}
/// Only needed for testing.
///
/// Encodes `data` with a table built from `data` itself, decodes the result again
/// (table description first, then the symbols) and asserts that the output equals
/// the input. Inputs shorter than 64 bytes or made of one repeated byte are skipped.
#[cfg(any(test, feature = "fuzz_exports"))]
pub fn round_trip(data: &[u8]) {
    use crate::bit_io::{BitReaderReversed, BitWriter};
    use fse_encoder::FSEEncoder;

    if data.len() < 64 || data.iter().all(|x| *x == data[0]) {
        return;
    }

    // Encode.
    let mut writer = BitWriter::new();
    let table = fse_encoder::build_table_from_data(data.iter().copied(), 22, false);
    let mut encoder = FSEEncoder::new(table, &mut writer);
    let mut dec_table = FSETable::new(255);
    encoder.encode(data);
    let acc_log = encoder.acc_log();
    let enc_table = encoder.into_table();
    let encoded = writer.dump();

    // Rebuild the table from its description; the symbols follow right after it.
    let table_bytes = dec_table.build_decoder(&encoded, acc_log).unwrap();
    let payload = &encoded[table_bytes..];
    let mut decoder = FSEDecoder::new(&dec_table);
    check_tables(&dec_table, &enc_table);

    // Skip the padding: a single 1 bit must show up within the first 8 bits read.
    let mut br = BitReaderReversed::new(payload);
    let mut skipped_bits = 0;
    loop {
        let bit = br.get_bits(1);
        skipped_bits += 1;
        if skipped_bits > 8 {
            // No end marker within the last byte. Either a bug or corrupted data
            panic!("Corrupted end marker");
        }
        if bit == 1 {
            break;
        }
    }

    // Decode symbol by symbol and compare against the input.
    decoder.init_state(&mut br).unwrap();
    let mut decoded = alloc::vec::Vec::new();
    for (position, expected) in data.iter().enumerate() {
        let symbol = decoder.decode_symbol();
        assert_eq!(symbol, *expected);
        decoded.push(symbol);
        // The final symbol has no transition stored behind it.
        if position + 1 < data.len() {
            decoder.update_state(&mut br);
        }
    }
    assert_eq!(&decoded, data);
    assert_eq!(br.bits_remaining(), 0);
}