Vendor dependencies for 0.3.0 release

2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

vendor/ruzstd/src/huff0/huff0_decoder.rs vendored Normal file

@@ -0,0 +1,401 @@
//! Utilities for decoding Huff0-encoded Huffman data.
use crate::bit_io::BitReaderReversed;
use crate::decoding::errors::HuffmanTableError;
use crate::fse::{FSEDecoder, FSETable};
use alloc::vec::Vec;
/// The Zstandard specification limits the maximum length of a code to 11 bits.
pub(crate) const MAX_MAX_NUM_BITS: u8 = 11;
pub struct HuffmanDecoder<'table> {
table: &'table HuffmanTable,
/// State is used to index into the table.
pub state: u64,
}
impl<'t> HuffmanDecoder<'t> {
/// Create a new decoder with the provided table
pub fn new(table: &'t HuffmanTable) -> HuffmanDecoder<'t> {
HuffmanDecoder { table, state: 0 }
}
/// Decode the symbol the internal state (cursor) is currently pointing at and return the
/// decoded literal.
pub fn decode_symbol(&mut self) -> u8 {
self.table.decode[self.state as usize].symbol
}
/// Initialize the internal state by reading the first `max_num_bits` bits from the stream.
/// Afterwards, `decode_symbol` can be called to read the byte the internal cursor is pointing at,
/// and `next_state` can be called to advance the cursor.
///
/// Returns the number of bits read.
pub fn init_state(&mut self, br: &mut BitReaderReversed<'_>) -> u8 {
let num_bits = self.table.max_num_bits;
let new_bits = br.get_bits(num_bits);
self.state = new_bits;
num_bits
}
/// Advance the internal cursor to the next symbol. After this, you can call `decode_symbol`
/// to read from the new position.
pub fn next_state(&mut self, br: &mut BitReaderReversed<'_>) -> u8 {
// self.state stores a small window of the bit stream. Indexing the table with this state
// tells you how many bits identify the current symbol.
let num_bits = self.table.decode[self.state as usize].num_bits;
// New bits are read from the stream
let new_bits = br.get_bits(num_bits);
// Shift and mask out the bits that identify the current symbol
self.state <<= num_bits;
self.state &= self.table.decode.len() as u64 - 1;
// The new bits are appended at the end of the current state.
self.state |= new_bits;
num_bits
}
}
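// Editor's note: the following test is an illustrative sketch, not part of the
// upstream ruzstd sources. It demonstrates the state-window arithmetic used by
// `next_state` above with plain integers, assuming a hypothetical table with
// max_num_bits = 3 (i.e. 8 entries).
#[test]
fn state_window_sketch() {
let table_len: u64 = 1 << 3; // 8 entries for max_num_bits = 3
let mut state: u64 = 0b101; // current window into the bit stream
let num_bits: u8 = 2; // bits occupied by the current symbol, per the table
let new_bits: u64 = 0b11; // bits freshly read from the reversed bit reader
state <<= num_bits; // shift the consumed bits out of the window
state &= table_len - 1; // keep the window within the table bounds
state |= new_bits; // append the newly read bits at the end
assert_eq!(state, 0b111);
}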
/// A Huffman decoding table contains a list of Huffman prefix codes and their associated values
pub struct HuffmanTable {
decode: Vec<Entry>,
/// The weight of a symbol is derived from the number of times it occurs in the data.
/// These values are used to construct a binary tree referred to as
/// a Huffman tree. Once this tree is constructed, it can be used to build the
/// lookup table.
weights: Vec<u8>,
/// The maximum size in bits a prefix code in the encoded data can be.
/// This value is used so that the decoder knows how many bits
/// to read from the bitstream before checking the table. This
/// value must be 11 or lower.
pub max_num_bits: u8,
bits: Vec<u8>,
bit_ranks: Vec<u32>,
rank_indexes: Vec<usize>,
/// In some cases, the list of weights is compressed using FSE compression.
fse_table: FSETable,
}
impl HuffmanTable {
/// Create a new, empty table.
pub fn new() -> HuffmanTable {
HuffmanTable {
decode: Vec::new(),
weights: Vec::with_capacity(256),
max_num_bits: 0,
bits: Vec::with_capacity(256),
bit_ranks: Vec::with_capacity(11),
rank_indexes: Vec::with_capacity(11),
fse_table: FSETable::new(255),
}
}
/// Completely empty the table then repopulate as a replica
/// of `other`.
pub fn reinit_from(&mut self, other: &Self) {
self.reset();
self.decode.extend_from_slice(&other.decode);
self.weights.extend_from_slice(&other.weights);
self.max_num_bits = other.max_num_bits;
self.bits.extend_from_slice(&other.bits);
self.rank_indexes.extend_from_slice(&other.rank_indexes);
self.fse_table.reinit_from(&other.fse_table);
}
/// Completely empty the table of all data.
pub fn reset(&mut self) {
self.decode.clear();
self.weights.clear();
self.max_num_bits = 0;
self.bits.clear();
self.bit_ranks.clear();
self.rank_indexes.clear();
self.fse_table.reset();
}
/// Read from `source` and decode the input, populating the huffman decoding table.
///
/// Returns the number of bytes read.
pub fn build_decoder(&mut self, source: &[u8]) -> Result<u32, HuffmanTableError> {
self.decode.clear();
let bytes_used = self.read_weights(source)?;
self.build_table_from_weights()?;
Ok(bytes_used)
}
/// Read weights from the provided source.
///
/// The Huffman table is represented in the input data as a list of weights.
/// After the header byte, the weights are read; a Huffman decoding table
/// can then be constructed from that list of weights.
///
/// Returns the number of bytes read.
fn read_weights(&mut self, source: &[u8]) -> Result<u32, HuffmanTableError> {
use HuffmanTableError as err;
if source.is_empty() {
return Err(err::SourceIsEmpty);
}
let header = source[0];
let mut bits_read = 8;
match header {
// If the header byte is less than 128, the series of weights
// is compressed using two interleaved FSE streams that share
// a distribution table.
0..=127 => {
let fse_stream = &source[1..];
if header as usize > fse_stream.len() {
return Err(err::NotEnoughBytesForWeights {
got_bytes: fse_stream.len(),
expected_bytes: header,
});
}
//fse decompress weights
let bytes_used_by_fse_header = self.fse_table.build_decoder(fse_stream, 6)?;
if bytes_used_by_fse_header > header as usize {
return Err(err::FSETableUsedTooManyBytes {
used: bytes_used_by_fse_header,
available_bytes: header,
});
}
vprintln!(
"Building fse table for huffman weights used: {}",
bytes_used_by_fse_header
);
// Huffman headers are compressed using two interleaved
// FSE bitstreams, where the first state (decoder) handles
// even symbols, and the second handles odd symbols.
let mut dec1 = FSEDecoder::new(&self.fse_table);
let mut dec2 = FSEDecoder::new(&self.fse_table);
let compressed_start = bytes_used_by_fse_header;
let compressed_length = header as usize - bytes_used_by_fse_header;
let compressed_weights = &fse_stream[compressed_start..];
if compressed_weights.len() < compressed_length {
return Err(err::NotEnoughBytesToDecompressWeights {
have: compressed_weights.len(),
need: compressed_length,
});
}
let compressed_weights = &compressed_weights[..compressed_length];
let mut br = BitReaderReversed::new(compressed_weights);
bits_read += (bytes_used_by_fse_header + compressed_length) * 8;
//skip the 0 padding at the end of the last byte of the bit stream and throw away the first 1 found
let mut skipped_bits = 0;
loop {
let val = br.get_bits(1);
skipped_bits += 1;
if val == 1 || skipped_bits > 8 {
break;
}
}
if skipped_bits > 8 {
//if more than 7 bits are 0, this is not the correct end of the bitstream. Either a bug or corrupted data
return Err(err::ExtraPadding { skipped_bits });
}
dec1.init_state(&mut br)?;
dec2.init_state(&mut br)?;
self.weights.clear();
// The two decoders take turns decoding a single symbol and updating their state.
loop {
let w = dec1.decode_symbol();
self.weights.push(w);
dec1.update_state(&mut br);
if br.bits_remaining() <= -1 {
//collect final states
self.weights.push(dec2.decode_symbol());
break;
}
let w = dec2.decode_symbol();
self.weights.push(w);
dec2.update_state(&mut br);
if br.bits_remaining() <= -1 {
//collect final states
self.weights.push(dec1.decode_symbol());
break;
}
//maximum number of weights is 255 because we use u8 symbols and the last weight is inferred from the sum of all others
if self.weights.len() > 255 {
return Err(err::TooManyWeights {
got: self.weights.len(),
});
}
}
}
// If the header byte is greater than or equal to 128,
// weights are directly represented, where each weight is
// encoded directly as a 4 bit field. The weights will
// always be encoded with full bytes, meaning if there's
// an odd number of weights, the last weight will still
// occupy a full byte.
_ => {
// weights are directly encoded
let weights_raw = &source[1..];
let num_weights = header - 127;
self.weights.resize(num_weights as usize, 0);
let bytes_needed = if num_weights % 2 == 0 {
num_weights as usize / 2
} else {
(num_weights as usize / 2) + 1
};
if weights_raw.len() < bytes_needed {
return Err(err::NotEnoughBytesInSource {
got: weights_raw.len(),
need: bytes_needed,
});
}
for idx in 0..num_weights {
if idx % 2 == 0 {
self.weights[idx as usize] = weights_raw[idx as usize / 2] >> 4;
} else {
self.weights[idx as usize] = weights_raw[idx as usize / 2] & 0xF;
}
bits_read += 4;
}
}
}
let bytes_read = if bits_read % 8 == 0 {
bits_read / 8
} else {
(bits_read / 8) + 1
};
Ok(bytes_read as u32)
}
/// Once the weights have been read from the data, they can be expanded into a full
/// decoding table, which is then used to decode the actual compressed data.
///
/// This function populates the rest of the table from the list of weights.
fn build_table_from_weights(&mut self) -> Result<(), HuffmanTableError> {
use HuffmanTableError as err;
self.bits.clear();
self.bits.resize(self.weights.len() + 1, 0);
let mut weight_sum: u32 = 0;
for w in &self.weights {
if *w > MAX_MAX_NUM_BITS {
return Err(err::WeightBiggerThanMaxNumBits { got: *w });
}
weight_sum += if *w > 0 { 1_u32 << (*w - 1) } else { 0 };
}
if weight_sum == 0 {
return Err(err::MissingWeights);
}
let max_bits = highest_bit_set(weight_sum) as u8;
let left_over = (1 << max_bits) - weight_sum;
// left_over must be a power of two
if !left_over.is_power_of_two() {
return Err(err::LeftoverIsNotAPowerOf2 { got: left_over });
}
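// The weight of the last symbol is never transmitted; it is inferred here so that
// the weights complete a full power of two.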
let last_weight = highest_bit_set(left_over) as u8;
for symbol in 0..self.weights.len() {
let bits = if self.weights[symbol] > 0 {
max_bits + 1 - self.weights[symbol]
} else {
0
};
self.bits[symbol] = bits;
}
self.bits[self.weights.len()] = max_bits + 1 - last_weight;
self.max_num_bits = max_bits;
if max_bits > MAX_MAX_NUM_BITS {
return Err(err::MaxBitsTooHigh { got: max_bits });
}
self.bit_ranks.clear();
self.bit_ranks.resize((max_bits + 1) as usize, 0);
for num_bits in &self.bits {
self.bit_ranks[(*num_bits) as usize] += 1;
}
//fill with dummy symbols
self.decode.resize(
1 << self.max_num_bits,
Entry {
symbol: 0,
num_bits: 0,
},
);
//starting codes for each rank
self.rank_indexes.clear();
self.rank_indexes.resize((max_bits + 1) as usize, 0);
self.rank_indexes[max_bits as usize] = 0;
for bits in (1..self.rank_indexes.len() as u8).rev() {
self.rank_indexes[bits as usize - 1] = self.rank_indexes[bits as usize]
+ self.bit_ranks[bits as usize] as usize * (1 << (max_bits - bits));
}
assert!(
self.rank_indexes[0] == self.decode.len(),
"rank_idx[0]: {} should be: {}",
self.rank_indexes[0],
self.decode.len()
);
for symbol in 0..self.bits.len() {
let bits_for_symbol = self.bits[symbol];
if bits_for_symbol != 0 {
// Allocate a code for the symbol and set it in the table.
// A code ignores the lowest max_bits - bits[symbol] bits, so it covers
// a contiguous range of entries in the decoding table.
let base_idx = self.rank_indexes[bits_for_symbol as usize];
let len = 1 << (max_bits - bits_for_symbol);
self.rank_indexes[bits_for_symbol as usize] += len;
for idx in 0..len {
self.decode[base_idx + idx].symbol = symbol as u8;
self.decode[base_idx + idx].num_bits = bits_for_symbol;
}
}
}
Ok(())
}
}
impl Default for HuffmanTable {
fn default() -> Self {
Self::new()
}
}
/// A single entry in the table contains the decoded symbol/literal and the
/// size of the prefix code.
#[derive(Copy, Clone, Debug)]
pub struct Entry {
/// The byte that the prefix code replaces during encoding.
symbol: u8,
/// The number of bits the prefix code occupies.
num_bits: u8,
}
/// Asserts that the provided value is greater than zero and returns
/// 32 minus the number of leading zeros, i.e. the 1-based position of the highest set bit.
fn highest_bit_set(x: u32) -> u32 {
assert!(x > 0);
u32::BITS - x.leading_zeros()
}
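// Editor's note: illustrative sketch, not part of the upstream ruzstd sources.
// It exercises the "direct" weight representation handled in read_weights:
// header = 127 + number_of_weights, followed by the weights packed as 4-bit
// fields, high nibble first. Here the header byte 130 announces 3 weights
// (2, 1, 1), so 1 header byte plus 2 packed bytes are consumed.
#[test]
fn direct_weight_header_sketch() {
let source = [130u8, 0x21, 0x10];
let mut table = HuffmanTable::new();
let bytes_read = table.build_decoder(&source).unwrap();
assert_eq!(bytes_read, 3);
}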

vendor/ruzstd/src/huff0/huff0_encoder.rs vendored Normal file

@@ -0,0 +1,484 @@
use alloc::vec::Vec;
use core::cmp::Ordering;
use crate::{
bit_io::BitWriter,
fse::fse_encoder::{self, FSEEncoder},
};
pub(crate) struct HuffmanEncoder<'output, 'table, V: AsMut<Vec<u8>>> {
table: &'table HuffmanTable,
writer: &'output mut BitWriter<V>,
}
impl<V: AsMut<Vec<u8>>> HuffmanEncoder<'_, '_, V> {
pub fn new<'o, 't>(
table: &'t HuffmanTable,
writer: &'o mut BitWriter<V>,
) -> HuffmanEncoder<'o, 't, V> {
HuffmanEncoder { table, writer }
}
/// Encodes the data using the provided table
/// Writes
/// * Table description
/// * Encoded data
/// * Padding bits to fill up last byte
pub fn encode(&mut self, data: &[u8], with_table: bool) {
if with_table {
self.write_table();
}
Self::encode_stream(self.table, self.writer, data);
}
/// Encodes the data using the provided table in 4 concatenated streams
/// Writes
/// * Table description
/// * Jumptable
/// * Encoded data in 4 streams, each padded to fill the last byte
pub fn encode4x(&mut self, data: &[u8], with_table: bool) {
assert!(data.len() >= 4);
// Split data in 4 equally sized parts (the last one might be a bit smaller than the rest)
let split_size = data.len().div_ceil(4);
let src1 = &data[..split_size];
let src2 = &data[split_size..split_size * 2];
let src3 = &data[split_size * 2..split_size * 3];
let src4 = &data[split_size * 3..];
// Write table description
if with_table {
self.write_table();
}
// Reserve space for the jump table, will be changed later
let size_idx = self.writer.index();
self.writer.write_bits(0u16, 16);
self.writer.write_bits(0u16, 16);
self.writer.write_bits(0u16, 16);
// Write the 4 streams, noting the sizes of the encoded streams
let index_before = self.writer.index();
Self::encode_stream(self.table, self.writer, src1);
let size1 = (self.writer.index() - index_before) / 8;
let index_before = self.writer.index();
Self::encode_stream(self.table, self.writer, src2);
let size2 = (self.writer.index() - index_before) / 8;
let index_before = self.writer.index();
Self::encode_stream(self.table, self.writer, src3);
let size3 = (self.writer.index() - index_before) / 8;
Self::encode_stream(self.table, self.writer, src4);
// Sanity check, if this doesn't hold we produce a broken stream
assert!(size1 <= u16::MAX as usize);
assert!(size2 <= u16::MAX as usize);
assert!(size3 <= u16::MAX as usize);
// Update the jumptable with the real sizes
self.writer.change_bits(size_idx, size1 as u16, 16);
self.writer.change_bits(size_idx + 16, size2 as u16, 16);
self.writer.change_bits(size_idx + 32, size3 as u16, 16);
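// The size of the fourth stream is not stored; decoders infer it from the total
// size of the compressed data.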
}
/// Encode one stream and pad it to fill the last byte
fn encode_stream<VV: AsMut<Vec<u8>>>(
table: &HuffmanTable,
writer: &mut BitWriter<VV>,
data: &[u8],
) {
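// Symbols are encoded back to front because the decoder reads the bit stream in
// reverse (see BitReaderReversed in the decoder).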
for symbol in data.iter().rev() {
let (code, num_bits) = table.codes[*symbol as usize];
debug_assert!(num_bits > 0);
writer.write_bits(code, num_bits as usize);
}
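// Terminate the stream: write a single 1 bit and pad with zeros up to the next
// byte boundary. The decoder skips the zero padding and discards the first 1 bit it finds.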
let bits_to_fill = writer.misaligned();
if bits_to_fill == 0 {
writer.write_bits(1u32, 8);
} else {
writer.write_bits(1u32, bits_to_fill);
}
}
pub(super) fn weights(&self) -> Vec<u8> {
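// A used symbol's weight is (max_num_bits + 1 - num_bits); unused symbols get weight 0.
// This mirrors how the decoder turns weights back into bit lengths.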
let max = self.table.codes.iter().map(|(_, nb)| nb).max().unwrap();
let weights = self
.table
.codes
.iter()
.copied()
.map(|(_, nb)| if nb == 0 { 0 } else { max - nb + 1 })
.collect::<Vec<u8>>();
weights
}
fn write_table(&mut self) {
// TODO strategy for determining this?
let weights = self.weights();
let weights = &weights[..weights.len() - 1]; // don't encode the last weight; the decoder infers it
if weights.len() > 16 {
let size_idx = self.writer.index();
self.writer.write_bits(0u8, 8);
let idx_before = self.writer.index();
let mut encoder = FSEEncoder::new(
fse_encoder::build_table_from_data(weights.iter().copied(), 6, true),
self.writer,
);
encoder.encode_interleaved(weights);
let encoded_len = (self.writer.index() - idx_before) / 8;
assert!(encoded_len < 128);
self.writer.change_bits(size_idx, encoded_len as u8, 8);
} else {
self.writer.write_bits(weights.len() as u8 + 127, 8);
let pairs = weights.chunks_exact(2);
let remainder = pairs.remainder();
for pair in pairs.into_iter() {
let weight1 = pair[0];
let weight2 = pair[1];
assert!(weight1 < 16);
assert!(weight2 < 16);
self.writer.write_bits(weight2, 4);
self.writer.write_bits(weight1, 4);
}
if !remainder.is_empty() {
let weight = remainder[0];
assert!(weight < 16);
self.writer.write_bits(weight << 4, 8);
}
}
}
}
pub struct HuffmanTable {
/// The index is the symbol; each value holds the code in the lower bits of the u32 and the number of bits in the u8
codes: Vec<(u32, u8)>,
}
impl HuffmanTable {
pub fn build_from_data(data: &[u8]) -> Self {
let mut counts = [0; 256];
let mut max = 0;
for x in data {
counts[*x as usize] += 1;
max = max.max(*x);
}
Self::build_from_counts(&counts[..=max as usize])
}
pub fn build_from_counts(counts: &[usize]) -> Self {
assert!(counts.len() <= 256);
let zeros = counts.iter().filter(|x| **x == 0).count();
let mut weights = distribute_weights(counts.len() - zeros);
let limit = weights.len().ilog2() as usize + 2;
redistribute_weights(&mut weights, limit);
weights.reverse();
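// The weights are now in descending order, so popping from the back hands out the
// smallest weights first, i.e. to the symbols with the lowest non-zero counts.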
let mut counts_sorted = counts.iter().enumerate().collect::<Vec<_>>();
counts_sorted.sort_by(|(_, c1), (_, c2)| c1.cmp(c2));
let mut weights_distributed = alloc::vec![0; counts.len()];
for (idx, count) in counts_sorted {
if *count == 0 {
weights_distributed[idx] = 0;
} else {
weights_distributed[idx] = weights.pop().unwrap();
}
}
Self::build_from_weights(&weights_distributed)
}
pub fn build_from_weights(weights: &[usize]) -> Self {
let mut sorted = Vec::with_capacity(weights.len());
struct SortEntry {
symbol: u8,
weight: usize,
}
// TODO this doesn't need to be a temporary Vec, it could be done in a [_; 264]
// only non-zero weights are interesting here
for (symbol, weight) in weights.iter().copied().enumerate() {
if weight > 0 {
sorted.push(SortEntry {
symbol: symbol as u8,
weight,
});
}
}
// We process symbols ordered by weight and then ordered by symbol
sorted.sort_by(|left, right| match left.weight.cmp(&right.weight) {
Ordering::Equal => left.symbol.cmp(&right.symbol),
other => other,
});
// Prepare huffman table with placeholders
let mut table = HuffmanTable {
codes: Vec::with_capacity(weights.len()),
};
for _ in 0..weights.len() {
table.codes.push((0, 0));
}
// Determine the number of bits needed for codes with the lowest weight
let weight_sum = sorted.iter().map(|e| 1 << (e.weight - 1)).sum::<usize>();
if !weight_sum.is_power_of_two() {
panic!("This is an internal error");
}
let max_num_bits = highest_bit_set(weight_sum) - 1; // this is a log_2 of a clean power of two
// Starting at the symbols with the lowest weight we update the placeholders in the table
let mut current_code = 0;
let mut current_weight = 0;
let mut current_num_bits = 0;
for entry in sorted.iter() {
// If the entry isn't the same weight as the last one we need to change a few things
if current_weight != entry.weight {
// The code shifts by the difference of the weights to allow for enough unique values
current_code >>= entry.weight - current_weight;
// Encoding a symbol of this weight will take less bits than the previous weight
current_num_bits = max_num_bits - entry.weight + 1;
// Run the next update when the weight changes again
current_weight = entry.weight;
}
table.codes[entry.symbol as usize] = (current_code as u32, current_num_bits as u8);
current_code += 1;
}
table
}
pub fn can_encode(&self, other: &Self) -> Option<usize> {
if other.codes.len() > self.codes.len() {
return None;
}
let mut sum = 0;
for ((_, other_num_bits), (_, self_num_bits)) in other.codes.iter().zip(self.codes.iter()) {
if *other_num_bits != 0 && *self_num_bits == 0 {
return None;
}
sum += other_num_bits.abs_diff(*self_num_bits) as usize;
}
Some(sum)
}
}
/// Asserts that the provided value is greater than zero and returns the 1-based position of the highest set bit
fn highest_bit_set(x: usize) -> usize {
assert!(x > 0);
usize::BITS as usize - x.leading_zeros() as usize
}
#[test]
fn huffman() {
let table = HuffmanTable::build_from_weights(&[2, 2, 2, 1, 1]);
assert_eq!(table.codes[0], (1, 2));
assert_eq!(table.codes[1], (2, 2));
assert_eq!(table.codes[2], (3, 2));
assert_eq!(table.codes[3], (0, 3));
assert_eq!(table.codes[4], (1, 3));
let table = HuffmanTable::build_from_weights(&[4, 3, 2, 0, 1, 1]);
assert_eq!(table.codes[0], (1, 1));
assert_eq!(table.codes[1], (1, 2));
assert_eq!(table.codes[2], (1, 3));
assert_eq!(table.codes[3], (0, 0));
assert_eq!(table.codes[4], (0, 4));
assert_eq!(table.codes[5], (1, 4));
}
/// Distributes weights whose contributions (2^weight) add up to a clean power of two
fn distribute_weights(amount: usize) -> Vec<usize> {
assert!(amount >= 2);
assert!(amount <= 256);
let mut weights = Vec::new();
// This is the trivial power of two we always need
weights.push(1);
weights.push(1);
// This is the weight we are adding right now
let mut target_weight = 1;
// Counts how many times we have added weights
let mut weight_counter = 2;
// We always add new weights whose combined contribution (2^weight each) equals the
// contribution of the weights already in the vec.
// This doubles the total, so the sum stays a power of two.
//
// Example: [1, 1] -> [1,1,2] (2^1 + 2^1 == 2^2)
//
// Example: [1, 1] -> [1,1,1,1] (2^1 + 2^1 == 2^1 + 2^1)
// [1,1,1,1] -> [1,1,1,1,3] (2^1 + 2^1 + 2^1 + 2^1 == 2^3)
while weights.len() < amount {
let mut add_new = 1 << (weight_counter - target_weight);
let available_space = amount - weights.len();
// If the number of new weights needed to reach the next power of two would not fit into the remaining space,
// we instead add a single, bigger weight and start the cycle again
if add_new > available_space {
// TODO we could maybe instead do this until add_new <= available_space?
// target_weight += 1
// add_new /= 2
target_weight = weight_counter;
add_new = 1;
}
for _ in 0..add_new {
weights.push(target_weight);
}
weight_counter += 1;
}
assert_eq!(amount, weights.len());
weights
}
/// Sometimes `distribute_weights` generates weights that would require too many bits to encode.
/// This redistributes the weights to have less variance by raising the lower weights, while still
/// maintaining the required attributes of the weight distribution.
fn redistribute_weights(weights: &mut [usize], max_num_bits: usize) {
let weight_sum_log = weights
.iter()
.copied()
.map(|x| 1 << x)
.sum::<usize>()
.ilog2() as usize;
// Nothing needs to be done, this is already fine
if weight_sum_log < max_num_bits {
return;
}
// We need to decrease the weight difference by the difference between weight_sum_log and max_num_bits
let decrease_weights_by = weight_sum_log - max_num_bits + 1;
// To do that we raise the lower weights up by that difference, recording how much weight we added in the process
let mut added_weights = 0;
for weight in weights.iter_mut() {
if *weight < decrease_weights_by {
for add in *weight..decrease_weights_by {
added_weights += 1 << add;
}
*weight = decrease_weights_by;
}
}
// Then we reduce weights until the extra weight we added has been balanced out
while added_weights > 0 {
// Find the highest weight whose contribution still fits into the remaining added weight
let mut current_idx = 0;
let mut current_weight = 0;
for (idx, weight) in weights.iter().copied().enumerate() {
if 1 << (weight - 1) > added_weights {
break;
}
if weight > current_weight {
current_weight = weight;
current_idx = idx;
}
}
// Reduce that weight by 1
added_weights -= 1 << (current_weight - 1);
weights[current_idx] -= 1;
}
// At the end we normalize the weights so that they start at 1 again
if weights[0] > 1 {
let offset = weights[0] - 1;
for weight in weights.iter_mut() {
*weight -= offset;
}
}
}
#[test]
fn weights() {
// assert_eq!(distribute_weights(5).as_slice(), &[1, 1, 2, 3, 4]);
for amount in 2..=256 {
let mut weights = distribute_weights(amount);
assert_eq!(weights.len(), amount);
let sum = weights
.iter()
.copied()
.map(|weight| 1 << weight)
.sum::<usize>();
assert!(sum.is_power_of_two());
for num_bit_limit in (amount.ilog2() as usize + 1)..=11 {
redistribute_weights(&mut weights, num_bit_limit);
let sum = weights
.iter()
.copied()
.map(|weight| 1 << weight)
.sum::<usize>();
assert!(sum.is_power_of_two());
assert!(
sum.ilog2() <= 11,
"Max bits too big: sum: {} {weights:?}",
sum
);
let codes = HuffmanTable::build_from_weights(&weights).codes;
for (code, num_bits) in codes.iter().copied() {
for (code2, num_bits2) in codes.iter().copied() {
if num_bits == 0 || num_bits2 == 0 || (code, num_bits) == (code2, num_bits2) {
continue;
}
if num_bits <= num_bits2 {
let code2_shifted = code2 >> (num_bits2 - num_bits);
assert_ne!(
code, code2_shifted,
"{:b},{num_bits:} is prefix of {:b},{num_bits2:}",
code, code2
);
}
}
}
}
}
}
#[test]
fn counts() {
let counts = &[3, 0, 4, 1, 5];
let table = HuffmanTable::build_from_counts(counts).codes;
assert_eq!(table[1].1, 0);
assert!(table[3].1 >= table[0].1);
assert!(table[0].1 >= table[2].1);
assert!(table[2].1 >= table[4].1);
let counts = &[3, 0, 4, 0, 7, 2, 2, 2, 0, 2, 2, 1, 5];
let table = HuffmanTable::build_from_counts(counts).codes;
assert_eq!(table[1].1, 0);
assert_eq!(table[3].1, 0);
assert_eq!(table[8].1, 0);
assert!(table[11].1 >= table[5].1);
assert!(table[5].1 >= table[6].1);
assert!(table[6].1 >= table[7].1);
assert!(table[7].1 >= table[9].1);
assert!(table[9].1 >= table[10].1);
assert!(table[10].1 >= table[0].1);
assert!(table[0].1 >= table[2].1);
assert!(table[2].1 >= table[12].1);
assert!(table[12].1 >= table[4].1);
}
#[test]
fn from_data() {
let counts = &[3, 0, 4, 1, 5];
let table = HuffmanTable::build_from_counts(counts).codes;
let data = &[0, 2, 4, 4, 0, 3, 2, 2, 0, 2];
let table2 = HuffmanTable::build_from_data(data).codes;
assert_eq!(table, table2);
}
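// Editor's note: illustrative usage sketch, not part of the upstream ruzstd
// sources; it mirrors the round_trip helper in huff0/mod.rs. A table is built
// from the data and the data is encoded, table description included, into a
// BitWriter.
#[test]
fn encoder_usage_sketch() {
let data = &[1u8, 1, 1, 1, 2, 3, 5, 45, 12, 90];
let mut writer = BitWriter::new();
let table = HuffmanTable::build_from_data(data);
let mut encoder = HuffmanEncoder::new(&table, &mut writer);
encoder.encode(data, true);
let encoded = writer.dump();
assert!(!encoded.is_empty());
}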

vendor/ruzstd/src/huff0/mod.rs vendored Normal file

@@ -0,0 +1,84 @@
/// Huffman coding is a method of encoding where symbols are assigned a code,
/// with more commonly used symbols getting shorter codes and less commonly
/// used symbols getting longer codes. The codes are prefix free, meaning no code
/// is a prefix of another code. For example, {0, 10, 110, 111} is prefix free,
/// while {0, 01} is not, because 0 is a prefix of 01.
mod huff0_decoder;
pub use huff0_decoder::*;
pub mod huff0_encoder;
/// Only needed for testing.
///
/// Encodes the data with a table built from that data
/// Decodes the result again by first decoding the table and then the data
/// Asserts that the decoded data equals the input
#[cfg(any(test, feature = "fuzz_exports"))]
pub fn round_trip(data: &[u8]) {
use crate::bit_io::{BitReaderReversed, BitWriter};
use alloc::vec::Vec;
if data.len() < 2 {
return;
}
if data.iter().all(|x| *x == data[0]) {
return;
}
let mut writer = BitWriter::new();
let encoder_table = huff0_encoder::HuffmanTable::build_from_data(data);
let mut encoder = huff0_encoder::HuffmanEncoder::new(&encoder_table, &mut writer);
encoder.encode(data, true);
let encoded = writer.dump();
let mut decoder_table = HuffmanTable::new();
let table_bytes = decoder_table.build_decoder(&encoded).unwrap();
let mut decoder = HuffmanDecoder::new(&decoder_table);
let mut br = BitReaderReversed::new(&encoded[table_bytes as usize..]);
let mut skipped_bits = 0;
loop {
let val = br.get_bits(1);
skipped_bits += 1;
if val == 1 || skipped_bits > 8 {
break;
}
}
if skipped_bits > 8 {
//if more than 7 bits are 0, this is not the correct end of the bitstream. Either a bug or corrupted data
panic!("Corrupted end marker");
}
decoder.init_state(&mut br);
let mut decoded = Vec::new();
while br.bits_remaining() > -(decoder_table.max_num_bits as isize) {
decoded.push(decoder.decode_symbol());
decoder.next_state(&mut br);
}
assert_eq!(&decoded, data);
}
#[test]
fn roundtrip() {
use alloc::vec::Vec;
round_trip(&[1, 1, 1, 1, 2, 3]);
round_trip(&[1, 1, 1, 1, 2, 3, 5, 45, 12, 90]);
for size in 2..512 {
use alloc::vec;
let data = vec![123; size];
round_trip(&data);
let mut data = Vec::new();
for x in 0..size {
data.push(x as u8);
}
round_trip(&data);
}
#[cfg(feature = "std")]
if std::fs::exists("fuzz/artifacts/huff0").unwrap_or(false) {
for file in std::fs::read_dir("fuzz/artifacts/huff0").unwrap() {
if file.as_ref().unwrap().file_type().unwrap().is_file() {
let data = std::fs::read(file.unwrap().path()).unwrap();
round_trip(&data);
}
}
}
}