Vendor dependencies for 0.3.0 release

2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

vendor/ruzstd/src/huff0/huff0_decoder.rs vendored Normal file

@@ -0,0 +1,401 @@
//! Utilities for decoding Huff0-encoded Huffman data.
use crate::bit_io::BitReaderReversed;
use crate::decoding::errors::HuffmanTableError;
use crate::fse::{FSEDecoder, FSETable};
use alloc::vec::Vec;
/// The Zstandard specification limits the maximum length of a code to 11 bits.
pub(crate) const MAX_MAX_NUM_BITS: u8 = 11;
pub struct HuffmanDecoder<'table> {
table: &'table HuffmanTable,
/// State is used to index into the table.
pub state: u64,
}
impl<'t> HuffmanDecoder<'t> {
/// Create a new decoder with the provided table
pub fn new(table: &'t HuffmanTable) -> HuffmanDecoder<'t> {
HuffmanDecoder { table, state: 0 }
}
/// Decode the symbol the internal state (cursor) is currently pointing at and return the
/// decoded literal.
pub fn decode_symbol(&mut self) -> u8 {
self.table.decode[self.state as usize].symbol
}
/// Initialize the internal state by reading the first `max_num_bits` bits from the stream.
/// Afterwards, `decode_symbol` can be called to read the byte the internal cursor is pointing at,
/// and `next_state` can be called to advance the cursor.
///
/// Returns the number of bits read.
pub fn init_state(&mut self, br: &mut BitReaderReversed<'_>) -> u8 {
let num_bits = self.table.max_num_bits;
let new_bits = br.get_bits(num_bits);
self.state = new_bits;
num_bits
}
/// Advance the internal cursor to the next symbol. After this, you can call `decode_symbol`
/// to read from the new position.
pub fn next_state(&mut self, br: &mut BitReaderReversed<'_>) -> u8 {
// self.state stores a small window of the bit stream. Indexing the table with this state
// tells you how many bits identify the current symbol.
let num_bits = self.table.decode[self.state as usize].num_bits;
// New bits are read from the stream
let new_bits = br.get_bits(num_bits);
// Shift and mask out the bits that identify the current symbol
self.state <<= num_bits;
self.state &= self.table.decode.len() as u64 - 1;
// The new bits are appended at the end of the current state.
self.state |= new_bits;
num_bits
}
}
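// Editor's note: the following test is an illustrative sketch, not part of the
// upstream ruzstd sources. It demonstrates the state-window arithmetic used by
// `next_state` above with plain integers, assuming a hypothetical table with
// max_num_bits = 3 (i.e. 8 entries).
#[test]
fn state_window_sketch() {
let table_len: u64 = 1 << 3; // 8 entries for max_num_bits = 3
let mut state: u64 = 0b101; // current window into the bit stream
let num_bits: u8 = 2; // bits occupied by the current symbol, per the table
let new_bits: u64 = 0b11; // bits freshly read from the reversed bit reader
state <<= num_bits; // shift the consumed bits out of the window
state &= table_len - 1; // keep the window within the table bounds
state |= new_bits; // append the newly read bits at the end
assert_eq!(state, 0b111);
}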
/// A Huffman decoding table contains a list of Huffman prefix codes and their associated values
pub struct HuffmanTable {
decode: Vec<Entry>,
/// The weight of a symbol is derived from the number of times it occurs in the data.
/// These values are used to construct a binary tree referred to as
/// a Huffman tree. Once this tree is constructed, it can be used to build the
/// lookup table.
weights: Vec<u8>,
/// The maximum size in bits a prefix code in the encoded data can be.
/// This value is used so that the decoder knows how many bits
/// to read from the bitstream before checking the table. This
/// value must be 11 or lower.
pub max_num_bits: u8,
bits: Vec<u8>,
bit_ranks: Vec<u32>,
rank_indexes: Vec<usize>,
/// In some cases, the list of weights is compressed using FSE compression.
fse_table: FSETable,
}
impl HuffmanTable {
/// Create a new, empty table.
pub fn new() -> HuffmanTable {
HuffmanTable {
decode: Vec::new(),
weights: Vec::with_capacity(256),
max_num_bits: 0,
bits: Vec::with_capacity(256),
bit_ranks: Vec::with_capacity(11),
rank_indexes: Vec::with_capacity(11),
fse_table: FSETable::new(255),
}
}
/// Completely empty the table then repopulate as a replica
/// of `other`.
pub fn reinit_from(&mut self, other: &Self) {
self.reset();
self.decode.extend_from_slice(&other.decode);
self.weights.extend_from_slice(&other.weights);
self.max_num_bits = other.max_num_bits;
self.bits.extend_from_slice(&other.bits);
self.rank_indexes.extend_from_slice(&other.rank_indexes);
self.fse_table.reinit_from(&other.fse_table);
}
/// Completely empty the table of all data.
pub fn reset(&mut self) {
self.decode.clear();
self.weights.clear();
self.max_num_bits = 0;
self.bits.clear();
self.bit_ranks.clear();
self.rank_indexes.clear();
self.fse_table.reset();
}
/// Read from `source` and decode the input, populating the huffman decoding table.
///
/// Returns the number of bytes read.
pub fn build_decoder(&mut self, source: &[u8]) -> Result<u32, HuffmanTableError> {
self.decode.clear();
let bytes_used = self.read_weights(source)?;
self.build_table_from_weights()?;
Ok(bytes_used)
}
/// Read weights from the provided source.
///
/// The Huffman table is represented in the input data as a list of weights.
/// After the header byte, the weights are read; a Huffman decoding table
/// can then be constructed from that list of weights.
///
/// Returns the number of bytes read.
fn read_weights(&mut self, source: &[u8]) -> Result<u32, HuffmanTableError> {
use HuffmanTableError as err;
if source.is_empty() {
return Err(err::SourceIsEmpty);
}
let header = source[0];
let mut bits_read = 8;
match header {
// If the header byte is less than 128, the series of weights
// is compressed using two interleaved FSE streams that share
// a distribution table.
0..=127 => {
let fse_stream = &source[1..];
if header as usize > fse_stream.len() {
return Err(err::NotEnoughBytesForWeights {
got_bytes: fse_stream.len(),
expected_bytes: header,
});
}
//fse decompress weights
let bytes_used_by_fse_header = self.fse_table.build_decoder(fse_stream, 6)?;
if bytes_used_by_fse_header > header as usize {
return Err(err::FSETableUsedTooManyBytes {
used: bytes_used_by_fse_header,
available_bytes: header,
});
}
vprintln!(
"Building fse table for huffman weights used: {}",
bytes_used_by_fse_header
);
// Huffman headers are compressed using two interleaved
// FSE bitstreams, where the first state (decoder) handles
// even symbols, and the second handles odd symbols.
let mut dec1 = FSEDecoder::new(&self.fse_table);
let mut dec2 = FSEDecoder::new(&self.fse_table);
let compressed_start = bytes_used_by_fse_header;
let compressed_length = header as usize - bytes_used_by_fse_header;
let compressed_weights = &fse_stream[compressed_start..];
if compressed_weights.len() < compressed_length {
return Err(err::NotEnoughBytesToDecompressWeights {
have: compressed_weights.len(),
need: compressed_length,
});
}
let compressed_weights = &compressed_weights[..compressed_length];
let mut br = BitReaderReversed::new(compressed_weights);
bits_read += (bytes_used_by_fse_header + compressed_length) * 8;
//skip the 0 padding at the end of the last byte of the bit stream and throw away the first 1 found
let mut skipped_bits = 0;
loop {
let val = br.get_bits(1);
skipped_bits += 1;
if val == 1 || skipped_bits > 8 {
break;
}
}
if skipped_bits > 8 {
//if more than 7 bits are 0, this is not the correct end of the bitstream. Either a bug or corrupted data
return Err(err::ExtraPadding { skipped_bits });
}
dec1.init_state(&mut br)?;
dec2.init_state(&mut br)?;
self.weights.clear();
// The two decoders take turns decoding a single symbol and updating their state.
loop {
let w = dec1.decode_symbol();
self.weights.push(w);
dec1.update_state(&mut br);
if br.bits_remaining() <= -1 {
//collect final states
self.weights.push(dec2.decode_symbol());
break;
}
let w = dec2.decode_symbol();
self.weights.push(w);
dec2.update_state(&mut br);
if br.bits_remaining() <= -1 {
//collect final states
self.weights.push(dec1.decode_symbol());
break;
}
//maximum number of weights is 255 because we use u8 symbols and the last weight is inferred from the sum of all others
if self.weights.len() > 255 {
return Err(err::TooManyWeights {
got: self.weights.len(),
});
}
}
}
// If the header byte is greater than or equal to 128,
// weights are directly represented, where each weight is
// encoded directly as a 4 bit field. The weights will
// always be encoded with full bytes, meaning if there's
// an odd number of weights, the last weight will still
// occupy a full byte.
_ => {
// weights are directly encoded
let weights_raw = &source[1..];
let num_weights = header - 127;
self.weights.resize(num_weights as usize, 0);
let bytes_needed = if num_weights % 2 == 0 {
num_weights as usize / 2
} else {
(num_weights as usize / 2) + 1
};
if weights_raw.len() < bytes_needed {
return Err(err::NotEnoughBytesInSource {
got: weights_raw.len(),
need: bytes_needed,
});
}
for idx in 0..num_weights {
if idx % 2 == 0 {
self.weights[idx as usize] = weights_raw[idx as usize / 2] >> 4;
} else {
self.weights[idx as usize] = weights_raw[idx as usize / 2] & 0xF;
}
bits_read += 4;
}
}
}
let bytes_read = if bits_read % 8 == 0 {
bits_read / 8
} else {
(bits_read / 8) + 1
};
Ok(bytes_read as u32)
}
/// Once the weights have been read from the data, they can be expanded into a full
/// decoding table, which is then used to decode the actual compressed data.
///
/// This function populates the rest of the table from the list of weights.
fn build_table_from_weights(&mut self) -> Result<(), HuffmanTableError> {
use HuffmanTableError as err;
self.bits.clear();
self.bits.resize(self.weights.len() + 1, 0);
let mut weight_sum: u32 = 0;
for w in &self.weights {
if *w > MAX_MAX_NUM_BITS {
return Err(err::WeightBiggerThanMaxNumBits { got: *w });
}
weight_sum += if *w > 0 { 1_u32 << (*w - 1) } else { 0 };
}
if weight_sum == 0 {
return Err(err::MissingWeights);
}
let max_bits = highest_bit_set(weight_sum) as u8;
let left_over = (1 << max_bits) - weight_sum;
// left_over must be a power of two
if !left_over.is_power_of_two() {
return Err(err::LeftoverIsNotAPowerOf2 { got: left_over });
}
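// The weight of the last symbol is never transmitted; it is inferred here so that
// the weights complete a full power of two.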
let last_weight = highest_bit_set(left_over) as u8;
for symbol in 0..self.weights.len() {
let bits = if self.weights[symbol] > 0 {
max_bits + 1 - self.weights[symbol]
} else {
0
};
self.bits[symbol] = bits;
}
self.bits[self.weights.len()] = max_bits + 1 - last_weight;
self.max_num_bits = max_bits;
if max_bits > MAX_MAX_NUM_BITS {
return Err(err::MaxBitsTooHigh { got: max_bits });
}
self.bit_ranks.clear();
self.bit_ranks.resize((max_bits + 1) as usize, 0);
for num_bits in &self.bits {
self.bit_ranks[(*num_bits) as usize] += 1;
}
//fill with dummy symbols
self.decode.resize(
1 << self.max_num_bits,
Entry {
symbol: 0,
num_bits: 0,
},
);
//starting codes for each rank
self.rank_indexes.clear();
self.rank_indexes.resize((max_bits + 1) as usize, 0);
self.rank_indexes[max_bits as usize] = 0;
for bits in (1..self.rank_indexes.len() as u8).rev() {
self.rank_indexes[bits as usize - 1] = self.rank_indexes[bits as usize]
+ self.bit_ranks[bits as usize] as usize * (1 << (max_bits - bits));
}
assert!(
self.rank_indexes[0] == self.decode.len(),
"rank_idx[0]: {} should be: {}",
self.rank_indexes[0],
self.decode.len()
);
for symbol in 0..self.bits.len() {
let bits_for_symbol = self.bits[symbol];
if bits_for_symbol != 0 {
// Allocate a code for the symbol and set it in the table.
// A code ignores the lowest max_bits - bits[symbol] bits, so it covers
// a contiguous range of entries in the decoding table.
let base_idx = self.rank_indexes[bits_for_symbol as usize];
let len = 1 << (max_bits - bits_for_symbol);
self.rank_indexes[bits_for_symbol as usize] += len;
for idx in 0..len {
self.decode[base_idx + idx].symbol = symbol as u8;
self.decode[base_idx + idx].num_bits = bits_for_symbol;
}
}
}
Ok(())
}
}
impl Default for HuffmanTable {
fn default() -> Self {
Self::new()
}
}
/// A single entry in the table contains the decoded symbol/literal and the
/// size of the prefix code.
#[derive(Copy, Clone, Debug)]
pub struct Entry {
/// The byte that the prefix code replaces during encoding.
symbol: u8,
/// The number of bits the prefix code occupies.
num_bits: u8,
}
/// Asserts that the provided value is greater than zero and returns
/// 32 minus the number of leading zeros, i.e. the 1-based position of the highest set bit.
fn highest_bit_set(x: u32) -> u32 {
assert!(x > 0);
u32::BITS - x.leading_zeros()
}
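// Editor's note: illustrative sketch, not part of the upstream ruzstd sources.
// It exercises the "direct" weight representation handled in read_weights:
// header = 127 + number_of_weights, followed by the weights packed as 4-bit
// fields, high nibble first. Here the header byte 130 announces 3 weights
// (2, 1, 1), so 1 header byte plus 2 packed bytes are consumed.
#[test]
fn direct_weight_header_sketch() {
let source = [130u8, 0x21, 0x10];
let mut table = HuffmanTable::new();
let bytes_read = table.build_decoder(&source).unwrap();
assert_eq!(bytes_read, 3);
}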

vendor/ruzstd/src/huff0/huff0_encoder.rs vendored Normal file

@@ -0,0 +1,484 @@
use alloc::vec::Vec;
use core::cmp::Ordering;
use crate::{
bit_io::BitWriter,
fse::fse_encoder::{self, FSEEncoder},
};
pub(crate) struct HuffmanEncoder<'output, 'table, V: AsMut<Vec<u8>>> {
table: &'table HuffmanTable,
writer: &'output mut BitWriter<V>,
}
impl<V: AsMut<Vec<u8>>> HuffmanEncoder<'_, '_, V> {
pub fn new<'o, 't>(
table: &'t HuffmanTable,
writer: &'o mut BitWriter<V>,
) -> HuffmanEncoder<'o, 't, V> {
HuffmanEncoder { table, writer }
}
/// Encodes the data using the provided table
/// Writes
/// * Table description
/// * Encoded data
/// * Padding bits to fill up last byte
pub fn encode(&mut self, data: &[u8], with_table: bool) {
if with_table {
self.write_table();
}
Self::encode_stream(self.table, self.writer, data);
}
/// Encodes the data using the provided table in 4 concatenated streams
/// Writes
/// * Table description
/// * Jumptable
/// * Encoded data in 4 streams, each padded to fill the last byte
pub fn encode4x(&mut self, data: &[u8], with_table: bool) {
assert!(data.len() >= 4);
// Split data in 4 equally sized parts (the last one might be a bit smaller than the rest)
let split_size = data.len().div_ceil(4);
let src1 = &data[..split_size];
let src2 = &data[split_size..split_size * 2];
let src3 = &data[split_size * 2..split_size * 3];
let src4 = &data[split_size * 3..];
// Write table description
if with_table {
self.write_table();
}
// Reserve space for the jump table, will be changed later
let size_idx = self.writer.index();
self.writer.write_bits(0u16, 16);
self.writer.write_bits(0u16, 16);
self.writer.write_bits(0u16, 16);
// Write the 4 streams, noting the sizes of the encoded streams
let index_before = self.writer.index();
Self::encode_stream(self.table, self.writer, src1);
let size1 = (self.writer.index() - index_before) / 8;
let index_before = self.writer.index();
Self::encode_stream(self.table, self.writer, src2);
let size2 = (self.writer.index() - index_before) / 8;
let index_before = self.writer.index();
Self::encode_stream(self.table, self.writer, src3);
let size3 = (self.writer.index() - index_before) / 8;
Self::encode_stream(self.table, self.writer, src4);
// Sanity check, if this doesn't hold we produce a broken stream
assert!(size1 <= u16::MAX as usize);
assert!(size2 <= u16::MAX as usize);
assert!(size3 <= u16::MAX as usize);
// Update the jumptable with the real sizes
self.writer.change_bits(size_idx, size1 as u16, 16);
self.writer.change_bits(size_idx + 16, size2 as u16, 16);
self.writer.change_bits(size_idx + 32, size3 as u16, 16);
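// The size of the fourth stream is not stored; decoders infer it from the total
// size of the compressed data.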
}
/// Encode one stream and pad it to fill the last byte
fn encode_stream<VV: AsMut<Vec<u8>>>(
table: &HuffmanTable,
writer: &mut BitWriter<VV>,
data: &[u8],
) {
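// Symbols are encoded back to front because the decoder reads the bit stream in
// reverse (see BitReaderReversed in the decoder).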
for symbol in data.iter().rev() {
let (code, num_bits) = table.codes[*symbol as usize];
debug_assert!(num_bits > 0);
writer.write_bits(code, num_bits as usize);
}
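// Terminate the stream: write a single 1 bit and pad with zeros up to the next
// byte boundary. The decoder skips the zero padding and discards the first 1 bit it finds.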
let bits_to_fill = writer.misaligned();
if bits_to_fill == 0 {
writer.write_bits(1u32, 8);
} else {
writer.write_bits(1u32, bits_to_fill);
}
}
pub(super) fn weights(&self) -> Vec<u8> {
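// A used symbol's weight is (max_num_bits + 1 - num_bits); unused symbols get weight 0.
// This mirrors how the decoder turns weights back into bit lengths.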
let max = self.table.codes.iter().map(|(_, nb)| nb).max().unwrap();
let weights = self
.table
.codes
.iter()
.copied()
.map(|(_, nb)| if nb == 0 { 0 } else { max - nb + 1 })
.collect::<Vec<u8>>();
weights
}
fn write_table(&mut self) {
// TODO strategy for determining this?
let weights = self.weights();
let weights = &weights[..weights.len() - 1]; // don't encode the last weight; the decoder infers it
if weights.len() > 16 {
let size_idx = self.writer.index();
self.writer.write_bits(0u8, 8);
let idx_before = self.writer.index();
let mut encoder = FSEEncoder::new(
fse_encoder::build_table_from_data(weights.iter().copied(), 6, true),
self.writer,
);
encoder.encode_interleaved(weights);
let encoded_len = (self.writer.index() - idx_before) / 8;
assert!(encoded_len < 128);
self.writer.change_bits(size_idx, encoded_len as u8, 8);
} else {
self.writer.write_bits(weights.len() as u8 + 127, 8);
let pairs = weights.chunks_exact(2);
let remainder = pairs.remainder();
for pair in pairs.into_iter() {
let weight1 = pair[0];
let weight2 = pair[1];
assert!(weight1 < 16);
assert!(weight2 < 16);
self.writer.write_bits(weight2, 4);
self.writer.write_bits(weight1, 4);
}
if !remainder.is_empty() {
let weight = remainder[0];
assert!(weight < 16);
self.writer.write_bits(weight << 4, 8);
}
}
}
}
pub struct HuffmanTable {
/// The index is the symbol; each value holds the code in the lower bits of the u32 and the number of bits in the u8
codes: Vec<(u32, u8)>,
}
impl HuffmanTable {
pub fn build_from_data(data: &[u8]) -> Self {
let mut counts = [0; 256];
let mut max = 0;
for x in data {
counts[*x as usize] += 1;
max = max.max(*x);
}
Self::build_from_counts(&counts[..=max as usize])
}
pub fn build_from_counts(counts: &[usize]) -> Self {
assert!(counts.len() <= 256);
let zeros = counts.iter().filter(|x| **x == 0).count();
let mut weights = distribute_weights(counts.len() - zeros);
let limit = weights.len().ilog2() as usize + 2;
redistribute_weights(&mut weights, limit);
weights.reverse();
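// The weights are now in descending order, so popping from the back hands out the
// smallest weights first, i.e. to the symbols with the lowest non-zero counts.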
let mut counts_sorted = counts.iter().enumerate().collect::<Vec<_>>();
counts_sorted.sort_by(|(_, c1), (_, c2)| c1.cmp(c2));
let mut weights_distributed = alloc::vec![0; counts.len()];
for (idx, count) in counts_sorted {
if *count == 0 {
weights_distributed[idx] = 0;
} else {
weights_distributed[idx] = weights.pop().unwrap();
}
}
Self::build_from_weights(&weights_distributed)
}
pub fn build_from_weights(weights: &[usize]) -> Self {
let mut sorted = Vec::with_capacity(weights.len());
struct SortEntry {
symbol: u8,
weight: usize,
}
// TODO this doesn't need to be a temporary Vec, it could be done in a [_; 264]
// only non-zero weights are interesting here
for (symbol, weight) in weights.iter().copied().enumerate() {
if weight > 0 {
sorted.push(SortEntry {
symbol: symbol as u8,
weight,
});
}
}
// We process symbols ordered by weight and then ordered by symbol
sorted.sort_by(|left, right| match left.weight.cmp(&right.weight) {
Ordering::Equal => left.symbol.cmp(&right.symbol),
other => other,
});
// Prepare huffman table with placeholders
let mut table = HuffmanTable {
codes: Vec::with_capacity(weights.len()),
};
for _ in 0..weights.len() {
table.codes.push((0, 0));
}
// Determine the number of bits needed for codes with the lowest weight
let weight_sum = sorted.iter().map(|e| 1 << (e.weight - 1)).sum::<usize>();
if !weight_sum.is_power_of_two() {
panic!("This is an internal error");
}
let max_num_bits = highest_bit_set(weight_sum) - 1; // this is a log_2 of a clean power of two
// Starting at the symbols with the lowest weight we update the placeholders in the table
let mut current_code = 0;
let mut current_weight = 0;
let mut current_num_bits = 0;
for entry in sorted.iter() {
// If the entry isn't the same weight as the last one we need to change a few things
if current_weight != entry.weight {
// The code shifts by the difference of the weights to allow for enough unique values
current_code >>= entry.weight - current_weight;
// Encoding a symbol of this weight will take less bits than the previous weight
current_num_bits = max_num_bits - entry.weight + 1;
// Run the next update when the weight changes again
current_weight = entry.weight;
}
table.codes[entry.symbol as usize] = (current_code as u32, current_num_bits as u8);
current_code += 1;
}
table
}
pub fn can_encode(&self, other: &Self) -> Option<usize> {
if other.codes.len() > self.codes.len() {
return None;
}
let mut sum = 0;
for ((_, other_num_bits), (_, self_num_bits)) in other.codes.iter().zip(self.codes.iter()) {
if *other_num_bits != 0 && *self_num_bits == 0 {
return None;
}
sum += other_num_bits.abs_diff(*self_num_bits) as usize;
}
Some(sum)
}
}
/// Asserts that the provided value is greater than zero and returns the 1-based position of the highest set bit
fn highest_bit_set(x: usize) -> usize {
assert!(x > 0);
usize::BITS as usize - x.leading_zeros() as usize
}
#[test]
fn huffman() {
let table = HuffmanTable::build_from_weights(&[2, 2, 2, 1, 1]);
assert_eq!(table.codes[0], (1, 2));
assert_eq!(table.codes[1], (2, 2));
assert_eq!(table.codes[2], (3, 2));
assert_eq!(table.codes[3], (0, 3));
assert_eq!(table.codes[4], (1, 3));
let table = HuffmanTable::build_from_weights(&[4, 3, 2, 0, 1, 1]);
assert_eq!(table.codes[0], (1, 1));
assert_eq!(table.codes[1], (1, 2));
assert_eq!(table.codes[2], (1, 3));
assert_eq!(table.codes[3], (0, 0));
assert_eq!(table.codes[4], (0, 4));
assert_eq!(table.codes[5], (1, 4));
}
/// Distributes weights whose contributions (2^weight) add up to a clean power of two
fn distribute_weights(amount: usize) -> Vec<usize> {
assert!(amount >= 2);
assert!(amount <= 256);
let mut weights = Vec::new();
// This is the trivial power of two we always need
weights.push(1);
weights.push(1);
// This is the weight we are adding right now
let mut target_weight = 1;
// Counts how many times we have added weights
let mut weight_counter = 2;
// We always add new weights whose combined contribution (2^weight each) equals the
// contribution of the weights already in the vec.
// This doubles the total, so the sum stays a power of two.
//
// Example: [1, 1] -> [1,1,2] (2^1 + 2^1 == 2^2)
//
// Example: [1, 1] -> [1,1,1,1] (2^1 + 2^1 == 2^1 + 2^1)
// [1,1,1,1] -> [1,1,1,1,3] (2^1 + 2^1 + 2^1 + 2^1 == 2^3)
while weights.len() < amount {
let mut add_new = 1 << (weight_counter - target_weight);
let available_space = amount - weights.len();
// If the number of new weights needed to reach the next power of two would not fit into the remaining space,
// we instead add a single, bigger weight and start the cycle again
if add_new > available_space {
// TODO we could maybe instead do this until add_new <= available_space?
// target_weight += 1
// add_new /= 2
target_weight = weight_counter;
add_new = 1;
}
for _ in 0..add_new {
weights.push(target_weight);
}
weight_counter += 1;
}
assert_eq!(amount, weights.len());
weights
}
/// Sometimes `distribute_weights` generates weights that would require too many bits to encode.
/// This redistributes the weights to have less variance by raising the lower weights, while still
/// maintaining the required attributes of the weight distribution.
fn redistribute_weights(weights: &mut [usize], max_num_bits: usize) {
let weight_sum_log = weights
.iter()
.copied()
.map(|x| 1 << x)
.sum::<usize>()
.ilog2() as usize;
// Nothing needs to be done, this is already fine
if weight_sum_log < max_num_bits {
return;
}
// We need to decrease the weight difference by the difference between weight_sum_log and max_num_bits
let decrease_weights_by = weight_sum_log - max_num_bits + 1;
// To do that we raise the lower weights up by that difference, recording how much weight we added in the process
let mut added_weights = 0;
for weight in weights.iter_mut() {
if *weight < decrease_weights_by {
for add in *weight..decrease_weights_by {
added_weights += 1 << add;
}
*weight = decrease_weights_by;
}
}
// Then we reduce weights until the extra weight we added has been balanced out
while added_weights > 0 {
// Find the highest weight whose contribution still fits into the remaining added weight
let mut current_idx = 0;
let mut current_weight = 0;
for (idx, weight) in weights.iter().copied().enumerate() {
if 1 << (weight - 1) > added_weights {
break;
}
if weight > current_weight {
current_weight = weight;
current_idx = idx;
}
}
// Reduce that weight by 1
added_weights -= 1 << (current_weight - 1);
weights[current_idx] -= 1;
}
// At the end we normalize the weights so that they start at 1 again
if weights[0] > 1 {
let offset = weights[0] - 1;
for weight in weights.iter_mut() {
*weight -= offset;
}
}
}
#[test]
fn weights() {
// assert_eq!(distribute_weights(5).as_slice(), &[1, 1, 2, 3, 4]);
for amount in 2..=256 {
let mut weights = distribute_weights(amount);
assert_eq!(weights.len(), amount);
let sum = weights
.iter()
.copied()
.map(|weight| 1 << weight)
.sum::<usize>();
assert!(sum.is_power_of_two());
for num_bit_limit in (amount.ilog2() as usize + 1)..=11 {
redistribute_weights(&mut weights, num_bit_limit);
let sum = weights
.iter()
.copied()
.map(|weight| 1 << weight)
.sum::<usize>();
assert!(sum.is_power_of_two());
assert!(
sum.ilog2() <= 11,
"Max bits too big: sum: {} {weights:?}",
sum
);
let codes = HuffmanTable::build_from_weights(&weights).codes;
for (code, num_bits) in codes.iter().copied() {
for (code2, num_bits2) in codes.iter().copied() {
if num_bits == 0 || num_bits2 == 0 || (code, num_bits) == (code2, num_bits2) {
continue;
}
if num_bits <= num_bits2 {
let code2_shifted = code2 >> (num_bits2 - num_bits);
assert_ne!(
code, code2_shifted,
"{:b},{num_bits:} is prefix of {:b},{num_bits2:}",
code, code2
);
}
}
}
}
}
}
#[test]
fn counts() {
let counts = &[3, 0, 4, 1, 5];
let table = HuffmanTable::build_from_counts(counts).codes;
assert_eq!(table[1].1, 0);
assert!(table[3].1 >= table[0].1);
assert!(table[0].1 >= table[2].1);
assert!(table[2].1 >= table[4].1);
let counts = &[3, 0, 4, 0, 7, 2, 2, 2, 0, 2, 2, 1, 5];
let table = HuffmanTable::build_from_counts(counts).codes;
assert_eq!(table[1].1, 0);
assert_eq!(table[3].1, 0);
assert_eq!(table[8].1, 0);
assert!(table[11].1 >= table[5].1);
assert!(table[5].1 >= table[6].1);
assert!(table[6].1 >= table[7].1);
assert!(table[7].1 >= table[9].1);
assert!(table[9].1 >= table[10].1);
assert!(table[10].1 >= table[0].1);
assert!(table[0].1 >= table[2].1);
assert!(table[2].1 >= table[12].1);
assert!(table[12].1 >= table[4].1);
}
#[test]
fn from_data() {
let counts = &[3, 0, 4, 1, 5];
let table = HuffmanTable::build_from_counts(counts).codes;
let data = &[0, 2, 4, 4, 0, 3, 2, 2, 0, 2];
let table2 = HuffmanTable::build_from_data(data).codes;
assert_eq!(table, table2);
}
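// Editor's note: illustrative usage sketch, not part of the upstream ruzstd
// sources; it mirrors the round_trip helper in huff0/mod.rs. A table is built
// from the data and the data is encoded, table description included, into a
// BitWriter.
#[test]
fn encoder_usage_sketch() {
let data = &[1u8, 1, 1, 1, 2, 3, 5, 45, 12, 90];
let mut writer = BitWriter::new();
let table = HuffmanTable::build_from_data(data);
let mut encoder = HuffmanEncoder::new(&table, &mut writer);
encoder.encode(data, true);
let encoded = writer.dump();
assert!(!encoded.is_empty());
}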

vendor/ruzstd/src/huff0/mod.rs vendored Normal file

@@ -0,0 +1,84 @@
/// Huffman coding is a method of encoding where symbols are assigned a code,
/// with more commonly used symbols getting shorter codes and less commonly
/// used symbols getting longer codes. The codes are prefix free, meaning no code
/// is a prefix of another code. For example, {0, 10, 110, 111} is prefix free,
/// while {0, 01} is not, because 0 is a prefix of 01.
mod huff0_decoder;
pub use huff0_decoder::*;
pub mod huff0_encoder;
/// Only needed for testing.
///
/// Encodes the data with a table built from that data
/// Decodes the result again by first decoding the table and then the data
/// Asserts that the decoded data equals the input
#[cfg(any(test, feature = "fuzz_exports"))]
pub fn round_trip(data: &[u8]) {
use crate::bit_io::{BitReaderReversed, BitWriter};
use alloc::vec::Vec;
if data.len() < 2 {
return;
}
if data.iter().all(|x| *x == data[0]) {
return;
}
let mut writer = BitWriter::new();
let encoder_table = huff0_encoder::HuffmanTable::build_from_data(data);
let mut encoder = huff0_encoder::HuffmanEncoder::new(&encoder_table, &mut writer);
encoder.encode(data, true);
let encoded = writer.dump();
let mut decoder_table = HuffmanTable::new();
let table_bytes = decoder_table.build_decoder(&encoded).unwrap();
let mut decoder = HuffmanDecoder::new(&decoder_table);
let mut br = BitReaderReversed::new(&encoded[table_bytes as usize..]);
let mut skipped_bits = 0;
loop {
let val = br.get_bits(1);
skipped_bits += 1;
if val == 1 || skipped_bits > 8 {
break;
}
}
if skipped_bits > 8 {
//if more than 7 bits are 0, this is not the correct end of the bitstream. Either a bug or corrupted data
panic!("Corrupted end marker");
}
decoder.init_state(&mut br);
let mut decoded = Vec::new();
while br.bits_remaining() > -(decoder_table.max_num_bits as isize) {
decoded.push(decoder.decode_symbol());
decoder.next_state(&mut br);
}
assert_eq!(&decoded, data);
}
#[test]
fn roundtrip() {
use alloc::vec::Vec;
round_trip(&[1, 1, 1, 1, 2, 3]);
round_trip(&[1, 1, 1, 1, 2, 3, 5, 45, 12, 90]);
for size in 2..512 {
use alloc::vec;
let data = vec![123; size];
round_trip(&data);
let mut data = Vec::new();
for x in 0..size {
data.push(x as u8);
}
round_trip(&data);
}
#[cfg(feature = "std")]
if std::fs::exists("fuzz/artifacts/huff0").unwrap_or(false) {
for file in std::fs::read_dir("fuzz/artifacts/huff0").unwrap() {
if file.as_ref().unwrap().file_type().unwrap().is_file() {
let data = std::fs::read(file.unwrap().path()).unwrap();
round_trip(&data);
}
}
}
}