Vendor dependencies for 0.3.0 release

2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions
--- a/vendor/ruzstd/src/fse/fse_decoder.rs
+++ b/vendor/ruzstd/src/fse/fse_decoder.rs
@@ -0,0 +1,366 @@
+use crate::bit_io::{BitReader, BitReaderReversed};
+use crate::decoding::errors::{FSEDecoderError, FSETableError};
+use alloc::vec::Vec;
+
+/// Decodes symbols by walking the entries of a borrowed [`FSETable`].
+///
+/// The decoder itself only stores the entry it is currently at; the bits that drive the
+/// transitions are supplied by the caller on every `init_state` / `update_state` call.
+pub struct FSEDecoder<'table> {
+    /// The table entry for the current state (a copy of `table.decode[state_index]`).
+    pub state: Entry,
+    /// A reference to the table used for decoding.
+    table: &'table FSETable,
+}
+
+impl<'t> FSEDecoder<'t> {
+    /// Create a decoder bound to `table`.
+    ///
+    /// The state starts out as a copy of the table's first entry, or as an all-zero entry when
+    /// the table has no entries yet. Call `init_state` before decoding.
+    pub fn new(table: &'t FSETable) -> FSEDecoder<'t> {
+        let state = match table.decode.first() {
+            Some(first) => *first,
+            None => Entry {
+                base_line: 0,
+                num_bits: 0,
+                symbol: 0,
+            },
+        };
+        FSEDecoder { state, table }
+    }
+
+    /// Returns the symbol stored in the entry the decoder is currently at.
+    pub fn decode_symbol(&self) -> u8 {
+        let Entry { symbol, .. } = self.state;
+        symbol
+    }
+
+    /// Read the initial state (`accuracy_log` bits) from `bits` and jump to that table entry.
+    /// Afterwards `decode_symbol` yields the first symbol and `update_state` moves on to the
+    /// next one.
+    ///
+    /// Fails with `TableIsUninitialized` when the table's accuracy log is still zero.
+    pub fn init_state(&mut self, bits: &mut BitReaderReversed<'_>) -> Result<(), FSEDecoderError> {
+        let accuracy_log = self.table.accuracy_log;
+        if accuracy_log == 0 {
+            return Err(FSEDecoderError::TableIsUninitialized);
+        }
+        let start_index = bits.get_bits(accuracy_log) as usize;
+        self.state = self.table.decode[start_index];
+        Ok(())
+    }
+
+    /// Move to the next state: read as many bits as the current entry asks for and add them to
+    /// the current entry's base line to obtain the index of the next entry.
+    pub fn update_state(&mut self, bits: &mut BitReaderReversed<'_>) {
+        let Entry {
+            base_line, num_bits, ..
+        } = self.state;
+        let offset = bits.get_bits(num_bits) as u32;
+        let next_index = (base_line + offset) as usize;
+        self.state = self.table.decode[next_index];
+    }
+}
+
+/// FSE decoding involves a decoding table that describes the probabilities of
+/// all symbols from 0 to the highest present one.
+///
+/// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#fse-table-description>
+#[derive(Debug, Clone)]
+pub struct FSETable {
+    /// The maximum symbol in the table (inclusive). Limits the probabilities length to max_symbol + 1.
+    max_symbol: u8,
+    /// The actual table containing the decoded symbol and the compression data
+    /// connected to that symbol.
+    pub decode: Vec<Entry>, //used to decode symbols, and calculate the next state
+    /// The size of the table is stored in logarithm base 2 format,
+    /// with the **size of the table** being equal to `(1 << accuracy_log)`.
+    /// This value is used so that the decoder knows how many bits to read from the bitstream.
+    /// A value of `0` means the table has not been built yet.
+    pub accuracy_log: u8,
+    /// In this context, probability refers to the likelihood that a symbol occurs in the given data.
+    /// Given this info, the encoder can assign shorter codes to symbols that appear more often,
+    /// and longer codes to symbols that appear less often, then the decoder can use the probability
+    /// to determine what code was assigned to what symbol.
+    ///
+    /// The probability of a single symbol is a value representing the proportion of times the symbol
+    /// would fall within the data.
+    ///
+    /// If a symbol probability is set to `-1`, it means that the probability of a symbol
+    /// occurring in the data is less than one.
+    pub symbol_probabilities: Vec<i32>, //used while building the decode Vector
+    /// Scratch space for building the table, indexed by symbol: while the entries are visited in
+    /// table order, this counts how many entries of each symbol have been seen so far.
+    symbol_counter: Vec<u32>,
+}
+
+impl FSETable {
+    /// Create an empty table that accepts symbols up to and including `max_symbol`.
+    ///
+    /// The table has no decode entries and an accuracy log of zero until one of the
+    /// `build_*` methods has been called.
+    pub fn new(max_symbol: u8) -> FSETable {
+        // Symbols are `u8`, so neither per-symbol vector ever needs more than 256 slots.
+        const SYMBOL_CAPACITY: usize = 256;
+
+        let symbol_probabilities = Vec::with_capacity(SYMBOL_CAPACITY);
+        let symbol_counter = Vec::with_capacity(SYMBOL_CAPACITY);
+        FSETable {
+            max_symbol,
+            // Sized later, once the accuracy log is known.
+            decode: Vec::new(),
+            accuracy_log: 0,
+            symbol_probabilities,
+            symbol_counter,
+        }
+    }
+
+    /// Clear `self` and then copy the accuracy log, the decode entries and both per-symbol
+    /// vectors over from `other`.
+    ///
+    /// `max_symbol` is not copied; `self` keeps its own limit.
+    pub fn reinit_from(&mut self, other: &Self) {
+        self.reset();
+        self.accuracy_log = other.accuracy_log;
+        self.decode.extend_from_slice(&other.decode);
+        self.symbol_probabilities
+            .extend_from_slice(&other.symbol_probabilities);
+        self.symbol_counter.extend_from_slice(&other.symbol_counter);
+    }
+
+    /// Return the table to its unbuilt state: accuracy log zero, no decode entries and no
+    /// per-symbol data. `max_symbol` is left as it is.
+    pub fn reset(&mut self) {
+        self.accuracy_log = 0;
+        self.decode.clear();
+        self.symbol_probabilities.clear();
+        self.symbol_counter.clear();
+    }
+
+    /// Read a table description from the start of `source` and build the decoding table from it.
+    ///
+    /// Returns how many BYTEs (not bits) of `source` the description occupied. `max_log` is the
+    /// largest accuracy log the caller is willing to accept.
+    pub fn build_decoder(&mut self, source: &[u8], max_log: u8) -> Result<usize, FSETableError> {
+        self.accuracy_log = 0;
+
+        let consumed = self.read_probabilities(source, max_log)?;
+        self.build_decoding_table().map(|()| consumed)
+    }
+
+    /// Build the decoding table from an explicit accuracy log and probability distribution.
+    ///
+    /// `probs[symbol]` is the normalized probability of `symbol`: a positive value claims that
+    /// many table slots, `-1` ("less than one") claims exactly one slot, and every other
+    /// non-positive value claims none.
+    ///
+    /// # Errors
+    ///
+    /// * [`FSETableError::AccLogIsZero`] if `acc_log` is zero.
+    /// * [`FSETableError::AccLogTooBig`] if `1 << acc_log` does not fit into a `u32`.
+    /// * [`FSETableError::ProbabilityCounterMismatch`] if the slots claimed by `probs` do not add
+    ///   up to exactly `1 << acc_log`. Spreading such a distribution over the table could
+    ///   otherwise loop forever, panic, or leave unassigned entries behind.
+    /// * Whatever the table construction itself reports, e.g. too many symbols for this table.
+    ///
+    /// When one of the first three errors is returned the table has not been modified.
+    pub fn build_from_probabilities(
+        &mut self,
+        acc_log: u8,
+        probs: &[i32],
+    ) -> Result<(), FSETableError> {
+        if acc_log == 0 {
+            return Err(FSETableError::AccLogIsZero);
+        }
+        let expected_sum = 1u32
+            .checked_shl(u32::from(acc_log))
+            .ok_or(FSETableError::AccLogTooBig {
+                got: acc_log,
+                max: 31,
+            })?;
+
+        // Count the slots exactly the way the table construction hands them out.
+        let claimed_slots: u64 = probs
+            .iter()
+            .map(|&prob| match prob {
+                -1 => 1,
+                prob if prob > 0 => prob as u64,
+                _ => 0,
+            })
+            .sum();
+        if claimed_slots != u64::from(expected_sum) {
+            return Err(FSETableError::ProbabilityCounterMismatch {
+                // Saturate: the error field is only 32 bits wide.
+                got: claimed_slots.min(u64::from(u32::MAX)) as u32,
+                expected_sum,
+                symbol_probabilities: probs.to_vec(),
+            });
+        }
+
+        // Reuse the existing allocation instead of replacing the vector.
+        self.symbol_probabilities.clear();
+        self.symbol_probabilities.extend_from_slice(probs);
+        self.accuracy_log = acc_log;
+        self.build_decoding_table()
+    }
+
+    /// Build the actual decoding table after probabilities have been read into the table.
+    /// After this function is called, the decoding process can begin.
+    ///
+    /// Expects `self.accuracy_log` and `self.symbol_probabilities` to be set already. Works in
+    /// three passes: place the `-1` symbols at the top of the table, spread all symbols with a
+    /// positive probability over the remaining slots, then compute base line and bit count for
+    /// every spread entry.
+    ///
+    /// Returns `TooManySymbols` if there are more probabilities than `max_symbol + 1`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a computed bit count exceeds the accuracy log (see the `assert!` below).
+    fn build_decoding_table(&mut self) -> Result<(), FSETableError> {
+        if self.symbol_probabilities.len() > self.max_symbol as usize + 1 {
+            return Err(FSETableError::TooManySymbols {
+                got: self.symbol_probabilities.len(),
+            });
+        }
+
+        self.decode.clear();
+
+        let table_size = 1 << self.accuracy_log;
+        if self.decode.len() < table_size {
+            self.decode.reserve(table_size - self.decode.len());
+        }
+        //fill with dummy entries
+        self.decode.resize(
+            table_size,
+            Entry {
+                base_line: 0,
+                num_bits: 0,
+                symbol: 0,
+            },
+        );
+
+        let mut negative_idx = table_size; //will point to the lowest index which is already occupied by a negative-probability-symbol
+
+        //first scan for all -1 probabilities and place them at the top of the table
+        for symbol in 0..self.symbol_probabilities.len() {
+            if self.symbol_probabilities[symbol] == -1 {
+                negative_idx -= 1;
+                let entry = &mut self.decode[negative_idx];
+                entry.symbol = symbol as u8;
+                entry.base_line = 0;
+                entry.num_bits = self.accuracy_log;
+            }
+        }
+
+        //then place in a semi-random order all of the other symbols
+        let mut position = 0;
+        for idx in 0..self.symbol_probabilities.len() {
+            let symbol = idx as u8;
+            if self.symbol_probabilities[idx] <= 0 {
+                continue;
+            }
+
+            //for each probability point the symbol gets one slot
+            let prob = self.symbol_probabilities[idx];
+            for _ in 0..prob {
+                let entry = &mut self.decode[position];
+                entry.symbol = symbol;
+
+                position = next_position(position, table_size);
+                while position >= negative_idx {
+                    position = next_position(position, table_size);
+                    //everything at or above negative_idx is already taken
+                }
+            }
+        }
+
+        // baselines and num_bits can only be calculated when all symbols have been spread
+        self.symbol_counter.clear();
+        self.symbol_counter
+            .resize(self.symbol_probabilities.len(), 0);
+        // only the entries below negative_idx are visited; the -1 entries were completed above
+        for idx in 0..negative_idx {
+            let entry = &mut self.decode[idx];
+            let symbol = entry.symbol;
+            let prob = self.symbol_probabilities[symbol as usize];
+
+            // how many entries of this symbol came before this one in table order
+            let symbol_count = self.symbol_counter[symbol as usize];
+            let (bl, nb) = calc_baseline_and_numbits(table_size as u32, prob as u32, symbol_count);
+
+            //println!("symbol: {:2}, table: {}, prob: {:3}, count: {:3}, bl: {:3}, nb: {:2}", symbol, table_size, prob, symbol_count, bl, nb);
+
+            assert!(nb <= self.accuracy_log);
+            self.symbol_counter[symbol as usize] += 1;
+
+            entry.base_line = bl;
+            entry.num_bits = nb;
+        }
+        Ok(())
+    }
+
+    /// Read the accuracy log and the probability table from the source and return the number of bytes
+    /// read. If the size of the table is larger than the provided `max_log`, return an error.
+    ///
+    /// On success `self.accuracy_log` and `self.symbol_probabilities` hold the parsed description.
+    ///
+    /// # Errors
+    ///
+    /// * `AccLogTooBig` / `AccLogIsZero` for an unacceptable accuracy log.
+    /// * `ProbabilityCounterMismatch` if the probabilities do not add up to the table size.
+    /// * `TooManySymbols` if more than `max_symbol + 1` probabilities were read.
+    /// * Any error the bit reader reports (propagated with `?`).
+    fn read_probabilities(&mut self, source: &[u8], max_log: u8) -> Result<usize, FSETableError> {
+        self.symbol_probabilities.clear(); //just clear, we will fill a probability for each entry anyways. No need to force new allocs here
+
+        let mut br = BitReader::new(source);
+        // the first 4 bits store the accuracy log minus ACC_LOG_OFFSET
+        self.accuracy_log = ACC_LOG_OFFSET + (br.get_bits(4)? as u8);
+        if self.accuracy_log > max_log {
+            return Err(FSETableError::AccLogTooBig {
+                got: self.accuracy_log,
+                max: max_log,
+            });
+        }
+        if self.accuracy_log == 0 {
+            return Err(FSETableError::AccLogIsZero);
+        }
+
+        let probability_sum = 1 << self.accuracy_log;
+        let mut probability_counter = 0;
+
+        // keep reading probabilities until every slot of the table is accounted for
+        while probability_counter < probability_sum {
+            // the width of the next field depends on how many slots are still unassigned
+            let max_remaining_value = probability_sum - probability_counter + 1;
+            let bits_to_read = highest_bit_set(max_remaining_value);
+
+            let unchecked_value = br.get_bits(bits_to_read as usize)? as u32;
+
+            // values below low_threshold are stored with one bit less than bits_to_read
+            let low_threshold = ((1 << bits_to_read) - 1) - (max_remaining_value);
+            let mask = (1 << (bits_to_read - 1)) - 1;
+            let small_value = unchecked_value & mask;
+
+            let value = if small_value < low_threshold {
+                // short form: the top bit belongs to the next field, hand it back
+                br.return_bits(1);
+                small_value
+            } else if unchecked_value > mask {
+                unchecked_value - low_threshold
+            } else {
+                unchecked_value
+            };
+            //println!("{}, {}, {}", self.symbol_probablilities.len(), unchecked_value, value);
+
+            // the stored value is the probability plus one, so that -1 can be represented as 0
+            let prob = (value as i32) - 1;
+
+            self.symbol_probabilities.push(prob);
+            if prob != 0 {
+                if prob > 0 {
+                    probability_counter += prob as u32;
+                } else {
+                    // probability -1 counts as 1
+                    assert!(prob == -1);
+                    probability_counter += 1;
+                }
+            } else {
+                //fast skip further zero probabilities
+                // each 2-bit field gives the number of additional zero-probability symbols;
+                // a field value of 3 means another 2-bit field follows
+                loop {
+                    let skip_amount = br.get_bits(2)? as usize;
+
+                    self.symbol_probabilities
+                        .resize(self.symbol_probabilities.len() + skip_amount, 0);
+                    if skip_amount != 3 {
+                        break;
+                    }
+                }
+            }
+        }
+
+        if probability_counter != probability_sum {
+            return Err(FSETableError::ProbabilityCounterMismatch {
+                got: probability_counter,
+                expected_sum: probability_sum,
+                symbol_probabilities: self.symbol_probabilities.clone(),
+            });
+        }
+        if self.symbol_probabilities.len() > self.max_symbol as usize + 1 {
+            return Err(FSETableError::TooManySymbols {
+                got: self.symbol_probabilities.len(),
+            });
+        }
+
+        // round the number of consumed bits up to whole bytes
+        let bytes_read = if br.bits_read() % 8 == 0 {
+            br.bits_read() / 8
+        } else {
+            (br.bits_read() / 8) + 1
+        };
+
+        Ok(bytes_read)
+    }
+}
+
+/// A single entry in an FSE table.
+///
+/// The position of an entry inside `FSETable::decode` is the state value it belongs to.
+#[derive(Copy, Clone, Debug)]
+pub struct Entry {
+    /// This value is used as an offset value, and it is added
+    /// to a value read from the stream to determine the next state value.
+    pub base_line: u32,
+    /// How many bits should be read from the stream when decoding this entry.
+    pub num_bits: u8,
+    /// The byte that should be put in the decode output when encountering this state.
+    pub symbol: u8,
+}
+
+/// This value is added to the first 4 bits of the stream to determine the
+/// `Accuracy_Log`, i.e. a table description stores `Accuracy_Log - 5`.
+const ACC_LOG_OFFSET: u8 = 5;
+
+/// Returns the 1-based position of the most significant set bit of `x`, i.e. the number of
+/// bits needed to represent `x` (`1 -> 1`, `2 -> 2`, `3 -> 2`, `4 -> 3`, ...).
+///
+/// # Panics
+///
+/// Panics if `x` is zero.
+fn highest_bit_set(x: u32) -> u32 {
+    assert!(x > 0);
+    x.ilog2() + 1
+}
+
+//utility functions for building the decoding table from probabilities
+/// Step from slot `p` to the slot the next symbol occurrence is spread to.
+///
+/// The step is `table_size / 2 + table_size / 8 + 3`, wrapped around with a bit mask, so
+/// `table_size` has to be a power of two.
+fn next_position(p: usize, table_size: usize) -> usize {
+    let step = (table_size >> 1) + (table_size >> 3) + 3;
+    let wrap_mask = table_size - 1;
+    (p + step) & wrap_mask
+}
+
+/// Compute `(base_line, num_bits)` for one entry of a symbol.
+///
+/// * `num_states_total` - size of the whole table.
+/// * `num_states_symbol` - number of entries the symbol owns (its probability).
+/// * `state_number` - how many entries of the same symbol precede this one in table order.
+///
+/// The table is cut into a power-of-two number of slices for the symbol. When the symbol owns
+/// fewer entries than there are slices, its first entries each cover two slices and therefore
+/// read one bit more. Returns `(0, 0)` for a symbol without entries.
+fn calc_baseline_and_numbits(
+    num_states_total: u32,
+    num_states_symbol: u32,
+    state_number: u32,
+) -> (u32, u8) {
+    if num_states_symbol == 0 {
+        return (0, 0);
+    }
+
+    // Round the number of entries up to a power of two.
+    let top_bit = highest_bit_set(num_states_symbol);
+    let slice_count = if num_states_symbol == 1 << (top_bit - 1) {
+        num_states_symbol
+    } else {
+        1 << top_bit
+    };
+
+    // The slices left over after rounding up are handed to the first entries.
+    let double_width = slice_count - num_states_symbol;
+    let single_width = num_states_symbol - double_width;
+    let slice_width = num_states_total / slice_count;
+    let bits_per_slice = (highest_bit_set(slice_width) - 1) as u8;
+
+    if state_number >= double_width {
+        let slice_index = state_number - double_width;
+        return (slice_index * slice_width, bits_per_slice);
+    }
+
+    // Double-width entries are based after all the single-width ones.
+    let base = single_width * slice_width + state_number * slice_width * 2;
+    (base, bits_per_slice + 1)
+}
--- a/vendor/ruzstd/src/fse/fse_encoder.rs
+++ b/vendor/ruzstd/src/fse/fse_encoder.rs
@@ -0,0 +1,445 @@
+use crate::bit_io::BitWriter;
+use alloc::vec::Vec;
+
+/// FSE encoder that writes its output through a borrowed [`BitWriter`].
+pub(crate) struct FSEEncoder<'output, V: AsMut<Vec<u8>>> {
+    /// The encoding table used for all symbols passed to this encoder.
+    pub(super) table: FSETable,
+    /// Destination of the table description and the encoded bits.
+    writer: &'output mut BitWriter<V>,
+}
+
+impl<V: AsMut<Vec<u8>>> FSEEncoder<'_, V> {
+    /// Create an encoder that encodes with `table` and writes to `writer`.
+    pub fn new(table: FSETable, writer: &mut BitWriter<V>) -> FSEEncoder<'_, V> {
+        FSEEncoder { table, writer }
+    }
+
+    /// Consume the encoder and hand back the table it was using.
+    #[cfg(any(test, feature = "fuzz_exports"))]
+    pub fn into_table(self) -> FSETable {
+        self.table
+    }
+
+    /// Encodes the data using the provided table
+    /// Writes
+    /// * Table description
+    /// * Encoded data
+    /// * Last state index
+    /// * Padding bits to fill up last byte
+    ///
+    /// The symbols are processed back to front: the last symbol selects the start state and
+    /// every earlier symbol is written as a state transition.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `data` is empty or contains a symbol the table has no states for.
+    #[cfg(any(test, feature = "fuzz_exports"))]
+    pub fn encode(&mut self, data: &[u8]) {
+        self.write_table();
+
+        let mut state = self.table.start_state(data[data.len() - 1]);
+        for symbol in data[..data.len() - 1].iter().rev().copied() {
+            state = Self::write_transition(&self.table, self.writer, state, symbol);
+        }
+        self.writer
+            .write_bits(state.index as u64, self.acc_log() as usize);
+
+        self.write_end_marker();
+    }
+
+    /// Encodes the data using the provided table but with two interleaved streams
+    /// Writes
+    /// * Table description
+    /// * Encoded data with two interleaved states
+    /// * Both Last state indexes
+    /// * Padding bits to fill up last byte
+    ///
+    /// Inputs of two or three symbols are supported: they consist of just the two start states
+    /// plus, for three symbols, a single transition.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `data` holds fewer than two symbols or contains a symbol the table has no
+    /// states for.
+    pub fn encode_interleaved(&mut self, data: &[u8]) {
+        self.write_table();
+
+        // The last two symbols are represented by the start states
+        let mut state_1 = self.table.start_state(data[data.len() - 1]);
+        let mut state_2 = self.table.start_state(data[data.len() - 2]);
+
+        // Then encode the state transitions for two symbols at a time, walking from the back
+        // to the front. Each chunk is `[earlier, later]`; the later symbol goes to state 1.
+        let mut pairs = data[..data.len() - 2].rchunks_exact(2);
+        for pair in pairs.by_ref() {
+            state_1 = Self::write_transition(&self.table, self.writer, state_1, pair[1]);
+            state_2 = Self::write_transition(&self.table, self.writer, state_2, pair[0]);
+        }
+
+        // Determine if we have an even or odd number of symbols to encode
+        // If odd we need to encode the last states transition and encode the final states in the flipped order
+        if let [first] = pairs.remainder() {
+            state_1 = Self::write_transition(&self.table, self.writer, state_1, *first);
+
+            self.writer
+                .write_bits(state_2.index as u64, self.acc_log() as usize);
+            self.writer
+                .write_bits(state_1.index as u64, self.acc_log() as usize);
+        } else {
+            self.writer
+                .write_bits(state_1.index as u64, self.acc_log() as usize);
+            self.writer
+                .write_bits(state_2.index as u64, self.acc_log() as usize);
+        }
+
+        self.write_end_marker();
+    }
+
+    /// Write the transition that makes a decoder arrive at `state` after it has emitted
+    /// `symbol`, and return the state the encoder continues from.
+    ///
+    /// Takes the table and the writer separately so that the returned state can stay borrowed
+    /// from the table while the writer is being used.
+    fn write_transition<'s>(
+        table: &'s FSETable,
+        writer: &mut BitWriter<V>,
+        state: &State,
+        symbol: u8,
+    ) -> &'s State {
+        let next = table.next_state(symbol, state.index);
+        let diff = state.index - next.baseline;
+        writer.write_bits(diff as u64, next.num_bits as usize);
+        next
+    }
+
+    /// Terminate the stream with a single `1` bit in the otherwise zero padding of the last
+    /// byte. An already aligned stream gets a full extra byte.
+    fn write_end_marker(&mut self) {
+        let bits_to_fill = self.writer.misaligned();
+        if bits_to_fill == 0 {
+            self.writer.write_bits(1u32, 8);
+        } else {
+            self.writer.write_bits(1u32, bits_to_fill);
+        }
+    }
+
+    /// Write the description of the table to the output.
+    fn write_table(&mut self) {
+        self.table.write_table(self.writer);
+    }
+
+    /// The accuracy log of the table used by this encoder.
+    pub(super) fn acc_log(&self) -> u8 {
+        self.table.acc_log()
+    }
+}
+
+/// Encoding-side FSE table: for every symbol the list of states that emit it.
+#[derive(Debug, Clone)]
+pub struct FSETable {
+    /// Indexed by symbol
+    pub(super) states: [SymbolStates; 256],
+    /// Size of the table, i.e. `1 << accuracy_log` (the sum of all `states.states.len()`)
+    pub(crate) table_size: usize,
+}
+
+impl FSETable {
+    /// Returns the state of `symbol` whose index range contains `idx`.
+    ///
+    /// Panics if `symbol` has no such state.
+    pub(crate) fn next_state(&self, symbol: u8, idx: usize) -> &State {
+        let states = &self.states[symbol as usize];
+        states.get(idx, self.table_size)
+    }
+
+    /// Returns the first state stored for `symbol`; used to start encoding.
+    ///
+    /// Panics if `symbol` has no states in this table.
+    pub(crate) fn start_state(&self, symbol: u8) -> &State {
+        let states = &self.states[symbol as usize];
+        &states.states[0]
+    }
+
+    /// The accuracy log, i.e. the base 2 logarithm of the table size.
+    pub fn acc_log(&self) -> u8 {
+        self.table_size.ilog2() as u8
+    }
+
+    /// Write the table description (accuracy log followed by the symbol probabilities) to
+    /// `writer` and pad the output with zero bits up to the next byte boundary.
+    pub fn write_table<V: AsMut<Vec<u8>>>(&self, writer: &mut BitWriter<V>) {
+        // the accuracy log is stored with an offset of 5 in 4 bits
+        writer.write_bits(self.acc_log() - 5, 4);
+        let mut probability_counter = 0usize;
+        let probability_sum = 1 << self.acc_log();
+
+        let mut prob_idx = 0;
+        // emit probabilities until every slot of the table is accounted for
+        while probability_counter < probability_sum {
+            // the width of the next field depends on how many slots are still unassigned
+            let max_remaining_value = probability_sum - probability_counter + 1;
+            let bits_to_write = max_remaining_value.ilog2() + 1;
+            let low_threshold = ((1 << bits_to_write) - 1) - (max_remaining_value);
+            let mask = (1 << (bits_to_write - 1)) - 1;
+
+            let prob = self.states[prob_idx].probability;
+            prob_idx += 1;
+            // stored as probability + 1 so that -1 becomes 0
+            let value = (prob + 1) as u32;
+            if value < low_threshold as u32 {
+                // small values are written with one bit less
+                writer.write_bits(value, bits_to_write as usize - 1);
+            } else if value > mask {
+                writer.write_bits(value + low_threshold as u32, bits_to_write as usize);
+            } else {
+                writer.write_bits(value, bits_to_write as usize);
+            }
+
+            if prob == -1 {
+                probability_counter += 1;
+            } else if prob > 0 {
+                probability_counter += prob as usize;
+            } else {
+                // a zero probability is followed by 2-bit counts of the zero probabilities
+                // that come right after it; a count of 3 means another count follows
+                let mut zeros = 0u8;
+                while self.states[prob_idx].probability == 0 {
+                    zeros += 1;
+                    prob_idx += 1;
+                    if zeros == 3 {
+                        writer.write_bits(3u8, 2);
+                        zeros = 0;
+                    }
+                }
+                writer.write_bits(zeros, 2);
+            }
+        }
+        // pad with zero bits up to the next byte boundary
+        writer.write_bits(0u8, writer.misaligned());
+    }
+}
+
+/// All states of one symbol together with the symbol's probability.
+#[derive(Debug, Clone)]
+pub(super) struct SymbolStates {
+    /// Sorted by baseline to allow easy lookup using an index
+    pub(super) states: Vec<State>,
+    /// Normalized probability of the symbol: `-1` for "less than one", `0` if unused.
+    pub(super) probability: i32,
+}
+
+impl SymbolStates {
+    /// Find the state whose index range contains `idx`.
+    ///
+    /// The search does not start at the front: `idx / max_idx` scaled by the number of states
+    /// gives the position to start scanning from.
+    ///
+    /// Panics if none of the scanned states contains `idx`.
+    fn get(&self, idx: usize, max_idx: usize) -> &State {
+        let first_candidate = (idx * self.states.len()) / max_idx;
+        let candidates = &self.states[first_candidate..];
+        candidates
+            .iter()
+            .find(|candidate| candidate.contains(idx))
+            .unwrap()
+    }
+}
+
+/// One state of the encoding table. It covers the inclusive index range
+/// `baseline..=last_index`.
+#[derive(Debug, Clone)]
+pub(crate) struct State {
+    /// How many bits the range of this state needs to be encoded as
+    pub(crate) num_bits: u8,
+    /// The first index targeted by this state
+    pub(crate) baseline: usize,
+    /// The last index targeted by this state (baseline + the maximum number with numbits bits allows)
+    pub(crate) last_index: usize,
+    /// Index of this state in the decoding table
+    pub(crate) index: usize,
+}
+
+impl State {
+    /// Whether `idx` lies within `baseline..=last_index`.
+    fn contains(&self, idx: usize) -> bool {
+        (self.baseline..=self.last_index).contains(&idx)
+    }
+}
+
+/// Build an encoding table from the symbol frequencies found in `data`.
+///
+/// Counts every byte and passes the counts up to and including the highest byte that occurs
+/// on to `build_table_from_counts`, together with `max_log` and `avoid_0_numbit`.
+pub fn build_table_from_data(
+    data: impl Iterator<Item = u8>,
+    max_log: u8,
+    avoid_0_numbit: bool,
+) -> FSETable {
+    let mut histogram = [0; 256];
+    data.for_each(|byte| histogram[usize::from(byte)] += 1);
+
+    // Without any data this stays at symbol 0, just like the initial value of a running maximum.
+    let highest_symbol = histogram
+        .iter()
+        .rposition(|&count| count > 0)
+        .unwrap_or(0);
+
+    build_table_from_counts(&histogram[..=highest_symbol], max_log, avoid_0_numbit)
+}
+
+/// Turn raw symbol counts into a normalized probability distribution and build the encoding
+/// table from it.
+///
+/// * `counts` - occurrences per symbol, indexed by symbol (at most 256 entries).
+/// * `max_log` - upper bound for the accuracy log of the resulting table.
+/// * `avoid_0_numbit` - if set, no probability may exceed half of the table size.
+///
+/// # Panics
+///
+/// Panics if all counts are zero, and in the cases where one of the `unwrap()` calls below finds
+/// no candidate probability to adjust.
+fn build_table_from_counts(counts: &[usize], max_log: u8, avoid_0_numbit: bool) -> FSETable {
+    let mut probs = [0; 256];
+    let probs = &mut probs[..counts.len()];
+    // the smallest non-zero count; stays 0 when every count is zero
+    let mut min_count = 0;
+    for (idx, count) in counts.iter().copied().enumerate() {
+        probs[idx] = count as i32;
+        if count > 0 && (count < min_count || min_count == 0) {
+            min_count = count;
+        }
+    }
+
+    // shift all probabilities down so that the lowest are 1
+    // NOTE(review): if every count is zero this subtraction underflows
+    min_count -= 1;
+    let mut max_prob = 0i32;
+    for prob in probs.iter_mut() {
+        if *prob > 0 {
+            *prob -= min_count as i32;
+        }
+        max_prob = max_prob.max(*prob);
+    }
+
+    // scale down when the largest probability exceeds the number of symbols;
+    // symbols that were present keep a probability of at least 1
+    if max_prob > 0 && max_prob as usize > probs.len() {
+        let divisor = max_prob / (probs.len() as i32);
+        for prob in probs.iter_mut() {
+            if *prob > 0 {
+                *prob = (*prob / divisor).max(1)
+            }
+        }
+    }
+
+    // normalize probabilities to a 2^x
+    let sum = probs.iter().sum::<i32>();
+    assert!(sum > 0);
+    let sum = sum as usize;
+    // at least 5, at most max_log
+    let acc_log = (sum.ilog2() as u8 + 1).max(5);
+    let acc_log = u8::min(acc_log, max_log);
+
+    if sum < 1 << acc_log {
+        // just raise the maximum probability as much as possible
+        // TODO is this optimal?
+        let diff = (1 << acc_log) - sum;
+        let max = probs.iter_mut().max().unwrap();
+        *max += diff as i32;
+    } else {
+        // decrease the smallest ones to 1 first
+        let mut diff = sum - (1 << acc_log);
+        while diff > 0 {
+            let min = probs.iter_mut().filter(|prob| **prob > 1).min().unwrap();
+            let decrease = usize::min(*min as usize - 1, diff);
+            diff -= decrease;
+            *min -= decrease as i32;
+        }
+    }
+    let max = probs.iter_mut().max().unwrap();
+    if avoid_0_numbit && *max > 1 << (acc_log - 1) {
+        // cap the maximum at half of the table size and hand the excess to the runner-up
+        let redistribute = *max - (1 << (acc_log - 1));
+        *max -= redistribute;
+        let max = *max;
+
+        // find first occurrence of the second_max to avoid lifting the last zero
+        let second_max = *probs.iter_mut().filter(|x| **x != max).max().unwrap();
+        let second_max = probs.iter_mut().find(|x| **x == second_max).unwrap();
+        *second_max += redistribute;
+        assert!(*second_max <= max);
+    }
+
+    build_table_from_probabilities(probs, acc_log)
+}
+
+/// Build the encoding table for a normalized probability distribution.
+///
+/// * `probs` - probability per symbol, indexed by symbol; `-1` marks a "less than one" symbol
+///   that gets a single state at the top of the table.
+/// * `acc_log` - accuracy log of the table; the table has `1 << acc_log` states.
+///
+/// The states are placed with the same `next_position` stepping and the same top-down placement
+/// of the `-1` symbols that the decoder uses, so the state indexes are meant to line up with the
+/// decoding table (the `tables_equal` test checks this).
+pub(super) fn build_table_from_probabilities(probs: &[i32], acc_log: u8) -> FSETable {
+    let mut states = core::array::from_fn::<SymbolStates, 256, _>(|_| SymbolStates {
+        states: Vec::new(),
+        probability: 0,
+    });
+
+    // distribute -1 symbols
+    // they take the highest indexes, one state each, targeting the whole table
+    let mut negative_idx = (1 << acc_log) - 1;
+    for (symbol, _prob) in probs
+        .iter()
+        .copied()
+        .enumerate()
+        .filter(|prob| prob.1 == -1)
+    {
+        states[symbol].states.push(State {
+            num_bits: acc_log,
+            baseline: 0,
+            last_index: (1 << acc_log) - 1,
+            index: negative_idx,
+        });
+        states[symbol].probability = -1;
+        negative_idx -= 1;
+    }
+
+    // distribute other symbols
+
+    // Setup all needed states per symbol with their respective index
+    let mut idx = 0;
+    for (symbol, prob) in probs.iter().copied().enumerate() {
+        if prob <= 0 {
+            continue;
+        }
+        states[symbol].probability = prob;
+        let states = &mut states[symbol].states;
+        for _ in 0..prob {
+            states.push(State {
+                num_bits: 0,
+                baseline: 0,
+                last_index: 0,
+                index: idx,
+            });
+
+            idx = next_position(idx, 1 << acc_log);
+            // skip the indexes that are occupied by -1 symbols
+            while idx > negative_idx {
+                idx = next_position(idx, 1 << acc_log);
+            }
+        }
+        assert_eq!(states.len(), prob as usize);
+    }
+
+    // After all states know their index we can determine the numbits and baselines
+    for (symbol, prob) in probs.iter().copied().enumerate() {
+        if prob <= 0 {
+            continue;
+        }
+        let prob = prob as u32;
+        let state = &mut states[symbol];
+
+        // We process the states in their order in the table
+        state.states.sort_by(|l, r| l.index.cmp(&r.index));
+
+        // round the probability up to the next power of two
+        let prob_log = if prob.is_power_of_two() {
+            prob.ilog2()
+        } else {
+            prob.ilog2() + 1
+        };
+        let rounded_up = 1u32 << prob_log;
+
+        // The lower states target double the amount of indexes -> numbits + 1
+        let double_states = rounded_up - prob;
+        let single_states = prob - double_states;
+        let num_bits = acc_log - prob_log as u8;
+        // the double states start right after the range covered by all single states
+        let mut baseline = (single_states as usize * (1 << (num_bits))) % (1 << acc_log);
+        for (idx, state) in state.states.iter_mut().enumerate() {
+            if (idx as u32) < double_states {
+                let num_bits = num_bits + 1;
+                state.baseline = baseline;
+                state.num_bits = num_bits;
+                state.last_index = baseline + ((1 << num_bits) - 1);
+
+                baseline += 1 << num_bits;
+                // wrap around so that the single states start at index 0
+                baseline %= 1 << acc_log;
+            } else {
+                state.baseline = baseline;
+                state.num_bits = num_bits;
+                state.last_index = baseline + ((1 << num_bits) - 1);
+                baseline += 1 << num_bits;
+            }
+        }
+
+        // For encoding we use the states ordered by the indexes they target
+        state.states.sort_by(|l, r| l.baseline.cmp(&r.baseline));
+    }
+
+    FSETable {
+        table_size: 1 << acc_log,
+        states,
+    }
+}
+
+/// Step from slot `p` to the slot the next state is placed at.
+///
+/// The step is `table_size / 2 + table_size / 8 + 3`, wrapped around with a bit mask, so
+/// `table_size` has to be a power of two.
+fn next_position(p: usize, table_size: usize) -> usize {
+    let step = (table_size >> 1) + (table_size >> 3) + 3;
+    let wrap_mask = table_size - 1;
+    (p + step) & wrap_mask
+}
+
+/// Probability distribution behind `default_ml_table` (accuracy log 6): 53 entries that
+/// claim 64 table slots in total, with `-1` counting as one slot.
+const ML_DIST: &[i32] = &[
+    1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1,
+];
+
+/// Probability distribution behind `default_ll_table` (accuracy log 6): 36 entries that
+/// claim 64 table slots in total, with `-1` counting as one slot.
+const LL_DIST: &[i32] = &[
+    4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
+    -1, -1, -1, -1,
+];
+
+/// Probability distribution behind `default_of_table` (accuracy log 5): 29 entries that
+/// claim 32 table slots in total, with `-1` counting as one slot.
+const OF_DIST: &[i32] = &[
+    1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1,
+];
+
+/// Encoding table built from the fixed `ML_DIST` distribution.
+pub(crate) fn default_ml_table() -> FSETable {
+    const ACCURACY_LOG: u8 = 6;
+    build_table_from_probabilities(ML_DIST, ACCURACY_LOG)
+}
+
+/// Encoding table built from the fixed `LL_DIST` distribution.
+pub(crate) fn default_ll_table() -> FSETable {
+    const ACCURACY_LOG: u8 = 6;
+    build_table_from_probabilities(LL_DIST, ACCURACY_LOG)
+}
+
+/// Encoding table built from the fixed `OF_DIST` distribution.
+pub(crate) fn default_of_table() -> FSETable {
+    const ACCURACY_LOG: u8 = 5;
+    build_table_from_probabilities(OF_DIST, ACCURACY_LOG)
+}
--- a/vendor/ruzstd/src/fse/mod.rs
+++ b/vendor/ruzstd/src/fse/mod.rs
@@ -0,0 +1,139 @@
+//! FSE, short for Finite State Entropy, is an encoding technique
+//! that assigns shorter codes to symbols that appear more frequently in data,
+//! and longer codes to less frequent symbols.
+//!
+//! FSE works by mutating a state and using that state to index into a table.
+//!
+//! Zstandard uses two different kinds of entropy encoding: FSE, and Huffman coding.
+//! Huffman is used to compress literals,
+//! while FSE is used for all other symbols (literal length code, match length code, offset code).
+//!
+//! <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#fse>
+//!
+//! <https://arxiv.org/pdf/1311.2540>
+
+mod fse_decoder;
+
+pub use fse_decoder::*;
+
+pub mod fse_encoder;
+
+/// The decoding and the encoding table built from the same distribution must describe the
+/// same states.
+#[test]
+fn tables_equal() {
+    const ACC_LOG: u8 = 6;
+    let table_size: i32 = 1 << ACC_LOG;
+    // Two unused symbols, one "less than one" symbol, the rest adds up to the table size.
+    let probabilities = [0, 0, -1, 3, 2, 2, table_size - 8];
+
+    let mut decoder_table = FSETable::new(255);
+    decoder_table
+        .build_from_probabilities(ACC_LOG, &probabilities)
+        .unwrap();
+    let encoder_table = fse_encoder::build_table_from_probabilities(&probabilities, ACC_LOG);
+
+    check_tables(&decoder_table, &encoder_table);
+}
+
+/// Assert that every entry of the decoding table has a counterpart in the encoding table:
+/// a state of the same symbol with the same index, base line and number of bits.
+///
+/// Panics if a counterpart is missing or differs.
+#[cfg(any(test, feature = "fuzz_exports"))]
+fn check_tables(dec_table: &fse_decoder::FSETable, enc_table: &fse_encoder::FSETable) {
+    dec_table
+        .decode
+        .iter()
+        .enumerate()
+        .for_each(|(slot, decoded)| {
+            let candidates = &enc_table.states[usize::from(decoded.symbol)].states;
+            let counterpart = candidates
+                .iter()
+                .find(|candidate| candidate.index == slot)
+                .unwrap();
+            assert_eq!(counterpart.baseline, decoded.base_line as usize);
+            assert_eq!(counterpart.num_bits, decoded.num_bits);
+        });
+}
+
+/// Runs `round_trip` on a plain ascending sequence, on a mix of repeated ranges, and (with the
+/// `std` feature) on every file found in `fuzz/artifacts/fse`, if that directory exists.
+#[test]
+fn roundtrip() {
+    round_trip(&(0..64).collect::<alloc::vec::Vec<_>>());
+    // a skewed distribution: some ranges are repeated much more often than others
+    let mut data = alloc::vec![];
+    data.extend(0..32);
+    data.extend(0..32);
+    data.extend(0..32);
+    data.extend(0..32);
+    data.extend(0..32);
+    data.extend(20..32);
+    data.extend(20..32);
+    data.extend(0..32);
+    data.extend(20..32);
+    data.extend(100..255);
+    data.extend(20..32);
+    data.extend(20..32);
+    round_trip(&data);
+
+    // replay stored fuzzing artifacts, if there are any
+    #[cfg(feature = "std")]
+    if std::fs::exists("fuzz/artifacts/fse").unwrap_or(false) {
+        for file in std::fs::read_dir("fuzz/artifacts/fse").unwrap() {
+            if file.as_ref().unwrap().file_type().unwrap().is_file() {
+                let data = std::fs::read(file.unwrap().path()).unwrap();
+                round_trip(&data);
+            }
+        }
+    }
+}
+
+/// Only needed for testing.
+///
+/// Encodes the data with a table built from that data
+/// Decodes the result again by first decoding the table and then the data
+/// Asserts that the decoded data equals the input
+///
+/// Returns without doing anything for inputs shorter than 64 bytes and for inputs that consist
+/// of a single repeated byte.
+///
+/// # Panics
+///
+/// Panics if the tables differ, the end marker is missing, or the decoded data differs from `data`.
+#[cfg(any(test, feature = "fuzz_exports"))]
+pub fn round_trip(data: &[u8]) {
+    use crate::bit_io::{BitReaderReversed, BitWriter};
+    use fse_encoder::FSEEncoder;
+
+    if data.len() < 2 {
+        return;
+    }
+    if data.iter().all(|x| *x == data[0]) {
+        return;
+    }
+    if data.len() < 64 {
+        return;
+    }
+
+    let mut writer = BitWriter::new();
+    let mut encoder = FSEEncoder::new(
+        fse_encoder::build_table_from_data(data.iter().copied(), 22, false),
+        &mut writer,
+    );
+    let mut dec_table = FSETable::new(255);
+    encoder.encode(data);
+    let acc_log = encoder.acc_log();
+    let enc_table = encoder.into_table();
+    let encoded = writer.dump();
+
+    // the table description comes first, the encoded symbols follow it
+    let table_bytes = dec_table.build_decoder(&encoded, acc_log).unwrap();
+    let encoded = &encoded[table_bytes..];
+    let mut decoder = FSEDecoder::new(&dec_table);
+
+    check_tables(&dec_table, &enc_table);
+
+    // the stream is read back to front: skip the zero padding up to and including the
+    // single 1 bit that the encoder wrote as end marker
+    let mut br = BitReaderReversed::new(encoded);
+    let mut skipped_bits = 0;
+    loop {
+        let val = br.get_bits(1);
+        skipped_bits += 1;
+        if val == 1 || skipped_bits > 8 {
+            break;
+        }
+    }
+    if skipped_bits > 8 {
+        //if more than 7 bits are 0, this is not the correct end of the bitstream. Either a bug or corrupted data
+        panic!("Corrupted end marker");
+    }
+    decoder.init_state(&mut br).unwrap();
+    let mut decoded = alloc::vec::Vec::new();
+
+    for x in data {
+        let w = decoder.decode_symbol();
+        assert_eq!(w, *x);
+        decoded.push(w);
+        // the last symbol is fully determined by the state, no more bits are read after it
+        if decoded.len() < data.len() {
+            decoder.update_state(&mut br);
+        }
+    }
+
+    assert_eq!(&decoded, data);
+
+    // every bit of the stream must have been consumed
+    assert_eq!(br.bits_remaining(), 0);
+}