//! Structures and utilities used for compressing/encoding data into the Zstd format. pub(crate) mod block_header; pub(crate) mod blocks; pub(crate) mod frame_header; pub(crate) mod match_generator; pub(crate) mod util; mod frame_compressor; mod levels; pub use frame_compressor::FrameCompressor; use crate::io::{Read, Write}; use alloc::vec::Vec; /// Convenience function to compress some source into a target without reusing any resources of the compressor /// ```rust /// use ruzstd::encoding::{compress, CompressionLevel}; /// let data: &[u8] = &[0,0,0,0,0,0,0,0,0,0,0,0]; /// let mut target = Vec::new(); /// compress(data, &mut target, CompressionLevel::Fastest); /// ``` pub fn compress(source: R, target: W, level: CompressionLevel) { let mut frame_enc = FrameCompressor::new(level); frame_enc.set_source(source); frame_enc.set_drain(target); frame_enc.compress(); } /// Convenience function to compress some source into a Vec without reusing any resources of the compressor /// ```rust /// use ruzstd::encoding::{compress_to_vec, CompressionLevel}; /// let data: &[u8] = &[0,0,0,0,0,0,0,0,0,0,0,0]; /// let compressed = compress_to_vec(data, CompressionLevel::Fastest); /// ``` pub fn compress_to_vec(source: R, level: CompressionLevel) -> Vec { let mut vec = Vec::new(); compress(source, &mut vec, level); vec } /// The compression mode used impacts the speed of compression, /// and resulting compression ratios. Faster compression will result /// in worse compression ratios, and vice versa. #[derive(Copy, Clone)] pub enum CompressionLevel { /// This level does not compress the data at all, and simply wraps /// it in a Zstandard frame. Uncompressed, /// This level is roughly equivalent to Zstd compression level 1 Fastest, /// This level is roughly equivalent to Zstd level 3, /// or the one used by the official compressor when no level /// is specified. /// /// UNIMPLEMENTED Default, /// This level is roughly equivalent to Zstd level 7. /// /// UNIMPLEMENTED Better, /// This level is roughly equivalent to Zstd level 11. /// /// UNIMPLEMENTED Best, } /// Trait used by the encoder that users can use to extend the matching facilities with their own algorithm /// making their own tradeoffs between runtime, memory usage and compression ratio /// /// This trait operates on buffers that represent the chunks of data the matching algorithm wants to work on. /// Each one of these buffers is referred to as a *space*. One or more of these buffers represent the window /// the decoder will need to decode the data again. /// /// This library asks the Matcher for a new buffer using `get_next_space` to allow reusing of allocated buffers when they are no longer part of the /// window of data that is being used for matching. /// /// The library fills the buffer with data that is to be compressed and commits them back to the matcher using `commit_space`. /// /// Then it will either call `start_matching` or, if the space is deemed not worth compressing, `skip_matching` is called. /// /// This is repeated until no more data is left to be compressed. pub trait Matcher { /// Get a space where we can put data to be matched on. Will be encoded as one block. The maximum allowed size is 128 kB. fn get_next_space(&mut self) -> alloc::vec::Vec; /// Get a reference to the last commited space fn get_last_space(&mut self) -> &[u8]; /// Commit a space to the matcher so it can be matched against fn commit_space(&mut self, space: alloc::vec::Vec); /// Just process the data in the last commited space for future matching fn skip_matching(&mut self); /// Process the data in the last commited space for future matching AND generate matches for the data fn start_matching(&mut self, handle_sequence: impl for<'a> FnMut(Sequence<'a>)); /// Reset this matcher so it can be used for the next new frame fn reset(&mut self, level: CompressionLevel); /// The size of the window the decoder will need to execute all sequences produced by this matcher /// /// May change after a call to reset with a different compression level fn window_size(&self) -> u64; } #[derive(PartialEq, Eq, Debug)] /// Sequences that a [`Matcher`] can produce pub enum Sequence<'data> { /// Is encoded as a sequence for the decoder sequence execution. /// /// First the literals will be copied to the decoded data, /// then `match_len` bytes are copied from `offset` bytes back in the buffer Triple { literals: &'data [u8], offset: usize, match_len: usize, }, /// This is returned as the last sequence in a block /// /// These literals will just be copied at the end of the sequence execution by the decoder Literals { literals: &'data [u8] }, }