119 lines
4.8 KiB
Rust
119 lines
4.8 KiB
Rust
//! Structures and utilities used for compressing/encoding data into the Zstd format.
|
|
|
|
pub(crate) mod block_header;
|
|
pub(crate) mod blocks;
|
|
pub(crate) mod frame_header;
|
|
pub(crate) mod match_generator;
|
|
pub(crate) mod util;
|
|
|
|
mod frame_compressor;
|
|
mod levels;
|
|
pub use frame_compressor::FrameCompressor;
|
|
|
|
use crate::io::{Read, Write};
|
|
use alloc::vec::Vec;
|
|
|
|
/// Convenience function to compress some source into a target without reusing any resources of the compressor
|
|
/// ```rust
|
|
/// use ruzstd::encoding::{compress, CompressionLevel};
|
|
/// let data: &[u8] = &[0,0,0,0,0,0,0,0,0,0,0,0];
|
|
/// let mut target = Vec::new();
|
|
/// compress(data, &mut target, CompressionLevel::Fastest);
|
|
/// ```
|
|
pub fn compress<R: Read, W: Write>(source: R, target: W, level: CompressionLevel) {
|
|
let mut frame_enc = FrameCompressor::new(level);
|
|
frame_enc.set_source(source);
|
|
frame_enc.set_drain(target);
|
|
frame_enc.compress();
|
|
}
|
|
|
|
/// Convenience function to compress some source into a Vec without reusing any resources of the compressor
|
|
/// ```rust
|
|
/// use ruzstd::encoding::{compress_to_vec, CompressionLevel};
|
|
/// let data: &[u8] = &[0,0,0,0,0,0,0,0,0,0,0,0];
|
|
/// let compressed = compress_to_vec(data, CompressionLevel::Fastest);
|
|
/// ```
|
|
pub fn compress_to_vec<R: Read>(source: R, level: CompressionLevel) -> Vec<u8> {
|
|
let mut vec = Vec::new();
|
|
compress(source, &mut vec, level);
|
|
vec
|
|
}
|
|
|
|
/// The compression mode used impacts the speed of compression,
/// and resulting compression ratios. Faster compression will result
/// in worse compression ratios, and vice versa.
///
/// The type is a plain field-less enum, so it is cheap to copy, can be
/// compared for equality and can be printed with `{:?}`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum CompressionLevel {
    /// This level does not compress the data at all, and simply wraps
    /// it in a Zstandard frame.
    Uncompressed,
    /// This level is roughly equivalent to Zstd compression level 1
    Fastest,
    /// This level is roughly equivalent to Zstd level 3,
    /// or the one used by the official compressor when no level
    /// is specified.
    ///
    /// UNIMPLEMENTED
    Default,
    /// This level is roughly equivalent to Zstd level 7.
    ///
    /// UNIMPLEMENTED
    Better,
    /// This level is roughly equivalent to Zstd level 11.
    ///
    /// UNIMPLEMENTED
    Best,
}
|
|
|
|
/// Trait used by the encoder that users can use to extend the matching facilities with their own algorithm
|
|
/// making their own tradeoffs between runtime, memory usage and compression ratio
|
|
///
|
|
/// This trait operates on buffers that represent the chunks of data the matching algorithm wants to work on.
|
|
/// Each one of these buffers is referred to as a *space*. One or more of these buffers represent the window
|
|
/// the decoder will need to decode the data again.
|
|
///
|
|
/// This library asks the Matcher for a new buffer using `get_next_space` to allow reusing of allocated buffers when they are no longer part of the
|
|
/// window of data that is being used for matching.
|
|
///
|
|
/// The library fills the buffer with data that is to be compressed and commits them back to the matcher using `commit_space`.
|
|
///
|
|
/// Then it will either call `start_matching` or, if the space is deemed not worth compressing, `skip_matching` is called.
|
|
///
|
|
/// This is repeated until no more data is left to be compressed.
|
|
pub trait Matcher {
|
|
/// Get a space where we can put data to be matched on. Will be encoded as one block. The maximum allowed size is 128 kB.
|
|
fn get_next_space(&mut self) -> alloc::vec::Vec<u8>;
|
|
/// Get a reference to the last commited space
|
|
fn get_last_space(&mut self) -> &[u8];
|
|
/// Commit a space to the matcher so it can be matched against
|
|
fn commit_space(&mut self, space: alloc::vec::Vec<u8>);
|
|
/// Just process the data in the last commited space for future matching
|
|
fn skip_matching(&mut self);
|
|
/// Process the data in the last commited space for future matching AND generate matches for the data
|
|
fn start_matching(&mut self, handle_sequence: impl for<'a> FnMut(Sequence<'a>));
|
|
/// Reset this matcher so it can be used for the next new frame
|
|
fn reset(&mut self, level: CompressionLevel);
|
|
/// The size of the window the decoder will need to execute all sequences produced by this matcher
|
|
///
|
|
/// May change after a call to reset with a different compression level
|
|
fn window_size(&self) -> u64;
|
|
}
|
|
|
|
/// Sequences that a [`Matcher`] can produce
///
/// A sequence only borrows its literals and otherwise holds plain integers,
/// so it is `Copy` and can be duplicated or buffered freely.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Sequence<'data> {
    /// Is encoded as a sequence for the decoder sequence execution.
    ///
    /// First the literals will be copied to the decoded data,
    /// then `match_len` bytes are copied from `offset` bytes back in the buffer
    Triple {
        /// Bytes that are copied to the decoded data before the match.
        literals: &'data [u8],
        /// How many bytes back in the buffer the match starts.
        offset: usize,
        /// Number of bytes copied from the match position.
        match_len: usize,
    },
    /// This is returned as the last sequence in a block
    ///
    /// These literals will just be copied at the end of the sequence execution by the decoder
    Literals {
        /// Trailing bytes of the block that are copied as they are.
        literals: &'data [u8],
    },
}
|