286 lines
8.8 KiB
Rust
286 lines
8.8 KiB
Rust
//! Provides [Alphabet] and constants for alphabets commonly used in the wild.
|
|
|
|
use crate::PAD_BYTE;
|
|
use core::{convert, fmt};
|
|
#[cfg(any(feature = "std", test))]
|
|
use std::error;
|
|
|
|
/// Number of symbols every alphabet must contain.
const ALPHABET_SIZE: usize = 64;
|
|
|
|
/// An alphabet defines the 64 ASCII characters (symbols) used for base64.
|
|
///
|
|
/// Common alphabets are provided as constants, and custom alphabets
|
|
/// can be made via `from_str` or the `TryFrom<str>` implementation.
|
|
///
|
|
/// # Examples
|
|
///
|
|
/// Building and using a custom Alphabet:
|
|
///
|
|
/// ```
|
|
/// let custom = base64::alphabet::Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap();
|
|
///
|
|
/// let engine = base64::engine::GeneralPurpose::new(
|
|
/// &custom,
|
|
/// base64::engine::general_purpose::PAD);
|
|
/// ```
|
|
///
|
|
/// Building a const:
|
|
///
|
|
/// ```
|
|
/// use base64::alphabet::Alphabet;
|
|
///
|
|
/// static CUSTOM: Alphabet = {
|
|
/// // Result::unwrap() isn't const yet, but panic!() is OK
|
|
/// match Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") {
|
|
/// Ok(x) => x,
|
|
/// Err(_) => panic!("creation of alphabet failed"),
|
|
/// }
|
|
/// };
|
|
/// ```
|
|
///
|
|
/// Building lazily:
|
|
///
|
|
/// ```
|
|
/// use base64::{
|
|
/// alphabet::Alphabet,
|
|
/// engine::{general_purpose::GeneralPurpose, GeneralPurposeConfig},
|
|
/// };
|
|
/// use once_cell::sync::Lazy;
|
|
///
|
|
/// static CUSTOM: Lazy<Alphabet> = Lazy::new(||
|
|
/// Alphabet::new("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/").unwrap()
|
|
/// );
|
|
/// ```
|
|
#[derive(Clone, Debug, Eq, PartialEq)]
|
|
pub struct Alphabet {
|
|
pub(crate) symbols: [u8; ALPHABET_SIZE],
|
|
}
|
|
|
|
impl Alphabet {
|
|
/// Performs no checks so that it can be const.
|
|
/// Used only for known-valid strings.
|
|
const fn from_str_unchecked(alphabet: &str) -> Self {
|
|
let mut symbols = [0_u8; ALPHABET_SIZE];
|
|
let source_bytes = alphabet.as_bytes();
|
|
|
|
// a way to copy that's allowed in const fn
|
|
let mut index = 0;
|
|
while index < ALPHABET_SIZE {
|
|
symbols[index] = source_bytes[index];
|
|
index += 1;
|
|
}
|
|
|
|
Self { symbols }
|
|
}
|
|
|
|
/// Create an `Alphabet` from a string of 64 unique printable ASCII bytes.
|
|
///
|
|
/// The `=` byte is not allowed as it is used for padding.
|
|
pub const fn new(alphabet: &str) -> Result<Self, ParseAlphabetError> {
|
|
let bytes = alphabet.as_bytes();
|
|
if bytes.len() != ALPHABET_SIZE {
|
|
return Err(ParseAlphabetError::InvalidLength);
|
|
}
|
|
|
|
{
|
|
let mut index = 0;
|
|
while index < ALPHABET_SIZE {
|
|
let byte = bytes[index];
|
|
|
|
// must be ascii printable. 127 (DEL) is commonly considered printable
|
|
// for some reason but clearly unsuitable for base64.
|
|
if !(byte >= 32_u8 && byte <= 126_u8) {
|
|
return Err(ParseAlphabetError::UnprintableByte(byte));
|
|
}
|
|
// = is assumed to be padding, so cannot be used as a symbol
|
|
if byte == PAD_BYTE {
|
|
return Err(ParseAlphabetError::ReservedByte(byte));
|
|
}
|
|
|
|
// Check for duplicates while staying within what const allows.
|
|
// It's n^2, but only over 64 hot bytes, and only once, so it's likely in the single digit
|
|
// microsecond range.
|
|
|
|
let mut probe_index = 0;
|
|
while probe_index < ALPHABET_SIZE {
|
|
if probe_index == index {
|
|
probe_index += 1;
|
|
continue;
|
|
}
|
|
|
|
let probe_byte = bytes[probe_index];
|
|
|
|
if byte == probe_byte {
|
|
return Err(ParseAlphabetError::DuplicatedByte(byte));
|
|
}
|
|
|
|
probe_index += 1;
|
|
}
|
|
|
|
index += 1;
|
|
}
|
|
}
|
|
|
|
Ok(Self::from_str_unchecked(alphabet))
|
|
}
|
|
|
|
/// Create a `&str` from the symbols in the `Alphabet`
|
|
pub fn as_str(&self) -> &str {
|
|
core::str::from_utf8(&self.symbols).unwrap()
|
|
}
|
|
}
|
|
|
|
impl convert::TryFrom<&str> for Alphabet {
|
|
type Error = ParseAlphabetError;
|
|
|
|
fn try_from(value: &str) -> Result<Self, Self::Error> {
|
|
Self::new(value)
|
|
}
|
|
}
|
|
|
|
/// The reasons a `str` can be rejected when building an [Alphabet] from it.
#[derive(Debug, PartialEq, Eq)]
pub enum ParseAlphabetError {
    /// The input was not exactly 64 ASCII bytes long.
    InvalidLength,
    /// The contained byte occurs more than once; every symbol must be unique.
    DuplicatedByte(u8),
    /// The contained byte is not printable, i.e. it lies outside the range `[32, 126]`.
    UnprintableByte(u8),
    /// The contained byte is `=`, which cannot be used as a symbol.
    ReservedByte(u8),
}
|
|
|
|
impl fmt::Display for ParseAlphabetError {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
match self {
|
|
Self::InvalidLength => write!(f, "Invalid length - must be 64 bytes"),
|
|
Self::DuplicatedByte(b) => write!(f, "Duplicated byte: {:#04x}", b),
|
|
Self::UnprintableByte(b) => write!(f, "Unprintable byte: {:#04x}", b),
|
|
Self::ReservedByte(b) => write!(f, "Reserved byte: {:#04x}", b),
|
|
}
|
|
}
|
|
}
|
|
|
|
// `std::error::Error` is only implemented when `std` is available: with the `std`
// feature enabled, or when building tests.
#[cfg(any(test, feature = "std"))]
impl error::Error for ParseAlphabetError {}
|
|
|
|
/// The standard alphabet (with `+` and `/`) specified in [RFC 4648][].
|
|
///
|
|
/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-4
|
|
pub const STANDARD: Alphabet = Alphabet::from_str_unchecked(
|
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
|
|
);
|
|
|
|
/// The URL-safe alphabet (with `-` and `_`) specified in [RFC 4648][].
|
|
///
|
|
/// [RFC 4648]: https://datatracker.ietf.org/doc/html/rfc4648#section-5
|
|
pub const URL_SAFE: Alphabet = Alphabet::from_str_unchecked(
|
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_",
|
|
);
|
|
|
|
/// The `crypt(3)` alphabet (with `.` and `/` as the _first_ two characters).
|
|
///
|
|
/// Not standardized, but folk wisdom on the net asserts that this alphabet is what crypt uses.
|
|
pub const CRYPT: Alphabet = Alphabet::from_str_unchecked(
|
|
"./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
|
|
);
|
|
|
|
/// The bcrypt alphabet.
|
|
pub const BCRYPT: Alphabet = Alphabet::from_str_unchecked(
|
|
"./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
|
|
);
|
|
|
|
/// The alphabet used in IMAP-modified UTF-7 (with `+` and `,`).
|
|
///
|
|
/// See [RFC 3501](https://tools.ietf.org/html/rfc3501#section-5.1.3)
|
|
pub const IMAP_MUTF7: Alphabet = Alphabet::from_str_unchecked(
|
|
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
|
|
);
|
|
|
|
/// The alphabet used in BinHex 4.0 files.
|
|
///
|
|
/// See [BinHex 4.0 Definition](http://files.stairways.com/other/binhex-40-specs-info.txt)
|
|
pub const BIN_HEX: Alphabet = Alphabet::from_str_unchecked(
|
|
"!\"#$%&'()*+,-0123456789@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdehijklmpqr",
|
|
);
|
|
|
|
#[cfg(test)]
mod tests {
    use crate::alphabet::*;
    use core::convert::TryFrom as _;

    /// A valid alphabet, used by the tests that expect parsing to succeed.
    const VALID_SYMBOLS: &str =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    /// Parses `candidate`, which must be rejected, and returns the resulting error.
    fn rejection(candidate: &str) -> ParseAlphabetError {
        Alphabet::new(candidate).unwrap_err()
    }

    #[test]
    fn detects_duplicate_start() {
        let err = rejection("AACDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
        assert_eq!(ParseAlphabetError::DuplicatedByte(b'A'), err);
    }

    #[test]
    fn detects_duplicate_end() {
        let err = rejection("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789//");
        assert_eq!(ParseAlphabetError::DuplicatedByte(b'/'), err);
    }

    #[test]
    fn detects_duplicate_middle() {
        let err = rejection("ABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/");
        assert_eq!(ParseAlphabetError::DuplicatedByte(b'Z'), err);
    }

    #[test]
    fn detects_length() {
        // too long; the length check comes before any per-byte check
        let err =
            rejection("xxxxxxxxxABCDEFGHIJKLMNOPQRSTUVWXYZZbcdefghijklmnopqrstuvwxyz0123456789+/");
        assert_eq!(ParseAlphabetError::InvalidLength, err);
    }

    #[test]
    fn detects_padding() {
        let err = rejection("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+=");
        assert_eq!(ParseAlphabetError::ReservedByte(b'='), err);
    }

    #[test]
    fn detects_unprintable() {
        // form feed
        let err =
            rejection("\x0cBCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
        assert_eq!(ParseAlphabetError::UnprintableByte(0xc), err);
    }

    #[test]
    fn same_as_unchecked() {
        let checked = Alphabet::try_from(VALID_SYMBOLS).unwrap();
        assert_eq!(STANDARD, checked);
    }

    #[test]
    fn str_same_as_input() {
        let parsed = Alphabet::try_from(VALID_SYMBOLS).unwrap();
        assert_eq!(VALID_SYMBOLS, parsed.as_str());
    }
}
|