nix_compat/
nixbase32.rs

//! Implements the slightly odd "base32" encoding that's used in Nix.
//!
//! Nix uses a custom alphabet. Contrary to other implementations (RFC 4648),
//! encoding to "nix base32" doesn't use any padding, and emits its characters
//! in reverse order: the input is treated as a little-endian sequence of bits,
//! and the first output character holds the most significant 5-bit group.
//!
//! This is also the main reason why we can't use `data_encoding::Encoding` -
//! it gets things wrong if there normally would be a need for padding.
9
10use std::fmt::Write;
11
12use data_encoding::{DecodeError, DecodeKind};
13
/// The 32 symbols of the Nix base32 alphabet, indexed by their 5-bit value.
///
/// It consists of the ten digits followed by the lowercase letters, with
/// `e`, `o`, `t` and `u` left out.
const ALPHABET: &[u8; 32] = b"0123456789abcdfghijklmnpqrsvwxyz";
15
16/// Returns encoded input
17pub fn encode(input: &[u8]) -> String {
18    let output_len = encode_len(input.len());
19    let mut output = String::with_capacity(output_len);
20
21    for n in (0..output_len).rev() {
22        let b = n * 5; // bit offset within the entire input
23        let i = b / 8; // input byte index
24        let j = b % 8; // bit offset within that input byte
25
26        // 5-bit words aren't aligned to bytes
27        // we can only read byte-aligned units
28        // read 16 bits then shift and mask to 5
29        let c = {
30            let mut word = input[i] as u16;
31            if let Some(&msb) = input.get(i + 1) {
32                word |= (msb as u16) << 8;
33            }
34            (word >> j) & 0x1f
35        };
36
37        output.write_char(ALPHABET[c as usize] as char).unwrap();
38    }
39
40    output
41}
42
43/// This maps a nixbase32-encoded character to its binary representation, which
44/// is also the index of the character in the alphabet. Invalid characters are
45/// mapped to 0xFF, which is itself an invalid value.
46const BASE32_ORD: [u8; 256] = {
47    let mut ord = [0xFF; 256];
48    let mut alphabet = ALPHABET.as_slice();
49    let mut i = 0;
50
51    while let &[c, ref tail @ ..] = alphabet {
52        ord[c as usize] = i;
53        alphabet = tail;
54        i += 1;
55    }
56
57    ord
58};
59
60/// Returns decoded input
61pub fn decode(input: impl AsRef<[u8]>) -> Result<Vec<u8>, DecodeError> {
62    let input = input.as_ref();
63
64    let output_len = decode_len(input.len());
65    if input.len() != encode_len(output_len) {
66        return Err(DecodeError {
67            position: input.len().min(encode_len(output_len)),
68            kind: DecodeKind::Length,
69        });
70    }
71    let mut output: Vec<u8> = vec![0x00; output_len];
72
73    decode_inner(input, &mut output)?;
74    Ok(output)
75}
76
77pub fn decode_fixed<const K: usize>(input: impl AsRef<[u8]>) -> Result<[u8; K], DecodeError> {
78    let input = input.as_ref();
79
80    if input.len() != encode_len(K) {
81        return Err(DecodeError {
82            position: input.len().min(encode_len(K)),
83            kind: DecodeKind::Length,
84        });
85    }
86
87    let mut output = [0; K];
88    decode_inner(input, &mut output)?;
89    Ok(output)
90}
91
92fn decode_inner(input: &[u8], output: &mut [u8]) -> Result<(), DecodeError> {
93    // loop over all characters in reverse, and keep the iteration count in n.
94    let mut carry = 0;
95    let mut mask = 0;
96    for (n, &c) in input.iter().rev().enumerate() {
97        let b = n * 5;
98        let i = b / 8;
99        let j = b % 8;
100
101        let digit = BASE32_ORD[c as usize];
102        let value = (digit as u16) << j;
103        output[i] |= value as u8 | carry;
104        carry = (value >> 8) as u8;
105
106        mask |= digit;
107    }
108
109    if mask == 0xFF {
110        return Err(DecodeError {
111            position: find_invalid(input),
112            kind: DecodeKind::Symbol,
113        });
114    }
115
116    // if we're at the end, but have a nonzero carry, the encoding is invalid.
117    if carry != 0 {
118        return Err(DecodeError {
119            position: 0,
120            kind: DecodeKind::Trailing,
121        });
122    }
123
124    Ok(())
125}
126
127fn find_invalid(input: &[u8]) -> usize {
128    for (i, &c) in input.iter().enumerate() {
129        if !ALPHABET.contains(&c) {
130            return i;
131        }
132    }
133
134    unreachable!()
135}
136
/// Returns the number of bytes that `len` nixbase32 characters decode to.
///
/// Each character carries 5 bits, so this is `len * 5 / 8`, rounded down.
/// Not every `len` is a valid encoded length; `decode` rejects inputs whose
/// length does not round-trip through [`encode_len`].
///
/// The computation is split into whole groups of 8 characters (5 bytes each)
/// plus a remainder, so it cannot overflow for any `len`. A plain `len * 5`
/// would overflow for large inputs, notably on 32-bit targets.
pub const fn decode_len(len: usize) -> usize {
    (len / 8) * 5 + (len % 8) * 5 / 8
}
141
/// Returns the number of nixbase32 characters needed to encode `len` bytes.
///
/// Each character carries 5 bits and no padding is used, so this is
/// `len * 8 / 5`, rounded up.
///
/// The computation is split into whole groups of 5 bytes (8 characters each)
/// plus a remainder, so it only overflows when the result itself does not fit
/// into a `usize`. A plain `len * 8` would overflow much earlier, notably on
/// 32-bit targets.
pub const fn encode_len(len: usize) -> usize {
    (len / 5) * 8 + ((len % 5) * 8).div_ceil(5)
}
146
#[cfg(test)]
mod tests {
    use hex_literal::hex;
    use rstest::rstest;

    /// Known byte strings must encode to the expected nixbase32 text.
    #[rstest]
    #[case::empty_bytes("", &[])]
    #[case::one_byte("0z", &hex!("1f"))]
    #[case::store_path("00bgd045z0d4icpbc2yyz4gx48ak44la", &hex!("8a12321522fd91efbd60ebb2481af88580f61600"))]
    #[case::sha256("0c5b8vw40dy178xlpddw65q9gf1h2186jcc3p4swinwggbllv8mk", &hex!("b3a24de97a8fdbc835b9833169501030b8977031bcb54b3b3ac13740f846ab30"))]
    #[test]
    fn encode(#[case] expected: &str, #[case] bytes: &[u8]) {
        assert_eq!(expected, super::encode(bytes));
    }

    /// `Some(bytes)` cases must decode to exactly `bytes`; `None` cases must
    /// be rejected with an error.
    #[rstest]
    #[case::empty_bytes("", Some(&[][..]) )]
    #[case::one_byte("0z", Some(&hex!("1f")[..]))]
    #[case::store_path("00bgd045z0d4icpbc2yyz4gx48ak44la", Some(&hex!("8a12321522fd91efbd60ebb2481af88580f61600")[..]))]
    #[case::sha256("0c5b8vw40dy178xlpddw65q9gf1h2186jcc3p4swinwggbllv8mk", Some(&hex!("b3a24de97a8fdbc835b9833169501030b8977031bcb54b3b3ac13740f846ab30")[..]))]
    // Two 'z' digits carry ten set bits, but the length only allows for one
    // output byte, so two set bits are left over.
    #[case::invalid_encoding_1("zz", None)]
    // A subtler variant: 'c' is 0b01100, and shifted into bits 5..10 its
    // upper set bit lands beyond the single output byte.
    #[case::invalid_encoding_2("c0", None)]
    // One character cannot hold a full byte: invalid length.
    #[case::invalid_encoding_3("0", None)]
    // Three characters is not a length the encoder produces: invalid length.
    #[case::invalid_encoding_4("0zz", None)]
    #[test]
    fn decode(#[case] encoded: &str, #[case] expected: Option<&[u8]>) {
        let result = super::decode(encoded);

        if let Some(bytes) = expected {
            // Valid input: the decoded bytes must match exactly.
            assert_eq!(bytes, result.unwrap());
        } else {
            // Invalid input: decoding has to fail.
            assert!(result.is_err());
        }
    }

    #[test]
    fn decode_fixed() {
        // A 32-character store path hash decodes to 20 bytes.
        let decoded = super::decode_fixed::<20>("00bgd045z0d4icpbc2yyz4gx48ak44la").unwrap();
        assert_eq!(decoded, hex!("8a12321522fd91efbd60ebb2481af88580f61600"));

        // Too short for a 32-byte output: a length error at the input's end.
        let err = super::decode_fixed::<32>("00").unwrap_err();
        assert_eq!(
            err,
            super::DecodeError {
                position: 2,
                kind: super::DecodeKind::Length
            }
        );
    }

    #[test]
    fn encode_len() {
        let expectations = [(0, 0), (20, 32)];

        for (len, encoded) in expectations {
            assert_eq!(super::encode_len(len), encoded);
        }
    }

    #[test]
    fn decode_len() {
        let expectations = [(0, 0), (1, 0), (2, 1), (3, 1), (4, 2), (5, 3), (32, 20)];

        for (len, decoded) in expectations {
            assert_eq!(super::decode_len(len), decoded);
        }
    }
}