From 93d778bdcb108cfe29ff576946e9703f05596953 Mon Sep 17 00:00:00 2001 From: Hatter Jiang Date: Fri, 20 Oct 2023 00:05:22 +0800 Subject: [PATCH] feat: init commit, copied from crate chacha20-poly-aead --- .gitignore | 1 + Cargo.toml | 20 + LICENSE-APACHE | 202 ++++++++++ LICENSE-MIT | 19 + README-from-chacha20-poly1305-aead.md | 75 ++++ src/aead.rs | 476 +++++++++++++++++++++++ src/as_bytes.rs | 43 +++ src/chacha20.rs | 250 ++++++++++++ src/lib.rs | 60 +++ src/poly1305.rs | 536 ++++++++++++++++++++++++++ src/simd.rs | 110 ++++++ src/simd_opt/mod.rs | 44 +++ src/simd_opt/u32x4.rs | 71 ++++ src/simdint.rs | 20 + src/simdop.rs | 93 +++++ src/simdty.rs | 59 +++ 16 files changed, 2079 insertions(+) create mode 100644 Cargo.toml create mode 100644 LICENSE-APACHE create mode 100644 LICENSE-MIT create mode 100644 README-from-chacha20-poly1305-aead.md create mode 100644 src/aead.rs create mode 100644 src/as_bytes.rs create mode 100644 src/chacha20.rs create mode 100644 src/lib.rs create mode 100644 src/poly1305.rs create mode 100644 src/simd.rs create mode 100644 src/simd_opt/mod.rs create mode 100644 src/simd_opt/u32x4.rs create mode 100644 src/simdint.rs create mode 100644 src/simdop.rs create mode 100644 src/simdty.rs diff --git a/.gitignore b/.gitignore index 3bf25c0..409abaa 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.idea/ # ---> Rust # Generated by Cargo # will have compiled files and executables diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..5aad182 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "chacha20-poly1305-stream" +version = "0.1.0" +edition = "2021" +authors = ["Cesar Eduardo Barros ", "Hatter Jiang "] +description = "A pure Rust implementation of the ChaCha20-Poly1305 AEAD from RFC 7539." 
+repository = "https://git.hatter.ink/hatter/chacha20-poly1305-stream" +readme = "README.md" +keywords = ["chacha20", "poly1305", "aead", "crypto"] +license = "MIT OR Apache-2.0" + +[features] +bench = [] +simd = [] +simd_opt = ["simd"] +simd_asm = ["simd_opt"] + +[dependencies] +constant_time_eq = "0.1.0" +clippy = { version = "0.0.37", optional = true } diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 0000000..8f71f43 --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,202 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 0000000..7948117 --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,19 @@ +Copyright (c) 2015 The blake2-rfc Developers + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README-from-chacha20-poly1305-aead.md b/README-from-chacha20-poly1305-aead.md new file mode 100644 index 0000000..aa51760 --- /dev/null +++ b/README-from-chacha20-poly1305-aead.md @@ -0,0 +1,75 @@ +This is a pure Rust implementation of the ChaCha20-Poly1305 AEAD from +[RFC 7539]. + +[RFC 7539]: https://tools.ietf.org/html/rfc7539 + +## Design + +There are two main designs for an encryption/decryption API: either +having one state/context struct with a method which is called repeatedly +to encrypt/decrypt the next fragment of data, or having a single +standalone function which is called once and does all the work in a +single call. + +For authenticated encryption, it's important that on decryption no +output is produced until the authentication tag is verified. That +requires two passes over the data for decryption: the first pass +verifies the tag, and the second pass does the output. It would be +needlessly complex to implement this with a state/context struct, so +this crate uses a single function call to do the whole decryption. For +symmetry, the same design is used for the encryption function. + +The base primitives (ChaCha20 and Poly1305) are not exposed separately, +since they are harder to use securely. This also allows their +implementation to be tuned to the combined use case; for instance, the +base primitives need no buffering. + +## Limitations + +The amount of data that can be encrypted in a single call is 2^32 - 1 +blocks of 64 bytes, slightly less than 256 GiB. This limit could be +increased to 2^64 bytes, if necessary, by allowing the use of a shorter +nonce. + +This crate does not attempt to clear potentially sensitive data from its +work memory (which includes the stack and processor registers). To +do so correctly without a heavy performance penalty would require help +from the compiler. It's better to not attempt to do so than to present a +false assurance. 
+ +## SIMD optimization + +This crate has experimental support for explicit SIMD optimizations. It +requires nightly Rust due to the use of unstable features. + +The following cargo features enable the explicit SIMD optimization: + +* `simd` enables the explicit use of SIMD vectors instead of a plain + struct +* `simd_opt` additionally enables the use of SIMD shuffles to implement + some of the rotates + +While one might expect that each of these is faster than the previous +one, and that they are all faster than not enabling explicit SIMD +vectors, that's not always the case. It can vary depending on target +architecture and compiler options. If you need the extra speed from +these optimizations, benchmark each one (the `bench` feature enables +`cargo bench` in this crate, so you can use for instance `cargo bench +--features="bench simd_opt"`). They have currently been tuned for SSE2 +(x86 and x86-64) and NEON (arm). + +## License + +Licensed under either of + + * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) + +at your option. + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally +submitted for inclusion in the work by you, as defined in the Apache-2.0 +license, shall be dual licensed as above, without any additional terms or +conditions. diff --git a/src/aead.rs b/src/aead.rs new file mode 100644 index 0000000..d9a378e --- /dev/null +++ b/src/aead.rs @@ -0,0 +1,476 @@ +// Copyright 2016 chacha20-poly1305-aead Developers +// +// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or +// http://opensource.org/licenses/MIT>, at your option. This file may not be +// copied, modified, or distributed except according to those terms. 
+ +use std::error::Error; +use std::fmt::{self, Display, Formatter}; +use std::io::{self, ErrorKind, Read, Write}; + +use crate::as_bytes::AsBytes; +use crate::chacha20::ChaCha20; +use constant_time_eq::constant_time_eq; +use crate::poly1305::Poly1305; +use crate::simd::u32x4; + +const CHACHA20_COUNTER_OVERFLOW: u64 = ((1 << 32) - 1) * 64; + +/// Encrypts a byte slice and returns the authentication tag. +/// +/// # Example +/// +/// ``` +/// use chacha20_poly1305_aead::encrypt; +/// +/// let key = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, +/// 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]; +/// let nonce = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; +/// let aad = [1, 2, 3, 4]; +/// +/// let plaintext = b"hello, world"; +/// +/// // Vec implements the Write trait +/// let mut ciphertext = Vec::with_capacity(plaintext.len()); +/// +/// let tag = encrypt(&key, &nonce, &aad, plaintext, &mut ciphertext).unwrap(); +/// +/// assert_eq!(ciphertext, [0xfc, 0x5a, 0x17, 0x82, +/// 0xab, 0xcf, 0xbc, 0x5d, 0x18, 0x29, 0xbf, 0x97]); +/// assert_eq!(tag, [0xdb, 0xb7, 0x0d, 0xda, 0xbd, 0xfa, 0x8c, 0xa5, +/// 0x60, 0xa2, 0x30, 0x3d, 0xe6, 0x07, 0x92, 0x10]); +/// ``` +pub fn encrypt(key: &[u8], nonce: &[u8], + aad: &[u8], mut input: &[u8], + output: &mut W) -> io::Result<[u8; 16]> { + encrypt_read(key, nonce, aad, &mut input, output) +} + +/// Encrypts bytes from a reader and returns the authentication tag. +/// +/// This function is identical to the `encrypt` function, the only +/// difference being that its input comes from a reader instead of a +/// byte slice. 
+pub fn encrypt_read(key: &[u8], nonce: &[u8], + aad: &[u8], input: &mut R, + output: &mut W) -> io::Result<[u8; 16]> { + let mut chacha20 = ChaCha20::new(key, nonce); + let mut poly1305 = Poly1305::new(&chacha20.next().as_bytes()[..32]); + + let aad_len = aad.len() as u64; + let mut input_len = 0; + + poly1305.padded_blocks(aad); + + let mut buf = [u32x4::default(); 4]; + loop { + let read = read_all(input, buf.as_mut_bytes())?; + if read == 0 { break; } + + input_len += read as u64; + if input_len >= CHACHA20_COUNTER_OVERFLOW { + return Err(io::Error::new(ErrorKind::WriteZero, + "counter overflow")); + } + + let block = chacha20.next(); + buf[0] = buf[0] ^ block[0]; + buf[1] = buf[1] ^ block[1]; + buf[2] = buf[2] ^ block[2]; + buf[3] = buf[3] ^ block[3]; + + poly1305.padded_blocks(&buf.as_bytes()[..read]); + output.write_all(&buf.as_bytes()[..read])?; + } + + poly1305.block([aad_len.to_le(), input_len.to_le()].as_bytes()); + + let mut tag = [0; 16]; + tag.clone_from_slice(poly1305.tag().as_bytes()); + Ok(tag) +} + +/// Verifies the authentication tag and decrypts a byte slice. +/// +/// If the tag does not match, this function produces no output and +/// returns `Err(DecryptError::TagMismatch)`. 
+/// +/// # Example +/// +/// ``` +/// # use chacha20_poly1305_aead::DecryptError; +/// # fn example() -> Result<(), DecryptError> { +/// use chacha20_poly1305_aead::decrypt; +/// +/// let key = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, +/// 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]; +/// let nonce = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; +/// let aad = [1, 2, 3, 4]; +/// +/// let ciphertext = [0xfc, 0x5a, 0x17, 0x82, 0xab, 0xcf, 0xbc, 0x5d, +/// 0x18, 0x29, 0xbf, 0x97]; +/// let tag = [0xdb, 0xb7, 0x0d, 0xda, 0xbd, 0xfa, 0x8c, 0xa5, +/// 0x60, 0xa2, 0x30, 0x3d, 0xe6, 0x07, 0x92, 0x10]; +/// +/// // Vec implements the Write trait +/// let mut plaintext = Vec::with_capacity(ciphertext.len()); +/// +/// try!(decrypt(&key, &nonce, &aad, &ciphertext, &tag, &mut plaintext)); +/// +/// assert_eq!(plaintext, b"hello, world"); +/// # Ok(()) +/// # } +/// # example().unwrap(); +/// ``` +pub fn decrypt(key: &[u8], nonce: &[u8], + aad: &[u8], mut input: &[u8], tag: &[u8], + output: &mut W) -> Result<(), DecryptError> { + let mut chacha20 = ChaCha20::new(key, nonce); + let mut poly1305 = Poly1305::new(&chacha20.next().as_bytes()[..32]); + + let aad_len = aad.len() as u64; + let input_len = input.len() as u64; + assert!(tag.len() == 16); + + if input_len >= CHACHA20_COUNTER_OVERFLOW { + return Err(io::Error::new(ErrorKind::WriteZero, + "counter overflow").into()); + } + + poly1305.padded_blocks(aad); + poly1305.padded_blocks(input); + poly1305.block([aad_len.to_le(), input_len.to_le()].as_bytes()); + + if !constant_time_eq(poly1305.tag().as_bytes(), tag) { + return Err(DecryptError::TagMismatch); + } + + let mut buf = [u32x4::default(); 4]; + loop { + let read = read_all(&mut input, buf.as_mut_bytes())?; + if read == 0 { break; } + + let block = chacha20.next(); + buf[0] = buf[0] ^ block[0]; + buf[1] = buf[1] ^ block[1]; + buf[2] = buf[2] ^ block[2]; + buf[3] = buf[3] ^ block[3]; + + output.write_all(&buf.as_bytes()[..read])?; + } + + Ok(()) +} 
+ +fn read_all(reader: &mut R, mut buf: &mut [u8]) -> io::Result { + let mut read = 0; + while !buf.is_empty() { + match reader.read(buf) { + Ok(0) => break, + Ok(n) => { read += n; let tmp = buf; buf = &mut tmp[n..]; } + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} + Err(e) => return Err(e), + } + } + Ok(read) +} + +/// Error returned from the `decrypt` function. +#[derive(Debug)] +pub enum DecryptError { + /// The calculated Poly1305 tag did not match the given tag. + TagMismatch, + + /// There was an error writing the output. + IoError(io::Error), +} + +impl Display for DecryptError { + fn fmt(&self, fmt: &mut Formatter) -> fmt::Result { + match *self { + DecryptError::TagMismatch => fmt.write_str(self.description()), + DecryptError::IoError(ref e) => e.fmt(fmt), + } + } +} + +impl Error for DecryptError { + fn description(&self) -> &str { + match *self { + DecryptError::TagMismatch => "authentication tag mismatch", + DecryptError::IoError(ref e) => e.description(), + } + } + + fn cause(&self) -> Option<&dyn Error> { + match *self { + DecryptError::TagMismatch => None, + DecryptError::IoError(ref e) => Some(e), + } + } +} + +impl From for DecryptError { + fn from(error: io::Error) -> Self { + DecryptError::IoError(error) + } +} + +impl From for io::Error { + fn from(error: DecryptError) -> Self { + match error { + DecryptError::IoError(e) => e, + DecryptError::TagMismatch => + io::Error::new(ErrorKind::InvalidData, error), + } + } +} + +pub mod selftest { + use super::*; + + static PLAINTEXT: &'static [u8] = b"\ + Ladies and Gentlemen of the class of '99: If I could offer you o\ + nly one tip for the future, sunscreen would be it."; + + static AAD: &'static [u8] = &[0x50, 0x51, 0x52, 0x53, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7]; + + static KEY: &'static [u8] = &[ + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 
0x9c, 0x9d, 0x9e, 0x9f]; + + static NONCE: &'static [u8] = &[0x07, 0x00, 0x00, 0x00, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47]; + + static CIPHERTEXT: &'static [u8] = &[ + 0xd3, 0x1a, 0x8d, 0x34, 0x64, 0x8e, 0x60, 0xdb, + 0x7b, 0x86, 0xaf, 0xbc, 0x53, 0xef, 0x7e, 0xc2, + 0xa4, 0xad, 0xed, 0x51, 0x29, 0x6e, 0x08, 0xfe, + 0xa9, 0xe2, 0xb5, 0xa7, 0x36, 0xee, 0x62, 0xd6, + 0x3d, 0xbe, 0xa4, 0x5e, 0x8c, 0xa9, 0x67, 0x12, + 0x82, 0xfa, 0xfb, 0x69, 0xda, 0x92, 0x72, 0x8b, + 0x1a, 0x71, 0xde, 0x0a, 0x9e, 0x06, 0x0b, 0x29, + 0x05, 0xd6, 0xa5, 0xb6, 0x7e, 0xcd, 0x3b, 0x36, + 0x92, 0xdd, 0xbd, 0x7f, 0x2d, 0x77, 0x8b, 0x8c, + 0x98, 0x03, 0xae, 0xe3, 0x28, 0x09, 0x1b, 0x58, + 0xfa, 0xb3, 0x24, 0xe4, 0xfa, 0xd6, 0x75, 0x94, + 0x55, 0x85, 0x80, 0x8b, 0x48, 0x31, 0xd7, 0xbc, + 0x3f, 0xf4, 0xde, 0xf0, 0x8e, 0x4b, 0x7a, 0x9d, + 0xe5, 0x76, 0xd2, 0x65, 0x86, 0xce, 0xc6, 0x4b, + 0x61, 0x16]; + + static TAG: &'static [u8] = &[ + 0x1a, 0xe1, 0x0b, 0x59, 0x4f, 0x09, 0xe2, 0x6a, + 0x7e, 0x90, 0x2e, 0xcb, 0xd0, 0x60, 0x06, 0x91]; + + #[cold] + pub fn selftest() { + selftest_encrypt(); + selftest_decrypt(); + selftest_decrypt_mismatch(); + } + + #[cold] + pub fn selftest_encrypt() { + let mut output = Vec::with_capacity(PLAINTEXT.len()); + let tag = encrypt(KEY, NONCE, AAD, PLAINTEXT, &mut output) + .expect("selftest failure"); + + assert_eq!(&output[..], CIPHERTEXT); + assert_eq!(tag, TAG); + } + + #[cold] + pub fn selftest_decrypt() { + let mut output = Vec::with_capacity(CIPHERTEXT.len()); + decrypt(KEY, NONCE, AAD, CIPHERTEXT, TAG, &mut output) + .expect("selftest failure"); + + assert_eq!(&output[..], PLAINTEXT); + } + + #[cold] + pub fn selftest_decrypt_mismatch() { + let mut output = Vec::with_capacity(0); + let result = decrypt(KEY, NONCE, AAD, CIPHERTEXT, &[0; 16], + &mut output); + + if let Err(DecryptError::TagMismatch) = result { + assert!(output.is_empty()); + } else { + panic!("selftest failure"); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn 
selftest_encrypt() { + selftest::selftest_encrypt(); + } + + #[test] + fn selftest_decrypt() { + selftest::selftest_decrypt(); + } + + #[test] + fn selftest_decrypt_mismatch() { + selftest::selftest_decrypt_mismatch(); + } + + #[test] + fn test_encrypt() { + let mut output = Vec::with_capacity(PLAINTEXT.len()); + let tag = encrypt(KEY, NONCE, AAD, PLAINTEXT.as_bytes(), + &mut output).expect("test failed"); + assert_eq!(&output[..], CIPHERTEXT); + assert_eq!(tag, TAG); + } + + #[test] + fn test_decrypt() { + let mut output = Vec::with_capacity(CIPHERTEXT.len()); + decrypt(KEY, NONCE, AAD, CIPHERTEXT, TAG, + &mut output).expect("test failed"); + assert_eq!(&output[..], PLAINTEXT.as_bytes()); + } + + static KEY: &'static [u8] = &[ + 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a, + 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0, + 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09, + 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0]; + + static CIPHERTEXT: &'static [u8] = &[ + 0x64, 0xa0, 0x86, 0x15, 0x75, 0x86, 0x1a, 0xf4, + 0x60, 0xf0, 0x62, 0xc7, 0x9b, 0xe6, 0x43, 0xbd, + 0x5e, 0x80, 0x5c, 0xfd, 0x34, 0x5c, 0xf3, 0x89, + 0xf1, 0x08, 0x67, 0x0a, 0xc7, 0x6c, 0x8c, 0xb2, + 0x4c, 0x6c, 0xfc, 0x18, 0x75, 0x5d, 0x43, 0xee, + 0xa0, 0x9e, 0xe9, 0x4e, 0x38, 0x2d, 0x26, 0xb0, + 0xbd, 0xb7, 0xb7, 0x3c, 0x32, 0x1b, 0x01, 0x00, + 0xd4, 0xf0, 0x3b, 0x7f, 0x35, 0x58, 0x94, 0xcf, + 0x33, 0x2f, 0x83, 0x0e, 0x71, 0x0b, 0x97, 0xce, + 0x98, 0xc8, 0xa8, 0x4a, 0xbd, 0x0b, 0x94, 0x81, + 0x14, 0xad, 0x17, 0x6e, 0x00, 0x8d, 0x33, 0xbd, + 0x60, 0xf9, 0x82, 0xb1, 0xff, 0x37, 0xc8, 0x55, + 0x97, 0x97, 0xa0, 0x6e, 0xf4, 0xf0, 0xef, 0x61, + 0xc1, 0x86, 0x32, 0x4e, 0x2b, 0x35, 0x06, 0x38, + 0x36, 0x06, 0x90, 0x7b, 0x6a, 0x7c, 0x02, 0xb0, + 0xf9, 0xf6, 0x15, 0x7b, 0x53, 0xc8, 0x67, 0xe4, + 0xb9, 0x16, 0x6c, 0x76, 0x7b, 0x80, 0x4d, 0x46, + 0xa5, 0x9b, 0x52, 0x16, 0xcd, 0xe7, 0xa4, 0xe9, + 0x90, 0x40, 0xc5, 0xa4, 0x04, 0x33, 0x22, 0x5e, + 0xe2, 0x82, 0xa1, 0xb0, 0xa0, 0x6c, 0x52, 0x3e, + 0xaf, 0x45, 
0x34, 0xd7, 0xf8, 0x3f, 0xa1, 0x15, + 0x5b, 0x00, 0x47, 0x71, 0x8c, 0xbc, 0x54, 0x6a, + 0x0d, 0x07, 0x2b, 0x04, 0xb3, 0x56, 0x4e, 0xea, + 0x1b, 0x42, 0x22, 0x73, 0xf5, 0x48, 0x27, 0x1a, + 0x0b, 0xb2, 0x31, 0x60, 0x53, 0xfa, 0x76, 0x99, + 0x19, 0x55, 0xeb, 0xd6, 0x31, 0x59, 0x43, 0x4e, + 0xce, 0xbb, 0x4e, 0x46, 0x6d, 0xae, 0x5a, 0x10, + 0x73, 0xa6, 0x72, 0x76, 0x27, 0x09, 0x7a, 0x10, + 0x49, 0xe6, 0x17, 0xd9, 0x1d, 0x36, 0x10, 0x94, + 0xfa, 0x68, 0xf0, 0xff, 0x77, 0x98, 0x71, 0x30, + 0x30, 0x5b, 0xea, 0xba, 0x2e, 0xda, 0x04, 0xdf, + 0x99, 0x7b, 0x71, 0x4d, 0x6c, 0x6f, 0x2c, 0x29, + 0xa6, 0xad, 0x5c, 0xb4, 0x02, 0x2b, 0x02, 0x70, + 0x9b]; + + static NONCE: &'static [u8] = &[0x00, 0x00, 0x00, 0x00, + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08]; + + static AAD: &'static [u8] = &[0xf3, 0x33, 0x88, 0x86, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x91]; + + static TAG: &'static [u8] = &[ + 0xee, 0xad, 0x9d, 0x67, 0x89, 0x0c, 0xbb, 0x22, + 0x39, 0x23, 0x36, 0xfe, 0xa1, 0x85, 0x1f, 0x38]; + + static PLAINTEXT: &'static str = "\ + Internet-Drafts are draft documents valid for a maximum of six m\ + onths and may be updated, replaced, or obsoleted by other docume\ + nts at any time. 
It is inappropriate to use Internet-Drafts as r\ + eference material or to cite them other than as /\u{201c}work in prog\ + ress./\u{201d}"; +} + +#[cfg(all(feature = "bench", test))] +mod bench { + use test::{Bencher, black_box}; + use super::*; + + #[cfg_attr(feature = "clippy", allow(result_unwrap_used))] + fn bench_encrypt(b: &mut Bencher, aad: &[u8], data: &[u8]) { + let key = [!0; 32]; + let nonce = [!0; 12]; + + let mut buf = Vec::with_capacity(data.len()); + + b.bytes = data.len() as u64; + b.iter(|| { + buf.clear(); + encrypt(black_box(&key), black_box(&nonce), + black_box(aad), black_box(data), + black_box(&mut buf)).unwrap() + }) + } + + #[cfg_attr(feature = "clippy", allow(result_unwrap_used))] + fn bench_decrypt(b: &mut Bencher, aad: &[u8], data: &[u8]) { + let key = [!0; 32]; + let nonce = [!0; 12]; + + let mut ciphertext = Vec::with_capacity(data.len()); + let tag = encrypt(&key, &nonce, aad, data, &mut ciphertext).unwrap(); + let input = &ciphertext[..]; + + let mut buf = Vec::with_capacity(data.len()); + + b.bytes = data.len() as u64; + b.iter(|| { + buf.clear(); + decrypt(black_box(&key), black_box(&nonce), + black_box(aad), black_box(input), black_box(&tag), + black_box(&mut buf)).unwrap() + }) + } + + #[bench] + fn bench_encrypt_16(b: &mut Bencher) { + bench_encrypt(b, &[!0; 16], &[!0; 16]) + } + + #[bench] + fn bench_encrypt_4k(b: &mut Bencher) { + bench_encrypt(b, &[!0; 16], &[!0; 4096]) + } + + #[bench] + fn bench_encrypt_64k(b: &mut Bencher) { + bench_encrypt(b, &[!0; 16], &[!0; 65536]) + } + + #[bench] + fn bench_decrypt_16(b: &mut Bencher) { + bench_decrypt(b, &[!0; 16], &[!0; 16]) + } + + #[bench] + fn bench_decrypt_4k(b: &mut Bencher) { + bench_decrypt(b, &[!0; 16], &[!0; 4096]) + } + + #[bench] + fn bench_decrypt_64k(b: &mut Bencher) { + bench_decrypt(b, &[!0; 16], &[!0; 65536]) + } +} diff --git a/src/as_bytes.rs b/src/as_bytes.rs new file mode 100644 index 0000000..bb61cdf --- /dev/null +++ b/src/as_bytes.rs @@ -0,0 +1,43 @@ +// 
Copyright 2016 chacha20-poly1305-aead Developers +// +// Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be +// copied, modified, or distributed except according to those terms. + +use std::mem; +use std::slice; + +pub unsafe trait Safe {} + +pub trait AsBytes { + fn as_bytes(&self) -> &[u8]; + fn as_mut_bytes(&mut self) -> &mut [u8]; +} + +impl AsBytes for [T] { + #[inline] + fn as_bytes(&self) -> &[u8] { + unsafe { + slice::from_raw_parts(self.as_ptr() as *const u8, + self.len() * mem::size_of::()) + } + } + + #[inline] + fn as_mut_bytes(&mut self) -> &mut [u8] { + unsafe { + slice::from_raw_parts_mut(self.as_mut_ptr() as *mut u8, + self.len() * mem::size_of::()) + } + } +} + +unsafe impl Safe for u8 {} +unsafe impl Safe for u16 {} +unsafe impl Safe for u32 {} +unsafe impl Safe for u64 {} +unsafe impl Safe for i8 {} +unsafe impl Safe for i16 {} +unsafe impl Safe for i32 {} +unsafe impl Safe for i64 {} diff --git a/src/chacha20.rs b/src/chacha20.rs new file mode 100644 index 0000000..08d9bf7 --- /dev/null +++ b/src/chacha20.rs @@ -0,0 +1,250 @@ +// Copyright 2016 chacha20-poly1305-aead Developers +// +// Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be +// copied, modified, or distributed except according to those terms. 
+ +use crate::as_bytes::AsBytes; +use crate::simd::{Vector4, u32x4}; + +#[derive(Clone, Debug)] +pub struct ChaCha20 { + state: [u32x4; 3] +} + +#[cfg_attr(feature = "clippy", allow(should_implement_trait))] +impl ChaCha20 { + pub fn new(key: &[u8], nonce: &[u8]) -> Self { + Self::with_counter(key, nonce, 0) + } + + pub fn with_counter(key: &[u8], nonce: &[u8], counter: u32) -> Self { + assert!(key.len() == 32); + assert!(nonce.len() == 12); + + let mut k = [u32x4::default(); 2]; + k.as_mut_bytes().clone_from_slice(key); + + let mut n = [0; 3]; + n.as_mut_bytes().clone_from_slice(nonce); + + ChaCha20 { + state: [ + k[0].from_le(), + k[1].from_le(), + u32x4::new(counter.to_le(), n[0], n[1], n[2]).from_le(), + ] + } + } + + fn round(state: &mut [u32x4; 4]) { + state[0] = state[0].wrapping_add(state[1]); + state[3] = (state[3] ^ state[0]).rotate_left_const(16); + + state[2] = state[2].wrapping_add(state[3]); + state[1] = (state[1] ^ state[2]).rotate_left_const(12); + + state[0] = state[0].wrapping_add(state[1]); + state[3] = (state[3] ^ state[0]).rotate_left_const(8); + + state[2] = state[2].wrapping_add(state[3]); + state[1] = (state[1] ^ state[2]).rotate_left_const(7); + } + + fn shuffle(state: &mut [u32x4; 4]) { + state[1] = state[1].shuffle_left_1(); + state[2] = state[2].shuffle_left_2(); + state[3] = state[3].shuffle_left_3(); + } + + fn unshuffle(state: &mut [u32x4; 4]) { + state[1] = state[1].shuffle_right_1(); + state[2] = state[2].shuffle_right_2(); + state[3] = state[3].shuffle_right_3(); + } + + fn round_pair(state: &mut [u32x4; 4]) { + ChaCha20::round(state); + ChaCha20::shuffle(state); + ChaCha20::round(state); + ChaCha20::unshuffle(state); + } + + fn block(&self) -> [u32x4; 4] { + let c = u32x4::new(0x61707865, 0x3320646e, 0x79622d32, 0x6b206574); + let mut state = [c, self.state[0], self.state[1], self.state[2]]; + + ChaCha20::round_pair(&mut state); + ChaCha20::round_pair(&mut state); + ChaCha20::round_pair(&mut state); + ChaCha20::round_pair(&mut 
state); + ChaCha20::round_pair(&mut state); + ChaCha20::round_pair(&mut state); + ChaCha20::round_pair(&mut state); + ChaCha20::round_pair(&mut state); + ChaCha20::round_pair(&mut state); + ChaCha20::round_pair(&mut state); + + [ + state[0].wrapping_add(c).to_le(), + state[1].wrapping_add(self.state[0]).to_le(), + state[2].wrapping_add(self.state[1]).to_le(), + state[3].wrapping_add(self.state[2]).to_le(), + ] + } + + pub fn next(&mut self) -> [u32x4; 4] { + let block = self.block(); + self.state[2].0 = self.state[2].0.wrapping_add(1); + block + } +} + +/// Runs the self-test for the chacha20 block function. +#[cold] +pub fn selftest() { + let key = [0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f]; + let nonce = [0x00, 0x00, 0x00, 0x09, + 0x00, 0x00, 0x00, 0x4a, + 0x00, 0x00, 0x00, 0x00]; + let expected = [0x10, 0xf1, 0xe7, 0xe4, 0xd1, 0x3b, 0x59, 0x15, + 0x50, 0x0f, 0xdd, 0x1f, 0xa3, 0x20, 0x71, 0xc4, + 0xc7, 0xd1, 0xf4, 0xc7, 0x33, 0xc0, 0x68, 0x03, + 0x04, 0x22, 0xaa, 0x9a, 0xc3, 0xd4, 0x6c, 0x4e, + 0xd2, 0x82, 0x64, 0x46, 0x07, 0x9f, 0xaa, 0x09, + 0x14, 0xc2, 0xd7, 0x05, 0xd9, 0x8b, 0x02, 0xa2, + 0xb5, 0x12, 0x9c, 0xd1, 0xde, 0x16, 0x4e, 0xb9, + 0xcb, 0xd0, 0x83, 0xe8, 0xa2, 0x50, 0x3c, 0x4e]; + + let mut state = ChaCha20::with_counter(&key, &nonce, 1); + let block = state.next(); + assert_eq!(block.as_bytes(), &expected[..]); +} + +#[cfg(test)] +mod tests { + use as_bytes::AsBytes; + use super::ChaCha20; + + #[test] + fn selftest() { + super::selftest(); + } + + #[test] + fn test_vector_1_and_2() { + let mut state = ChaCha20::new(&[0; 32], &[0; 12]); + + assert_eq!(state.next().as_bytes(), + &[0x76, 0xb8, 0xe0, 0xad, 0xa0, 0xf1, 0x3d, 0x90, + 0x40, 0x5d, 0x6a, 0xe5, 0x53, 0x86, 0xbd, 0x28, + 0xbd, 0xd2, 0x19, 0xb8, 0xa0, 0x8d, 0xed, 0x1a, + 0xa8, 0x36, 0xef, 0xcc, 0x8b, 0x77, 0x0d, 0xc7, + 0xda, 0x41, 0x59, 0x7c, 
0x51, 0x57, 0x48, 0x8d, + 0x77, 0x24, 0xe0, 0x3f, 0xb8, 0xd8, 0x4a, 0x37, + 0x6a, 0x43, 0xb8, 0xf4, 0x15, 0x18, 0xa1, 0x1c, + 0xc3, 0x87, 0xb6, 0x69, 0xb2, 0xee, 0x65, 0x86][..]); + + assert_eq!(state.next().as_bytes(), + &[0x9f, 0x07, 0xe7, 0xbe, 0x55, 0x51, 0x38, 0x7a, + 0x98, 0xba, 0x97, 0x7c, 0x73, 0x2d, 0x08, 0x0d, + 0xcb, 0x0f, 0x29, 0xa0, 0x48, 0xe3, 0x65, 0x69, + 0x12, 0xc6, 0x53, 0x3e, 0x32, 0xee, 0x7a, 0xed, + 0x29, 0xb7, 0x21, 0x76, 0x9c, 0xe6, 0x4e, 0x43, + 0xd5, 0x71, 0x33, 0xb0, 0x74, 0xd8, 0x39, 0xd5, + 0x31, 0xed, 0x1f, 0x28, 0x51, 0x0a, 0xfb, 0x45, + 0xac, 0xe1, 0x0a, 0x1f, 0x4b, 0x79, 0x4d, 0x6f][..]); + } + + #[test] + fn test_vector_3() { + let key = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01]; + + let mut state = ChaCha20::with_counter(&key, &[0; 12], 1); + + assert_eq!(state.next().as_bytes(), + &[0x3a, 0xeb, 0x52, 0x24, 0xec, 0xf8, 0x49, 0x92, + 0x9b, 0x9d, 0x82, 0x8d, 0xb1, 0xce, 0xd4, 0xdd, + 0x83, 0x20, 0x25, 0xe8, 0x01, 0x8b, 0x81, 0x60, + 0xb8, 0x22, 0x84, 0xf3, 0xc9, 0x49, 0xaa, 0x5a, + 0x8e, 0xca, 0x00, 0xbb, 0xb4, 0xa7, 0x3b, 0xda, + 0xd1, 0x92, 0xb5, 0xc4, 0x2f, 0x73, 0xf2, 0xfd, + 0x4e, 0x27, 0x36, 0x44, 0xc8, 0xb3, 0x61, 0x25, + 0xa6, 0x4a, 0xdd, 0xeb, 0x00, 0x6c, 0x13, 0xa0][..]); + } + + #[test] + fn test_vector_4() { + let key = [0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; + + let mut state = ChaCha20::with_counter(&key, &[0; 12], 2); + + assert_eq!(state.next().as_bytes(), + &[0x72, 0xd5, 0x4d, 0xfb, 0xf1, 0x2e, 0xc4, 0x4b, + 0x36, 0x26, 0x92, 0xdf, 0x94, 0x13, 0x7f, 0x32, + 0x8f, 0xea, 0x8d, 0xa7, 0x39, 0x90, 0x26, 0x5e, + 0xc1, 0xbb, 0xbe, 0xa1, 0xae, 0x9a, 0xf0, 0xca, + 0x13, 0xb2, 0x5a, 0xa2, 0x6c, 0xb4, 0xa6, 0x48, + 
0xcb, 0x9b, 0x9d, 0x1b, 0xe6, 0x5b, 0x2c, 0x09, + 0x24, 0xa6, 0x6c, 0x54, 0xd5, 0x45, 0xec, 0x1b, + 0x73, 0x74, 0xf4, 0x87, 0x2e, 0x99, 0xf0, 0x96][..]); + } + + #[test] + fn test_vector_5() { + let nonce = [0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02]; + + let mut state = ChaCha20::with_counter(&[0; 32], &nonce, 0); + + assert_eq!(state.next().as_bytes(), + &[0xc2, 0xc6, 0x4d, 0x37, 0x8c, 0xd5, 0x36, 0x37, + 0x4a, 0xe2, 0x04, 0xb9, 0xef, 0x93, 0x3f, 0xcd, + 0x1a, 0x8b, 0x22, 0x88, 0xb3, 0xdf, 0xa4, 0x96, + 0x72, 0xab, 0x76, 0x5b, 0x54, 0xee, 0x27, 0xc7, + 0x8a, 0x97, 0x0e, 0x0e, 0x95, 0x5c, 0x14, 0xf3, + 0xa8, 0x8e, 0x74, 0x1b, 0x97, 0xc2, 0x86, 0xf7, + 0x5f, 0x8f, 0xc2, 0x99, 0xe8, 0x14, 0x83, 0x62, + 0xfa, 0x19, 0x8a, 0x39, 0x53, 0x1b, 0xed, 0x6d][..]); + } +} + +#[cfg(all(feature = "bench", test))] +mod bench { + use test::{Bencher, black_box}; + use super::ChaCha20; + + #[bench] + fn bench_new(b: &mut Bencher) { + let key = [!0; 32]; + let nonce = [!0; 12]; + let mut counter = 0; + + b.bytes = 48; + b.iter(|| { + counter += 1; + ChaCha20::with_counter(black_box(&key), black_box(&nonce), counter) + }) + } + + #[bench] + fn bench_block(b: &mut Bencher) { + let mut state = ChaCha20::new(&[!0; 32], &[!0; 12]); + + b.bytes = 64; + b.iter(|| { + state.next() + }) + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..c672368 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,60 @@ +// Copyright 2016 chacha20-poly1305-aead Developers +// +// Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be +// copied, modified, or distributed except according to those terms. + +//! A pure Rust implementation of the ChaCha20-Poly1305 AEAD from RFC 7539. +//! +//! An Authenticated Encryption with Associated Data (AEAD) mode +//! encrypts data and generates an authentication tag, or decrypts data +//! and verifies an authentication tag, as a single operation. The tag +//! 
//! can also validate additional authenticated data (AAD) which is not
//! included in the ciphertext, for instance a plaintext header.
//!
//! The ChaCha20-Poly1305 AEAD uses a 256-bit (32-byte) key, and a
//! 96-bit (12-byte) nonce. For each key, a given nonce should be used
//! only once, otherwise the encryption and authentication can be
//! broken. One way to prevent reuse is for the nonce to contain a
//! sequence number.
//!
//! The amount of data that can be encrypted in a single call is 2^32 - 1
//! blocks of 64 bytes, slightly less than 256 GiB.

#![warn(missing_docs)]

// Lint configuration, active only when the optional `clippy` feature is on.
#![cfg_attr(feature = "clippy", feature(plugin))]
#![cfg_attr(feature = "clippy", plugin(clippy))]
#![cfg_attr(feature = "clippy", warn(clippy_pedantic))]

// Unstable compiler features requested by the `bench` / `simd` / `simd_opt`
// cargo features.
#![cfg_attr(all(feature = "bench", test), feature(test))]
#![cfg_attr(feature = "simd", feature(platform_intrinsics, repr_simd))]
#![cfg_attr(feature = "simd_opt", feature(cfg_target_feature))]

#[cfg(all(feature = "bench", test))]
extern crate test;

extern crate constant_time_eq;

// Byte-view helper traits shared by the modules below.
mod as_bytes;

// 4-lane vector type, its operators and rotation/shuffle helpers.
mod simdty;
mod simdint;
mod simdop;
mod simd_opt;
mod simd;

// The two primitives and the AEAD construction built from them.
mod chacha20;
mod poly1305;
mod aead;

pub use aead::{DecryptError, decrypt, encrypt, encrypt_read};

/// Runs the self-test for ChaCha20, Poly1305, and the AEAD.
///
/// Each component self-test is called in turn; the ChaCha20 and Poly1305
/// ones report a mismatch by panicking via `assert_eq!`.
#[cold]
pub fn selftest() {
    chacha20::selftest();
    poly1305::selftest();
    aead::selftest::selftest();
}

// ---------------------------------------------------------------------------
// Patch metadata for the next file (kept from the surrounding diff):
// diff --git a/src/poly1305.rs b/src/poly1305.rs
// new file mode 100644
// index 0000000..dfb2dce
// --- /dev/null
// +++ b/src/poly1305.rs
// @@ -0,0 +1,536 @@
// ---------------------------------------------------------------------------
// Copyright 2016 chacha20-poly1305-aead Developers
//
// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.

// The 130-bit accumulator is split into five 26-bit limbs, with the
// carry between the limbs delayed.
+// +// The reduction steps use the following identity: +// +// a×2^n ≡ a×c (mod 2^n−c) +// +// For Poly1305, the identity becomes: +// +// a×2^130 ≡ a×5 (mod 2^130−5) +// +// That is, any limb or carry above 2^130 is multiplied by 5 and added +// back to the lower limbs. +// +// Based on the algorithm from https://github.com/floodyberry/poly1305-donna + +#[derive(Clone, Debug)] +pub struct Poly1305 { + /// Accumulator: 5x26-bit + a: [u32; 5], + /// Multiplier: 5x26-bit + r: [u32; 5], + /// Secret key: 4x32-bit + s: [u32; 4], +} + +impl Poly1305 { + pub fn new(key: &[u8]) -> Self { + assert!(key.len() == 32); + + Poly1305 { + a: [0; 5], + + // r &= 0x0ffffffc_0ffffffc_0ffffffc_0fffffff; + r: [u32_from_le(&key[ 0.. 4]) & 0x03ffffff, + u32_from_le(&key[ 3.. 7]) >> 2 & 0x03ffff03, + u32_from_le(&key[ 6..10]) >> 4 & 0x03ffc0ff, + u32_from_le(&key[ 9..13]) >> 6 & 0x03f03fff, + u32_from_le(&key[12..16]) >> 8 & 0x000fffff], + + s: [u32_from_le(&key[16..20]), + u32_from_le(&key[20..24]), + u32_from_le(&key[24..28]), + u32_from_le(&key[28..32])], + } + } + + pub fn block(&mut self, msg: &[u8]) { + assert!(msg.len() == 16); + self.accumulate(u32_from_le(&msg[ 0.. 4]) & 0x03ffffff, + u32_from_le(&msg[ 3.. 7]) >> 2 & 0x03ffffff, + u32_from_le(&msg[ 6..10]) >> 4 & 0x03ffffff, + u32_from_le(&msg[ 9..13]) >> 6 & 0x03ffffff, + u32_from_le(&msg[12..16]) >> 8 | (1 << 24)); + } + + pub fn last_block(mut self, msg: &[u8]) -> [u32; 4] { + if !msg.is_empty() { + assert!(msg.len() <= 16); + + let mut buf = [0; 17]; + buf[..msg.len()].clone_from_slice(msg); + buf[msg.len()] = 1; + + self.accumulate(u32_from_le(&buf[ 0.. 4]) & 0x03ffffff, + u32_from_le(&buf[ 3.. 
7]) >> 2 & 0x03ffffff, + u32_from_le(&buf[ 6..10]) >> 4 & 0x03ffffff, + u32_from_le(&buf[ 9..13]) >> 6 & 0x03ffffff, + u32_from_le(&buf[13..17])); + } + + self.tag() + } + + fn padded_block(&mut self, msg: &[u8]) { + assert!(msg.len() <= 16); + let mut buf = [0; 16]; + buf[..msg.len()].clone_from_slice(msg); + self.block(&buf); + } + + pub fn padded_blocks(&mut self, mut msg: &[u8]) { + while msg.len() >= 16 { + self.block(&msg[..16]); + msg = &msg[16..]; + } + if !msg.is_empty() { + self.padded_block(msg); + } + } + + fn accumulate(&mut self, n0: u32, n1: u32, n2: u32, n3: u32, n4: u32) { + self.a[0] += n0; + self.a[1] += n1; + self.a[2] += n2; + self.a[3] += n3; + self.a[4] += n4; + self.mul_r_mod_p(); + } + + #[cfg_attr(feature = "clippy", allow(cast_possible_truncation))] + fn mul_r_mod_p(&mut self) { + // t = r * a; high limbs multiplied by 5 and added to low limbs + let mut t = [0; 5]; + + t[0] += self.r[0] as u64 * self.a[0] as u64; + t[1] += self.r[0] as u64 * self.a[1] as u64; + t[2] += self.r[0] as u64 * self.a[2] as u64; + t[3] += self.r[0] as u64 * self.a[3] as u64; + t[4] += self.r[0] as u64 * self.a[4] as u64; + + t[0] += (5 * self.r[1]) as u64 * self.a[4] as u64; + t[1] += self.r[1] as u64 * self.a[0] as u64; + t[2] += self.r[1] as u64 * self.a[1] as u64; + t[3] += self.r[1] as u64 * self.a[2] as u64; + t[4] += self.r[1] as u64 * self.a[3] as u64; + + t[0] += (5 * self.r[2]) as u64 * self.a[3] as u64; + t[1] += (5 * self.r[2]) as u64 * self.a[4] as u64; + t[2] += self.r[2] as u64 * self.a[0] as u64; + t[3] += self.r[2] as u64 * self.a[1] as u64; + t[4] += self.r[2] as u64 * self.a[2] as u64; + + t[0] += (5 * self.r[3]) as u64 * self.a[2] as u64; + t[1] += (5 * self.r[3]) as u64 * self.a[3] as u64; + t[2] += (5 * self.r[3]) as u64 * self.a[4] as u64; + t[3] += self.r[3] as u64 * self.a[0] as u64; + t[4] += self.r[3] as u64 * self.a[1] as u64; + + t[0] += (5 * self.r[4]) as u64 * self.a[1] as u64; + t[1] += (5 * self.r[4]) as u64 * self.a[2] as u64; + 
t[2] += (5 * self.r[4]) as u64 * self.a[3] as u64; + t[3] += (5 * self.r[4]) as u64 * self.a[4] as u64; + t[4] += self.r[4] as u64 * self.a[0] as u64; + + // propagate carries + t[1] += t[0] >> 26; + t[2] += t[1] >> 26; + t[3] += t[2] >> 26; + t[4] += t[3] >> 26; + + // mask out carries + self.a[0] = t[0] as u32 & 0x03ffffff; + self.a[1] = t[1] as u32 & 0x03ffffff; + self.a[2] = t[2] as u32 & 0x03ffffff; + self.a[3] = t[3] as u32 & 0x03ffffff; + self.a[4] = t[4] as u32 & 0x03ffffff; + + // propagate high limb carry + self.a[0] += (t[4] >> 26) as u32 * 5; + self.a[1] += self.a[0] >> 26; + + // mask out carries + self.a[0] &= 0x03ffffff; + + // A carry of at most 1 bit has been left in self.a[1] + } + + fn propagate_carries(&mut self) { + // propagate carries + self.a[2] += self.a[1] >> 26; + self.a[3] += self.a[2] >> 26; + self.a[4] += self.a[3] >> 26; + self.a[0] += (self.a[4] >> 26) * 5; + self.a[1] += self.a[0] >> 26; + + // mask out carries + self.a[0] &= 0x03ffffff; + self.a[1] &= 0x03ffffff; + self.a[2] &= 0x03ffffff; + self.a[3] &= 0x03ffffff; + self.a[4] &= 0x03ffffff; + } + + fn reduce_mod_p(&mut self) { + self.propagate_carries(); + + let mut t = self.a; + + // t = a - p + t[0] += 5; + t[4] = t[4].wrapping_sub(1 << 26); + + // propagate carries + t[1] += t[0] >> 26; + t[2] += t[1] >> 26; + t[3] += t[2] >> 26; + t[4] = t[4].wrapping_add(t[3] >> 26); + + // mask out carries + t[0] &= 0x03ffffff; + t[1] &= 0x03ffffff; + t[2] &= 0x03ffffff; + t[3] &= 0x03ffffff; + + // constant-time select between (a - p) if non-negative, (a) otherwise + let mask = (t[4] >> 31).wrapping_sub(1); + self.a[0] = t[0] & mask | self.a[0] & !mask; + self.a[1] = t[1] & mask | self.a[1] & !mask; + self.a[2] = t[2] & mask | self.a[2] & !mask; + self.a[3] = t[3] & mask | self.a[3] & !mask; + self.a[4] = t[4] & mask | self.a[4] & !mask; + } + + #[cfg_attr(feature = "clippy", allow(cast_possible_truncation))] + pub fn tag(mut self) -> [u32; 4] { + self.reduce_mod_p(); + + // convert from 
5x26-bit to 4x32-bit + let a = [self.a[0] | self.a[1] << 26, + self.a[1] >> 6 | self.a[2] << 20, + self.a[2] >> 12 | self.a[3] << 14, + self.a[3] >> 18 | self.a[4] << 8]; + + // t = a + s + let mut t = [a[0] as u64 + self.s[0] as u64, + a[1] as u64 + self.s[1] as u64, + a[2] as u64 + self.s[2] as u64, + a[3] as u64 + self.s[3] as u64]; + + // propagate carries + t[1] += t[0] >> 32; + t[2] += t[1] >> 32; + t[3] += t[2] >> 32; + + // mask out carries + [(t[0] as u32).to_le(), + (t[1] as u32).to_le(), + (t[2] as u32).to_le(), + (t[3] as u32).to_le()] + } +} + +#[inline] +fn u32_from_le(src: &[u8]) -> u32 { + use std::mem::size_of; + use std::ptr::copy_nonoverlapping; + + assert!(src.len() == size_of::()); + unsafe { + let mut value = 0; + copy_nonoverlapping(src.as_ptr(), + &mut value as *mut u32 as *mut u8, + size_of::()); + u32::from_le(value) + } +} + +/// Runs the self-test for the poly1305 authenticator. +#[cold] +pub fn selftest() { + use crate::as_bytes::AsBytes; + + let key = [0x85, 0xd6, 0xbe, 0x78, 0x57, 0x55, 0x6d, 0x33, + 0x7f, 0x44, 0x52, 0xfe, 0x42, 0xd5, 0x06, 0xa8, + 0x01, 0x03, 0x80, 0x8a, 0xfb, 0x0d, 0xb2, 0xfd, + 0x4a, 0xbf, 0xf6, 0xaf, 0x41, 0x49, 0xf5, 0x1b]; + let msg = b"Cryptographic Forum Research Group"; + let expected = [0xa8, 0x06, 0x1d, 0xc1, 0x30, 0x51, 0x36, 0xc6, + 0xc2, 0x2b, 0x8b, 0xaf, 0x0c, 0x01, 0x27, 0xa9]; + + let mut state = Poly1305::new(&key); + state.block(&msg[ 0..16]); + state.block(&msg[16..32]); + let tag = state.last_block(&msg[32..]); + + assert_eq!(tag.as_bytes(), expected); +} + +#[cfg(test)] +mod tests { + use as_bytes::AsBytes; + use super::Poly1305; + + #[test] + fn selftest() { + super::selftest(); + } + + #[test] + fn test_vector_1() { + let mut state = Poly1305::new(&[0; 32]); + state.block(&[0; 16]); + state.block(&[0; 16]); + state.block(&[0; 16]); + state.block(&[0; 16]); + assert_eq!(state.tag().as_bytes(), &[0; 16]); + } + + static TEXT: &'static [u8] = b"\ + Any submission to the IETF intended by the 
Contributor for publi\ + cation as all or part of an IETF Internet-Draft or RFC and any s\ + tatement made within the context of an IETF activity is consider\ + ed an \"IETF Contribution\". Such statements include oral statemen\ + ts in IETF sessions, as well as written and electronic communica\ + tions made at any time or place, which are addressed to"; + + #[test] + fn test_vector_2() { + let key = [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x36, 0xe5, 0xf6, 0xb5, 0xc5, 0xe0, 0x60, 0x70, + 0xf0, 0xef, 0xca, 0x96, 0x22, 0x7a, 0x86, 0x3e]; + let mut msg = TEXT; + + let mut state = Poly1305::new(&key); + while msg.len() >= 16 { + state.block(&msg[..16]); + msg = &msg[16..]; + } + let tag = state.last_block(msg); + + assert_eq!(tag.as_bytes(), + &[0x36, 0xe5, 0xf6, 0xb5, 0xc5, 0xe0, 0x60, 0x70, + 0xf0, 0xef, 0xca, 0x96, 0x22, 0x7a, 0x86, 0x3e]); + } + + #[test] + fn test_vector_3() { + let key = [0x36, 0xe5, 0xf6, 0xb5, 0xc5, 0xe0, 0x60, 0x70, + 0xf0, 0xef, 0xca, 0x96, 0x22, 0x7a, 0x86, 0x3e, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; + let mut msg = TEXT; + + let mut state = Poly1305::new(&key); + while msg.len() >= 16 { + state.block(&msg[..16]); + msg = &msg[16..]; + } + let tag = state.last_block(msg); + + assert_eq!(tag.as_bytes(), + &[0xf3, 0x47, 0x7e, 0x7c, 0xd9, 0x54, 0x17, 0xaf, + 0x89, 0xa6, 0xb8, 0x79, 0x4c, 0x31, 0x0c, 0xf0]); + } + + #[test] + fn test_vector_4() { + let key = [0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a, + 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0, + 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09, + 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0]; + let mut msg: &[u8] = b"\ + 'Twas brillig, and the slithy toves\nDid gyre and gimble in the w\ + abe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe."; + + let mut state = Poly1305::new(&key); + while msg.len() >= 16 { + state.block(&msg[..16]); + msg = 
&msg[16..]; + } + let tag = state.last_block(msg); + + assert_eq!(tag.as_bytes(), + &[0x45, 0x41, 0x66, 0x9a, 0x7e, 0xaa, 0xee, 0x61, + 0xe7, 0x08, 0xdc, 0x7c, 0xbc, 0xc5, 0xeb, 0x62]); + } + + #[test] + fn test_vector_5() { + let key = [0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; + + let mut state = Poly1305::new(&key); + state.block(&[0xff; 16]); + + assert_eq!(state.tag().as_bytes(), + &[0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]); + } + + #[test] + fn test_vector_6() { + let key = [0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]; + + let mut state = Poly1305::new(&key); + state.block(&[0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]); + + assert_eq!(state.tag().as_bytes(), + &[0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]); + } + + #[test] + fn test_vector_7() { + let key = [0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; + + let mut state = Poly1305::new(&key); + state.block(&[0xff; 16]); + state.block(&[0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]); + state.block(&[0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]); + + assert_eq!(state.tag().as_bytes(), + &[0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]); + } + + #[test] + fn test_vector_8() { + let key = [0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; + + let mut state = Poly1305::new(&key); + state.block(&[0xff; 16]); + state.block(&[0xfb, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, + 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe]); + state.block(&[0x01; 16]); + + assert_eq!(state.tag().as_bytes(), &[0; 16]); + } + + #[test] + fn test_vector_9() { + let key = [0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; + + let mut state = Poly1305::new(&key); + state.block(&[0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]); + + assert_eq!(state.tag().as_bytes(), + &[0xfa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]); + } + + #[test] + fn test_vector_10() { + let key = [0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; + + let mut state = Poly1305::new(&key); + state.block(&[0xe3, 0x35, 0x94, 0xd7, 0x50, 0x5e, 0x43, 0xb9, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]); + state.block(&[0x33, 0x94, 0xd7, 0x50, 0x5e, 0x43, 0x79, 0xcd, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]); + state.block(&[0; 16]); + state.block(&[0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]); + + assert_eq!(state.tag().as_bytes(), + &[0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x55, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]); + } + + #[test] + fn test_vector_11() { + let key = [0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00]; + + let mut state = Poly1305::new(&key); + state.block(&[0xe3, 0x35, 0x94, 0xd7, 0x50, 0x5e, 0x43, 0xb9, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]); + state.block(&[0x33, 0x94, 0xd7, 0x50, 0x5e, 0x43, 0x79, 0xcd, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]); + state.block(&[0; 16]); + + assert_eq!(state.tag().as_bytes(), + &[0x13, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]); + } +} + +#[cfg(all(feature = "bench", test))] +mod bench { + use test::{Bencher, black_box}; + use super::Poly1305; + + #[bench] + fn bench_new(b: &mut Bencher) { + let key = [!0; 32]; + + b.bytes = 32; + b.iter(|| { + Poly1305::new(black_box(&key)) + }) + } + + #[bench] + fn bench_block(b: &mut Bencher) { + let mut state = Poly1305::new(&[!0; 32]); + let msg = [!0; 16]; + + b.bytes = 16; + b.iter(|| { + black_box(&mut state).block(black_box(&msg)) + }) + } + + #[bench] + fn bench_last_block(b: &mut Bencher) { + let state = Poly1305::new(&[!0; 32]); + let msg = [!0; 16]; + + b.bytes = 16; + b.iter(|| { + black_box(&state).clone().last_block(black_box(&msg)) + }) + } + + #[bench] + fn bench_tag(b: &mut Bencher) { + let state = Poly1305::new(&[!0; 32]); + + b.bytes = 16; + b.iter(|| { + black_box(&state).clone().tag() + }) + } +} diff --git a/src/simd.rs b/src/simd.rs new file mode 100644 index 0000000..2771902 --- /dev/null +++ b/src/simd.rs @@ -0,0 +1,110 @@ +// Copyright 2015 chacha20-poly1305-aead Developers +// +// Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be +// copied, modified, or distributed except according to those terms. 
+ +#![cfg_attr(feature = "clippy", allow(inline_always))] + +use crate::simd_opt; + +pub use crate::simdty::u32x4; + +pub trait Vector4: Copy { + fn from_le(self) -> Self; + fn to_le(self) -> Self; + + fn wrapping_add(self, rhs: Self) -> Self; + + fn rotate_left_const(self, n: u32) -> Self; + + fn shuffle_left_1(self) -> Self; + fn shuffle_left_2(self) -> Self; + fn shuffle_left_3(self) -> Self; + + #[inline(always)] fn shuffle_right_1(self) -> Self { self.shuffle_left_3() } + #[inline(always)] fn shuffle_right_2(self) -> Self { self.shuffle_left_2() } + #[inline(always)] fn shuffle_right_3(self) -> Self { self.shuffle_left_1() } +} + +macro_rules! impl_vector4 { + ($vec:ident, $word:ident) => { + impl Vector4<$word> for $vec { + #[cfg(target_endian = "little")] + #[inline(always)] + fn from_le(self) -> Self { self } + + #[cfg(not(target_endian = "little"))] + #[inline(always)] + fn from_le(self) -> Self { + $vec::new($word::from_le(self.0), + $word::from_le(self.1), + $word::from_le(self.2), + $word::from_le(self.3)) + } + + #[cfg(target_endian = "little")] + #[inline(always)] + fn to_le(self) -> Self { self } + + #[cfg(not(target_endian = "little"))] + #[inline(always)] + fn to_le(self) -> Self { + $vec::new(self.0.to_le(), + self.1.to_le(), + self.2.to_le(), + self.3.to_le()) + } + + #[inline(always)] + fn wrapping_add(self, rhs: Self) -> Self { self + rhs } + + #[inline(always)] + fn rotate_left_const(self, n: u32) -> Self { + simd_opt::$vec::rotate_left_const(self, n) + } + + #[cfg(feature = "simd")] + #[inline(always)] + fn shuffle_left_1(self) -> Self { + use simdint::simd_shuffle4; + unsafe { simd_shuffle4(self, self, [1, 2, 3, 0]) } + } + + #[cfg(not(feature = "simd"))] + #[inline(always)] + fn shuffle_left_1(self) -> Self { + $vec::new(self.1, self.2, self.3, self.0) + } + + #[cfg(feature = "simd")] + #[inline(always)] + fn shuffle_left_2(self) -> Self { + use simdint::simd_shuffle4; + unsafe { simd_shuffle4(self, self, [2, 3, 0, 1]) } + } + + 
#[cfg(not(feature = "simd"))] + #[inline(always)] + fn shuffle_left_2(self) -> Self { + $vec::new(self.2, self.3, self.0, self.1) + } + + #[cfg(feature = "simd")] + #[inline(always)] + fn shuffle_left_3(self) -> Self { + use simdint::simd_shuffle4; + unsafe { simd_shuffle4(self, self, [3, 0, 1, 2]) } + } + + #[cfg(not(feature = "simd"))] + #[inline(always)] + fn shuffle_left_3(self) -> Self { + $vec::new(self.3, self.0, self.1, self.2) + } + } + } +} + +impl_vector4!(u32x4, u32); diff --git a/src/simd_opt/mod.rs b/src/simd_opt/mod.rs new file mode 100644 index 0000000..69bef7b --- /dev/null +++ b/src/simd_opt/mod.rs @@ -0,0 +1,44 @@ +// Copyright 2015 chacha20-poly1305-aead Developers +// +// Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be +// copied, modified, or distributed except according to those terms. + +#![cfg_attr(feature = "clippy", allow(inline_always))] + +#[cfg(feature = "simd")] +macro_rules! transmute_shuffle { + ($tmp:ident, $shuffle:ident, $vec:expr, $idx:expr) => { + unsafe { + use simdty::$tmp; + use simdint::$shuffle; + use std::mem::transmute; + + let tmp_i: $tmp = transmute($vec); + let tmp_o: $tmp = $shuffle(tmp_i, tmp_i, $idx); + transmute(tmp_o) + } + } +} + +#[cfg(feature = "simd")] pub mod u32x4; + +#[cfg(not(feature = "simd"))] +macro_rules! simd_opt { + ($vec:ident) => { + pub mod $vec { + use crate::simdty::$vec; + + #[inline(always)] + pub fn rotate_left_const(vec: $vec, n: u32) -> $vec { + $vec::new(vec.0.rotate_left(n), + vec.1.rotate_left(n), + vec.2.rotate_left(n), + vec.3.rotate_left(n)) + } + } + } +} + +#[cfg(not(feature = "simd"))] simd_opt!(u32x4); diff --git a/src/simd_opt/u32x4.rs b/src/simd_opt/u32x4.rs new file mode 100644 index 0000000..bcacbb8 --- /dev/null +++ b/src/simd_opt/u32x4.rs @@ -0,0 +1,71 @@ +// Copyright 2015 chacha20-poly1305-aead Developers +// +// Licensed under the Apache License, Version 2.0, or the MIT license , at your option. 
This file may not be +// copied, modified, or distributed except according to those terms. + +#![cfg_attr(feature = "clippy", allow(inline_always))] + +use crate::simdty::u32x4; + +#[cfg(feature = "simd_opt")] +#[inline(always)] +pub fn rotate_left_const(vec: u32x4, n: u32) -> u32x4 { + match n { + 16 => rotate_left_16(vec), + 8 => rotate_left_8(vec), + _ => rotate_left_any(vec, n), + } +} + +#[cfg(not(feature = "simd_opt"))] +#[inline(always)] +pub fn rotate_left_const(vec: u32x4, n: u32) -> u32x4 { + rotate_left_any(vec, n) +} + +#[inline(always)] +fn rotate_left_any(vec: u32x4, n: u32) -> u32x4 { + let l = n as u32; + let r = 32 - l; + + (vec << u32x4::new(l, l, l, l)) ^ (vec >> u32x4::new(r, r, r, r)) +} + +#[cfg(feature = "simd_opt")] +#[inline(always)] +fn rotate_left_16(vec: u32x4) -> u32x4 { + if cfg!(target_feature = "ssse3") { + // pshufb (SSSE3) / vpshufb (AVX2) + transmute_shuffle!(u8x16, simd_shuffle16, vec, + [ 2, 3, 0, 1, + 6, 7, 4, 5, + 10, 11, 8, 9, + 14, 15, 12, 13]) + } else if cfg!(any(target_feature = "sse2", target_feature = "neon")) { + // pshuflw+pshufhw (SSE2) / vrev (NEON) + transmute_shuffle!(u16x8, simd_shuffle8, vec, + [1, 0, + 3, 2, + 5, 4, + 7, 6]) + } else { + rotate_left_any(vec, 16) + } +} + +#[cfg(feature = "simd_opt")] +#[inline(always)] +fn rotate_left_8(vec: u32x4) -> u32x4 { + if cfg!(target_feature = "ssse3") { + // pshufb (SSSE3) / vpshufb (AVX2) + transmute_shuffle!(u8x16, simd_shuffle16, vec, + [ 3, 0, 1, 2, + 7, 4, 5, 6, + 11, 8, 9, 10, + 15, 12, 13, 14]) + } else { + rotate_left_any(vec, 8) + } +} diff --git a/src/simdint.rs b/src/simdint.rs new file mode 100644 index 0000000..1f69acd --- /dev/null +++ b/src/simdint.rs @@ -0,0 +1,20 @@ +// Copyright 2015 chacha20-poly1305-aead Developers +// +// Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be +// copied, modified, or distributed except according to those terms. 
+ +#![allow(dead_code)] + +#[cfg(feature = "simd")] +extern "platform-intrinsic" { + pub fn simd_add(x: T, y: T) -> T; + pub fn simd_shl(x: T, y: T) -> T; + pub fn simd_shr(x: T, y: T) -> T; + pub fn simd_xor(x: T, y: T) -> T; + + pub fn simd_shuffle4(v: T, w: T, idx: [u32; 4]) -> U; + pub fn simd_shuffle8(v: T, w: T, idx: [u32; 8]) -> U; + pub fn simd_shuffle16(v: T, w: T, idx: [u32; 16]) -> U; +} diff --git a/src/simdop.rs b/src/simdop.rs new file mode 100644 index 0000000..fd99945 --- /dev/null +++ b/src/simdop.rs @@ -0,0 +1,93 @@ +// Copyright 2015 chacha20-poly1305-aead Developers +// +// Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be +// copied, modified, or distributed except according to those terms. + +use crate::simdty::u32x4; +#[cfg(feature = "simd")] use crate::simdint; + +use std::ops::{Add, BitXor, Shl, Shr}; + +macro_rules! impl_ops { + ($vec:ident) => { + impl Add for $vec { + type Output = Self; + + #[cfg(feature = "simd")] + #[inline(always)] + fn add(self, rhs: Self) -> Self::Output { + unsafe { simdint::simd_add(self, rhs) } + } + + #[cfg(not(feature = "simd"))] + #[inline(always)] + fn add(self, rhs: Self) -> Self::Output { + $vec::new(self.0.wrapping_add(rhs.0), + self.1.wrapping_add(rhs.1), + self.2.wrapping_add(rhs.2), + self.3.wrapping_add(rhs.3)) + } + } + + impl BitXor for $vec { + type Output = Self; + + #[cfg(feature = "simd")] + #[inline(always)] + fn bitxor(self, rhs: Self) -> Self::Output { + unsafe { simdint::simd_xor(self, rhs) } + } + + #[cfg(not(feature = "simd"))] + #[inline(always)] + fn bitxor(self, rhs: Self) -> Self::Output { + $vec::new(self.0 ^ rhs.0, + self.1 ^ rhs.1, + self.2 ^ rhs.2, + self.3 ^ rhs.3) + } + } + + impl Shl<$vec> for $vec { + type Output = Self; + + #[cfg(feature = "simd")] + #[inline(always)] + fn shl(self, rhs: Self) -> Self::Output { + unsafe { simdint::simd_shl(self, rhs) } + } + + #[cfg(not(feature = "simd"))] + #[inline(always)] + fn 
shl(self, rhs: Self) -> Self::Output { + $vec::new(self.0 << rhs.0, + self.1 << rhs.1, + self.2 << rhs.2, + self.3 << rhs.3) + } + } + + impl Shr<$vec> for $vec { + type Output = Self; + + #[cfg(feature = "simd")] + #[inline(always)] + fn shr(self, rhs: Self) -> Self::Output { + unsafe { simdint::simd_shr(self, rhs) } + } + + #[cfg(not(feature = "simd"))] + #[inline(always)] + fn shr(self, rhs: Self) -> Self::Output { + $vec::new(self.0 >> rhs.0, + self.1 >> rhs.1, + self.2 >> rhs.2, + self.3 >> rhs.3) + } + } + } +} + +impl_ops!(u32x4); diff --git a/src/simdty.rs b/src/simdty.rs new file mode 100644 index 0000000..49ddd9b --- /dev/null +++ b/src/simdty.rs @@ -0,0 +1,59 @@ +// Copyright 2016 chacha20-poly1305-aead Developers +// +// Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be +// copied, modified, or distributed except according to those terms. + +#![allow(dead_code)] +#![allow(non_camel_case_types)] + +use crate::as_bytes::Safe; + +#[cfg(feature = "simd")] +macro_rules! decl_simd { + ($($decl:item)*) => { + $( + #[derive(Clone, Copy, Debug, Default)] + #[repr(simd)] + $decl + )* + } +} + +#[cfg(not(feature = "simd"))] +macro_rules! decl_simd { + ($($decl:item)*) => { + $( + #[derive(Clone, Copy, Debug, Default)] + #[repr(C)] + $decl + )* + } +} + +decl_simd! { + pub struct Simd4(pub T, pub T, pub T, pub T); + pub struct Simd8(pub T, pub T, pub T, pub T, + pub T, pub T, pub T, pub T); + pub struct Simd16(pub T, pub T, pub T, pub T, + pub T, pub T, pub T, pub T, + pub T, pub T, pub T, pub T, + pub T, pub T, pub T, pub T); +} + +pub type u32x4 = Simd4; +pub type u16x8 = Simd8; +pub type u8x16 = Simd16; + +#[cfg_attr(feature = "clippy", allow(inline_always))] +impl Simd4 { + #[inline(always)] + pub fn new(e0: T, e1: T, e2: T, e3: T) -> Simd4 { + Simd4(e0, e1, e2, e3) + } +} + +unsafe impl Safe for Simd4 {} +unsafe impl Safe for Simd8 {} +unsafe impl Safe for Simd16 {}