diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7a4b600 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +cscope.out +*.o +*.ko +*.mod.c +Module.symvers +*.cmd +.tmp_versions +*.swp +modules.order +modules.builtin diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..d159169 --- /dev/null +++ b/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. 
+ + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) 
Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. 
+ + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License.
diff --git a/README.md b/README.md new file mode 100644 index 0000000..79c1daa --- /dev/null +++ b/README.md @@ -0,0 +1,10 @@ +# WireGuard — fast, modern, secure kernel VPN tunnel +#### by [Jason A. Donenfeld](mailto:Jason@zx2c4.com) of [Edge Security](http://www.edgesecurity.com/) + +WireGuard is a novel VPN that runs inside the Linux Kernel and utilizes **state-of-the-art [cryptography](doc/protocol.md)**. It aims to be faster, simpler, leaner, and more useful than IPSec, while avoiding the massive headache. It intends to be considerably more performant than OpenVPN. WireGuard is designed as a general purpose VPN for running on embedded interfaces and super computers alike, fit for many different circumstances. It runs over UDP. + +**More information may be found at [WireGuard.io](https://www.wireguard.io/).** + +## License + +This project is released under the [GPLv2](COPYING). diff --git a/contrib/benchmarking/configs/other.conf b/contrib/benchmarking/configs/other.conf new file mode 100644 index 0000000..4257914 --- /dev/null +++ b/contrib/benchmarking/configs/other.conf @@ -0,0 +1,8 @@ +[Interface] +ListenPort = 27183 +PrivateKey = oHilodMrwJSD1UUIkAkyCek2yqy1Frs5XuN47ShGFk0= + +[Peer] +PublicKey = S8hEvD+dam+PrwG4GrSPtE2Pl3ylO/oiUnUDXw3vnx0= +AllowedIPs = 192.168.2.2/32 +Endpoint = 10.10.10.100:38292 \ No newline at end of file diff --git a/contrib/benchmarking/configs/thinkpad.conf b/contrib/benchmarking/configs/thinkpad.conf new file mode 100644 index 0000000..df02b2b --- /dev/null +++ b/contrib/benchmarking/configs/thinkpad.conf @@ -0,0 +1,8 @@ +[Interface] +ListenPort = 38292 +PrivateKey = MPCo/WSBkm/DCkbEXUhtjc5u//IeD6wEeaw3Q2HxFGw= + +[Peer] +PublicKey = c5PwaIZcVZFDuoDdQJGnYe+fk+wt0qANARpnZDOvqhw= +AllowedIPs = 0.0.0.0/0 +Endpoint = 172.16.48.128:27183 diff --git a/contrib/benchmarking/openvpn-config.txt b/contrib/benchmarking/openvpn-config.txt new file mode 100644 index 0000000..f51eabd --- /dev/null +++ b/contrib/benchmarking/openvpn-config.txt @@ 
-0,0 +1,2 @@ +Server: openvpn --dev tun --ifconfig 192.168.3.1 192.168.3.2 --secret static.key --cipher AES-256-CBC --auth SHA256 --port 61721 +Client: openvpn --dev tun --ifconfig 192.168.3.2 192.168.3.1 --secret static.key --cipher AES-256-CBC --auth SHA256 --port 61721 --remote 10.10.10.1 diff --git a/contrib/benchmarking/static.key b/contrib/benchmarking/static.key new file mode 100644 index 0000000..53075fe --- /dev/null +++ b/contrib/benchmarking/static.key @@ -0,0 +1,21 @@ +# +# 2048 bit OpenVPN static key +# +-----BEGIN OpenVPN Static key V1----- +12abb34ac1cb716576642c7e4c9719af +b311929f6bb5a7b9082c9ac3a02dc77a +26fc65ba97e67d1dc5b273e72760caba +6c8a3321acdf89bfd0469528bfc9ed89 +1c9c3762d1e18786c8b6dd590456f158 +d1f625810da1225864c23d7e848ca5d7 +18a49c4b7e640f8e51001ace9222de75 +e05177fd01b32d702bd12b45b085678c +239e3927d98912174ac648d0e37a3247 +45cabcbea7cf70832f8800a8b863a35a +933c5921fd65882b050bd1096a0c6c60 +638fb22eafb9f49c13573236d0427441 +c98869ba8de30e597452237527e7dcc6 +519058a919de4432203dc1d7622fb4d0 +f8f20c5350256cdf17bb3b85c5c838fc +6ddeb4da9dae8b0b882cb043db483a9d +-----END OpenVPN Static key V1----- diff --git a/contrib/client-server-example/client.sh b/contrib/client-server-example/client.sh new file mode 100755 index 0000000..fbae46a --- /dev/null +++ b/contrib/client-server-example/client.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# Example client: sends a freshly generated public key to demo.wireguard.io:42912 over TCP, then configures wg0 from the reply. +# Pass "default-route" as the first argument to also route all IPv4 traffic through wg0. +set -e +[[ $UID == 0 ]] || { echo "You must be root to run this."; exit 1; } +umask 077 +# Keep the private key in a mktemp file rather than a fixed /tmp path that another local user could pre-create or symlink. +key_file="$(mktemp)" +trap 'rm -f "$key_file"' EXIT INT TERM +exec 3<>/dev/tcp/demo.wireguard.io/42912 +wg genkey | tee "$key_file" | wg pubkey >&3 +# The reply is a single colon-separated line: status, server public key, server port, assigned address. +IFS=: read -r status server_pubkey server_port internal_ip <&3 +[[ $status == OK ]] +ip link del dev wg0 2>/dev/null || true +ip link add dev wg0 type wireguard +wg set wg0 private-key "$key_file" peer "$server_pubkey" allowed-ips 0.0.0.0/0 endpoint "demo.wireguard.io:$server_port" +ip address add "$internal_ip"/24 dev wg0 +ip link set up dev wg0 +if [ "$1" == "default-route" ]; then
+ # Re-add the current route to the endpoint explicitly (presumably so the tunnel's own UDP traffic keeps using it), then cover all of IPv4 with two /1 routes via wg0. + host="$(wg show wg0 endpoints | sed -n 's/.*\t\(.*\):.*/\1/p')" + ip route add $(ip route get "$host" | sed '/ via [0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}/{s/^\(.* via [0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\).*/\1/}' | head -n 1) 2>/dev/null || true + ip route add 0/1 dev wg0 + ip route add 128/1 dev wg0 +fi diff --git a/contrib/client-server-example/server.sh b/contrib/client-server-example/server.sh new file mode 100755 index 0000000..e37861f --- /dev/null +++ b/contrib/client-server-example/server.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# Example server. Without NCAT_REMOTE_ADDR it creates wg0 and hands this same script to ncat -e; with it set (presumably by ncat for each connection) it registers one client. +if [[ -z $NCAT_REMOTE_ADDR ]]; then + ip link del dev wg0 2>/dev/null + set -e + ip link add dev wg0 type wireguard + ip address add 192.168.4.1/24 dev wg0 + wg set wg0 private-key <(wg genkey) listen-port 12912 + ip link set up dev wg0 + exec ncat -e "$(readlink -f "$0")" -k -l -p 42912 -v +fi +# Per-connection part: the client sends its public key as one line. +read -r public_key +# Once 253 peers exist, remove the one with the smallest latest-handshake time to free an address. +[[ $(wg show wg0 | grep peer | wc -l) -ge 253 ]] && wg set wg0 peer $(wg show wg0 latest-handshakes | sort -k 2 -b -n | head -n 1 | cut -f 1) remove +# Pick the first 192.168.4.x (x from 2 to 254) that is not already listed in allowed-ips. +next_ip=$(all="$(wg show wg0 allowed-ips)"; for ((i=2; i<=254; i++)); do ip="192.168.4.$i"; [[ $all != *$ip/32* ]] && echo $ip && break; done) +wg set wg0 peer "$public_key" allowed-ips $next_ip/32 2>/dev/null && echo "OK:$(wg show wg0 private-key | wg pubkey):$(wg show wg0 listen-port):$next_ip" || echo ERROR diff --git a/contrib/external-tests/go/main.go b/contrib/external-tests/go/main.go new file mode 100644 index 0000000..de7337b --- /dev/null +++ b/contrib/external-tests/go/main.go @@ -0,0 +1,63 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved.
*/ + +package main + +import ( + "github.com/titanous/noise" + "net" + "time" + "bytes" + "crypto/rand" + "encoding/base64" + "encoding/binary" + "github.com/dchest/blake2s" +) +/* assert panics when exp is false; it is the only failure signal this test has. */ +func assert(exp bool) { + if !exp { + panic("Assertion failed.") + } +} +/* main sends one handshake initiation to test.wireguard.io:51820, asserts on the response, then sends a single empty data packet as a keepalive. Decode, dial and socket errors are still ignored. */ +func main() { + my_private, _ := base64.StdEncoding.DecodeString("WAmgVYXkbT2bCtdcDwolI88/iVi/aV3/PHcUBTQSYmo=") + my_public, _ := base64.StdEncoding.DecodeString("K5sF9yESrSBsOXPd6TcpKNgqoy1Ik3ZFKl4FolzrRyI=") + preshared, _ := base64.StdEncoding.DecodeString("FpCyhws9cxwWoV4xELtfJvjJN+zQVRPISllRWgeopVE=") + their_public, _ := base64.StdEncoding.DecodeString("qRCwZSKInrMAq5sepfCdaCsRJaoLe5jhtzfiw7CjbwM=") + cs := noise.NewCipherSuite(noise.DH25519, noise.CipherChaChaPoly, noise.HashBLAKE2s) + hs := noise.NewHandshakeState(noise.Config{CipherSuite: cs, Random: rand.Reader, Pattern: noise.HandshakeIK, Initiator: true, Prologue: []byte("WireGuard v0 zx2c4 Jason@zx2c4.com"), PresharedKey: preshared, StaticKeypair: noise.DHKey{Private: my_private, Public: my_public}, PeerStatic: their_public}) + conn, _ := net.Dial("udp", "test.wireguard.io:51820") + + now := time.Now() + tai64n := make([]byte, 12) + binary.BigEndian.PutUint64(tai64n[:], (1<<62)+uint64(now.Unix())) /* TAI64 seconds label: 2^62 + Unix seconds, matching the Haskell Data.Time.TAI64 encoder */ + binary.BigEndian.PutUint32(tai64n[8:], uint32(now.Nanosecond())) /* nanoseconds within the current second, not the truncated total since the epoch */ + initiation_packet := make([]byte, 5) + initiation_packet[0] = 1 /* Type: Initiation */ + binary.LittleEndian.PutUint32(initiation_packet[1:], 28) /* Sender index: 28 (arbitrary) */ + initiation_packet, _, _ = hs.WriteMessage(initiation_packet, tai64n) + hasher, _ := blake2s.New(&blake2s.Config{Size: 16, Key: preshared}) + hasher.Write(their_public) + hasher.Write(initiation_packet) + initiation_packet = append(initiation_packet, hasher.Sum(nil)[:16]...) + initiation_packet = append(initiation_packet, bytes.Repeat([]byte{ 0 }, 16)...)
+ conn.Write(initiation_packet) + + response_packet := make([]byte, 89) + conn.Read(response_packet) + assert(response_packet[0] == 2 /* Type: Response */) + their_index := binary.LittleEndian.Uint32(response_packet[1:]) + our_index := binary.LittleEndian.Uint32(response_packet[5:]) + assert(our_index == 28) + payload, send_cs, _, err := hs.ReadMessage(nil, response_packet[9:57]) + assert(len(payload) == 0 && err == nil) + + keepalive_packet := make([]byte, 13) /* 1 type byte + 4 index bytes + 8 nonce bytes */ + keepalive_packet[0] = 4 /* Type: Data */ + binary.LittleEndian.PutUint32(keepalive_packet[1:], their_index) + binary.LittleEndian.PutUint64(keepalive_packet[5:], 0) /* Nonce: starts after the 4-byte index at [1:5]; offset 3 would zero half of that index */ + keepalive_packet = send_cs.Encrypt(keepalive_packet, nil, nil) + conn.Write(keepalive_packet) + + conn.Close() +} diff --git a/contrib/external-tests/haskell/Setup.hs b/contrib/external-tests/haskell/Setup.hs new file mode 100644 index 0000000..9a994af --- /dev/null +++ b/contrib/external-tests/haskell/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/contrib/external-tests/haskell/cacophony-wg.cabal b/contrib/external-tests/haskell/cacophony-wg.cabal new file mode 100644 index 0000000..62e2485 --- /dev/null +++ b/contrib/external-tests/haskell/cacophony-wg.cabal @@ -0,0 +1,34 @@ +-- Initial cacophony-wg.cabal generated by cabal init.
For further +-- documentation, see http://haskell.org/cabal/users-guide/ + +name: cacophony-wg +version: 0.1.0 +-- synopsis: +-- description: +license: PublicDomain +license-file: LICENSE +author: John Galt +maintainer: centromere@users.noreply.github.com +-- copyright: +-- category: +build-type: Simple +-- extra-source-files: +cabal-version: >=1.10 + +executable cacophony-wg + main-is: Main.hs + other-modules: + Data.Time.TAI64 + build-depends: + base >=4.8 && <4.9, + base16-bytestring, + base64-bytestring, + blake2, + bytestring, + cacophony, + cereal, + cryptonite, + network, + time + hs-source-dirs: src + default-language: Haskell2010 diff --git a/contrib/external-tests/haskell/src/Data/Time/TAI64.hs b/contrib/external-tests/haskell/src/Data/Time/TAI64.hs new file mode 100644 index 0000000..37a90e6 --- /dev/null +++ b/contrib/external-tests/haskell/src/Data/Time/TAI64.hs @@ -0,0 +1,87 @@ +module Data.Time.TAI64 ( + TAI64(..) + , TAI64N(..) + , TAI64NA(..) + , posixToTAI64 + , posixToTAI64N + , posixToTAI64NA + , getCurrentTAI64 + , getCurrentTAI64N + , getCurrentTAI64NA + , tAI64ToPosix + , tAI64NToPosix + , tAI64NAToPosix +) where + +import Data.Serialize +import Control.Monad +import Data.Word + +import Data.Time.Clock +import Data.Time.Clock.POSIX + +import Numeric + +data TAI64 = TAI64 + {-# UNPACK #-} !Word64 + deriving (Eq, Ord) + +data TAI64N = TAI64N + {-# UNPACK #-} !TAI64 + {-# UNPACK #-} !Word32 + deriving (Eq, Ord, Show) + +data TAI64NA = TAI64NA + {-# UNPACK #-} !TAI64N + {-# UNPACK #-} !Word32 + deriving (Eq, Ord, Show) + +instance Show TAI64 where + show (TAI64 t) = "TAI64 0x" ++ showHex t "" + +instance Serialize TAI64 where + put (TAI64 t) = putWord64be t + get = liftM TAI64 get + +instance Serialize TAI64N where + put (TAI64N t' nt) = put t' >> putWord32be nt + get = liftM2 TAI64N get get + +instance Serialize TAI64NA where + put (TAI64NA t' at) = put t' >> putWord32be at + get = liftM2 TAI64NA get get + +-- | Whole seconds of the POSIX time, offset by 2^62. +posixToTAI64 :: POSIXTime -> TAI64
+posixToTAI64 = TAI64 . (2^62 +) . truncate . realToFrac + +-- | Seconds label plus the nanoseconds elapsed within that second. +posixToTAI64N :: POSIXTime -> TAI64N +posixToTAI64N pt = TAI64N t' ns where + t' = posixToTAI64 pt + -- Reduce modulo 10^9 as an Integer before narrowing: truncating a ~10^18 value straight into the Word32 field wraps first, and the later `mod` cannot undo that. + ns = fromInteger $ truncate (pt * 10^9) `mod` 10^9 + +posixToTAI64NA :: POSIXTime -> TAI64NA -- | PICOsecond precision +posixToTAI64NA pt = TAI64NA t' as where + t' = posixToTAI64N pt + -- Attoseconds within the current nanosecond, in exact Integer arithmetic (no Double rounding, no Word32 wrap-around). + as = fromInteger $ truncate (pt * 10^18) `mod` 10^9 + +getCurrentTAI64 :: IO TAI64 +getCurrentTAI64N :: IO TAI64N +getCurrentTAI64NA :: IO TAI64NA +getCurrentTAI64 = liftM posixToTAI64 getPOSIXTime +getCurrentTAI64N = liftM posixToTAI64N getPOSIXTime +getCurrentTAI64NA = liftM posixToTAI64NA getPOSIXTime + +tAI64ToPosix :: TAI64 -> POSIXTime +tAI64ToPosix (TAI64 s) = fromRational . fromIntegral $ s - 2^62 + +tAI64NToPosix :: TAI64N -> POSIXTime +tAI64NToPosix (TAI64N t' n) = tAI64ToPosix t' + nanopart where + nanopart = fromRational $ (toRational $ 10**(-9)) * toRational n -- TODO: optimize? + +tAI64NAToPosix :: TAI64NA -> POSIXTime +tAI64NAToPosix (TAI64NA t' a) = tAI64NToPosix t' + attopart where + attopart = fromRational $ (toRational $ 10**(-18)) * toRational a diff --git a/contrib/external-tests/haskell/src/Main.hs b/contrib/external-tests/haskell/src/Main.hs new file mode 100644 index 0000000..f78305d --- /dev/null +++ b/contrib/external-tests/haskell/src/Main.hs @@ -0,0 +1,81 @@ +{-# LANGUAGE OverloadedStrings #-} +module Main where + +import Control.Applicative ((<$>)) +import Control.Concurrent.MVar +import Control.Monad (void) +import Data.ByteString.Char8 (pack, unpack, take, drop, replicate) +import Data.ByteString (ByteString) +import qualified Data.ByteString.Base16 as Hex +import qualified Data.ByteString.Base64 as B64 +import qualified Data.Serialize as S +import Prelude hiding (take, drop, replicate) +import System.Environment +import Network.Socket +import qualified Network.Socket.ByteString as NBS + +import Crypto.Hash.BLAKE2.BLAKE2s +import Crypto.Noise.Cipher +import
Crypto.Noise.Cipher.ChaChaPoly1305 +import Crypto.Noise.Curve +import Crypto.Noise.Curve.Curve25519 +import Crypto.Noise.Handshake +import Crypto.Noise.HandshakePatterns +import Crypto.Noise.Hash.BLAKE2s +import Crypto.Noise.Types + +import Data.Time.TAI64 +-- | Handshake write callback: prefixes the Noise message with the type byte 0x01 and a zero 4-byte sender index, appends a 16-byte BLAKE2s hash keyed with the PSK and 16 zero bytes, then sends the datagram. +w :: PublicKey Curve25519 + -> Plaintext + -> Socket + -> SockAddr + -> ByteString + -> IO () +w theirPub (Plaintext myPSK) sock addr msg = do + let x = "\x01\x00\x00\x00\x00" `mappend` msg -- NOTE(review): 4-byte index to match the Go and Rust clients in this tree (was 2 bytes); confirm against the server + mac = hash 16 myPSK (sbToBS' (curvePubToBytes theirPub) `mappend` sbToBS' x) + void $ NBS.sendTo sock (x `mappend` mac `mappend` replicate 16 '\0') addr +-- | Handshake read callback: receives the response, stores the responder's 4-byte sender index (bytes 1-4) in the MVar, and returns the 48-byte Noise message that follows the 9-byte header. +r :: MVar ByteString -> Socket -> IO ByteString +r smv sock = do + (r, _) <- NBS.recvFrom sock 1024 + putMVar smv $ (take 4 . drop 1) r + return . take 48 . drop 9 $ r +-- | Handshake payload: the current time as a serialized TAI64N value. +payload :: IO Plaintext +payload = do + tai64n <- getCurrentTAI64N + return . Plaintext . bsToSB' $ S.encode tai64n +-- | Runs one IK handshake against test.wireguard.io:51820, then sends a single data packet with an empty payload. +main :: IO () +main = do + let ip = "test.wireguard.io" + let port = "51820" + let mykey = "WAmgVYXkbT2bCtdcDwolI88/iVi/aV3/PHcUBTQSYmo=" + let serverkey = "qRCwZSKInrMAq5sepfCdaCsRJaoLe5jhtzfiw7CjbwM=" + let psk = "FpCyhws9cxwWoV4xELtfJvjJN+zQVRPISllRWgeopVE=" + addrInfo <- head <$> getAddrInfo Nothing (Just ip) (Just port) + sock <- socket (addrFamily addrInfo) Datagram defaultProtocol + + let addr = addrAddress addrInfo + mykey' = curveBytesToPair . bsToSB' . either undefined id . B64.decode . pack $ mykey :: KeyPair Curve25519 + serverkey' = curveBytesToPub . bsToSB' . either undefined id . B64.decode . pack $ serverkey :: PublicKey Curve25519 + psk' = Plaintext . bsToSB' . either undefined id . B64.decode .
pack $ psk + hs = handshakeState $ HandshakeStateParams + noiseIK + "WireGuard v0 zx2c4 Jason@zx2c4.com" + (Just psk') + (Just mykey') + Nothing + (Just serverkey') + Nothing + True :: HandshakeState ChaChaPoly1305 Curve25519 BLAKE2s + + senderindexmv <- newEmptyMVar + let hc = HandshakeCallbacks (w serverkey' psk' sock addr) (r senderindexmv sock) (\_ -> return ()) payload + (encryption, decryption) <- runHandshake hs hc + + let (keepAlive, encryption') = encryptPayload "" encryption + senderindex <- takeMVar senderindexmv + void $ NBS.sendTo sock ("\x04" `mappend` senderindex `mappend` replicate 8 '\0' `mappend` keepAlive) addr diff --git a/contrib/external-tests/rust/.gitignore b/contrib/external-tests/rust/.gitignore new file mode 100644 index 0000000..1e7caa9 --- /dev/null +++ b/contrib/external-tests/rust/.gitignore @@ -0,0 +1,2 @@ +Cargo.lock +target/ diff --git a/contrib/external-tests/rust/Cargo.toml b/contrib/external-tests/rust/Cargo.toml new file mode 100644 index 0000000..c064905 --- /dev/null +++ b/contrib/external-tests/rust/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "wireguard-keepalive" +version = "0.1.0" +authors = ["jason@zx2c4.com"] +[dependencies] +screech = { git = "https://github.com/trevp/screech" } +rust-crypto = "*" +byteorder = "*" +rustc-serialize = "*" +time = "*" diff --git a/contrib/external-tests/rust/src/main.rs b/contrib/external-tests/rust/src/main.rs new file mode 100644 index 0000000..fa468af --- /dev/null +++ b/contrib/external-tests/rust/src/main.rs @@ -0,0 +1,74 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved.
*/ +extern crate screech; +extern crate crypto; +extern crate time; +extern crate rustc_serialize; +extern crate byteorder; + +use screech::*; +use byteorder::{ByteOrder, BigEndian, LittleEndian}; +use crypto::curve25519::curve25519_base; +use crypto::blake2s::Blake2s; +use rustc_serialize::base64::FromBase64; +use std::net::*; + +fn memcpy(out: &mut [u8], data: &[u8]) { + for count in 0..data.len() { + out[count] = data[count]; + } +} + +fn main() { + let send_addr = "test.wireguard.io:51820".to_socket_addrs().unwrap().next().unwrap(); + let listen_addr = "0.0.0.0:0".to_socket_addrs().unwrap().next().unwrap(); + let socket = UdpSocket::bind(listen_addr).unwrap(); + let mut empty_payload = [0; 0]; + + let mut their_public = [0; 32]; + memcpy(&mut their_public, &"qRCwZSKInrMAq5sepfCdaCsRJaoLe5jhtzfiw7CjbwM=".from_base64().unwrap()); + let mut my_private = [0; 32]; + memcpy(&mut my_private, &"WAmgVYXkbT2bCtdcDwolI88/iVi/aV3/PHcUBTQSYmo=".from_base64().unwrap()); + let mut my_preshared = [0; 32]; + memcpy(&mut my_preshared, &"FpCyhws9cxwWoV4xELtfJvjJN+zQVRPISllRWgeopVE=".from_base64().unwrap()); + let my_public = curve25519_base(&my_private); + let mut my_keypair : Dh25519 = Default::default(); + my_keypair.set(&my_private, &my_public); + let mut owner : HandshakeCryptoOwner = Default::default(); + owner.set_s(my_keypair); + owner.set_rs(&their_public); + let mut cipherstate1 : CipherState = Default::default(); + let mut cipherstate2 : CipherState = Default::default(); + let mut handshake = HandshakeState::new_from_owner(&mut owner, true, HandshakePattern::IK, "WireGuard v0 zx2c4 Jason@zx2c4.com".as_bytes(), Some(&my_preshared[..]), &mut cipherstate1, &mut cipherstate2); + + let now = time::get_time(); + let mut tai64n = [0; 12]; + BigEndian::write_i64(&mut tai64n[0..], now.sec); + BigEndian::write_i32(&mut tai64n[8..], now.nsec); + let mut initiation_packet = [0; 145]; + initiation_packet[0] = 1; /* Type: Initiation */ + LittleEndian::write_u32(&mut 
initiation_packet[1..], 28); /* Sender index: 28 (arbitrary) */ + handshake.write_message(&tai64n, &mut initiation_packet[5..]); + /* MAC input is their 32-byte public key followed by the first 113 packet bytes, i.e. 145 bytes; a smaller buffer makes the second memcpy index out of bounds. */ + let mut mac_material = [0; 145]; + memcpy(&mut mac_material, &their_public); + memcpy(&mut mac_material[32..], &initiation_packet[0..113]); + let mut mac = [0; 16]; + Blake2s::blake2s(&mut mac, &mac_material, &my_preshared); + memcpy(&mut initiation_packet[113..], &mac); + socket.send_to(&initiation_packet, &send_addr).unwrap(); + + let mut response_packet = [0; 89]; + socket.recv_from(&mut response_packet).unwrap(); + assert!(response_packet[0] == 2 /* Type: Response */); + let their_index = LittleEndian::read_u32(&response_packet[1..]); + let our_index = LittleEndian::read_u32(&response_packet[5..]); + assert!(our_index == 28); + let (payload_len, last) = handshake.read_message(&response_packet[9..57], &mut empty_payload).unwrap(); + assert!(payload_len == 0 && last); + + let mut keepalive_packet = [0; 29]; + keepalive_packet[0] = 4; /* Type: Data */ + LittleEndian::write_u32(&mut keepalive_packet[1..], their_index); + LittleEndian::write_u64(&mut keepalive_packet[5..], cipherstate1.n); + cipherstate1.encrypt(&empty_payload, &mut keepalive_packet[13..]); /* Empty payload means keepalive */ + socket.send_to(&keepalive_packet, &send_addr).unwrap(); +} diff --git a/contrib/patch-kernel-builtin.sh b/contrib/patch-kernel-builtin.sh new file mode 100755 index 0000000..8229762 --- /dev/null +++ b/contrib/patch-kernel-builtin.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +K="$1" +WG="$(readlink -f "$(dirname "$(readlink -f "$0")")/../src/")" + +if [ ! -e "$K/net/Kconfig" ]; then # POSIX test: [[ ]] is a bashism and this script runs under /bin/sh + echo "You must specify the location of kernel sources as the first argument." 
>&2 + exit 1 +fi + +sed -i "/^if NET/a source \"$WG/Kconfig\"" "$K/net/Kconfig" +echo "obj-y += ../../../../../../../../../../../../../../../../../../../../../..$WG/" >> "$K/net/Makefile" diff --git a/contrib/stress-testing/badpacket.c b/contrib/stress-testing/badpacket.c new file mode 100644 index 0000000..eee61fc --- /dev/null +++ b/contrib/stress-testing/badpacket.c @@ -0,0 +1,27 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +/* Sends a fixed 143-byte packet whose first byte is 1 to argv[1]:argv[2] over UDP in an endless loop. */ +int main(int argc, char *argv[]) +{ + static const unsigned char handshake1[143] = { 1, 0 }; + int fd; + struct sockaddr_in addr = { .sin_family = AF_INET }; + + /* Address and port are both required; bail out rather than dereference a missing argv entry. */ + if (argc != 3) + return 1; + addr.sin_port = htons(atoi(argv[2])); + addr.sin_addr.s_addr = inet_addr(argv[1]); + + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) + return 1; + if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) { + close(fd); + return 1; + } + + for (;;) + send(fd, handshake1, sizeof(handshake1), 0); + + close(fd); + + return 0; +} diff --git a/contrib/stress-testing/peg.c b/contrib/stress-testing/peg.c new file mode 100644 index 0000000..6b539fa --- /dev/null +++ b/contrib/stress-testing/peg.c @@ -0,0 +1,50 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +/* Reads the counter in /sys/class/net/<interface>/statistics/tx_bytes. Returns 0 when the file cannot be opened or does not parse as a number. */ +static unsigned long long interface_tx_bytes(const char *interface) +{ + char buf[PATH_MAX]; + FILE *f; + unsigned long long ret = 0; + + snprintf(buf, sizeof(buf), "/sys/class/net/%s/statistics/tx_bytes", interface); + f = fopen(buf, "r"); + if (!f) /* fscanf/fclose on a NULL stream would crash */ + return 0; + if (fscanf(f, "%llu", &ret) != 1) + ret = 0; + fclose(f); + return ret; +} + +int main(int argc, char *argv[]) +{ + char buf[1500] = { 0 }; + unsigned long long before, after, i; + struct timespec begin, end; + double elapsed; + struct ifreq req; + int fd = socket(AF_INET, SOCK_DGRAM, 0); + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_port = htons(7271), + .sin_addr = inet_addr(argv[3]) + }; + strcpy(req.ifr_name, argv[1]); + ioctl(fd, SIOCGIFMTU, &req); + + connect(fd, (struct sockaddr *)&addr, sizeof(addr)); + + before = interface_tx_bytes(argv[2]); + clock_gettime(CLOCK_MONOTONIC, &begin); + for (i = 0; i 
< 10000000; ++i) + send(fd, buf, req.ifr_mtu - 28, 0); + clock_gettime(CLOCK_MONOTONIC, &end); + after = interface_tx_bytes(argv[2]); + elapsed = end.tv_sec - begin.tv_sec + (end.tv_nsec - begin.tv_nsec) / 1000000000.0; + + printf("%.4f mbps\n", ((after - before) * 8) / elapsed / 1000000.0); + return 0; +} diff --git a/contrib/stress-testing/self-send.sh b/contrib/stress-testing/self-send.sh new file mode 100755 index 0000000..eb7947b --- /dev/null +++ b/contrib/stress-testing/self-send.sh @@ -0,0 +1,48 @@ +#!/bin/bash +set -e + +PRIVATE_KEYS=("") +PUBLIC_KEYS=("") + +resetwg() { + for i in {1..64}; do + ip link delete dev wg${i} 2>/dev/null >/dev/null || true + done +} + +for i in {1..64}; do + next_key="$(wg genkey)" + PRIVATE_KEYS+=("$next_key") + PUBLIC_KEYS+=($(wg pubkey <<<"$next_key")) +done + +resetwg +trap resetwg INT TERM EXIT + +for i in {1..64}; do + { echo "[Interface]" + echo "ListenPort = $(( $i + 31222 ))" + echo "PrivateKey = ${PRIVATE_KEYS[$i]}" + + for j in {1..64}; do + [[ $i == $j ]] && continue + echo "[Peer]" + echo "PublicKey = ${PUBLIC_KEYS[$j]}" + echo "AllowedIPs = 192.168.8.${j}/32" + echo "Endpoint = 127.0.0.1:$(( $j + 31222 ))" + done + } > "/tmp/deviceload.conf" + + ip link add dev wg${i} type wireguard + wg setconf wg${i} "/tmp/deviceload.conf" + ip link set up dev wg${i} + rm "/tmp/deviceload.conf" +done + +ip address add dev wg1 192.168.8.1/24 + +while true; do + for i in {2..64}; do + echo hello | ncat -u 192.168.8.${i} 1234 + done +done diff --git a/contrib/stress-testing/threewayiperf.sh b/contrib/stress-testing/threewayiperf.sh new file mode 100755 index 0000000..932d666 --- /dev/null +++ b/contrib/stress-testing/threewayiperf.sh @@ -0,0 +1,30 @@ +#!/bin/bash +set -e + +if [[ $(hostname) == "thinkpad" ]]; then + make -C "$(dirname "$0")/../../src" remote-run + for i in 128 129 130; do + scp "$0" root@172.16.48.${i}: + done + for i in 128 129 130; do + konsole --new-tab -e ssh -t root@172.16.48.${i} "./$(basename "$0")" + done + 
exit +fi + +# perf top -U --dsos '[wireguard]' + +tmux new-session -s bigtest -d +tmux new-window -n "server 6000" -t bigtest "iperf3 -p 6000 -s" +tmux new-window -n "server 6001" -t bigtest "iperf3 -p 6001 -s" +sleep 5 +me=$(ip -o -4 address show dev wg0 | sed 's/.*inet \([^ ]*\)\/.*/\1/' | cut -d . -f 4) +for i in 1 2 3; do + [[ $i == $me ]] && continue + [[ $me == "1" ]] && port=6000 + [[ $me == "3" ]] && port=6001 + [[ $me == "2" && $i == "1" ]] && port=6000 + [[ $me == "2" && $i == "3" ]] && port=6001 + tmux new-window -n "client 192.168.2.${i}" -t bigtest "iperf3 -n 300000G -i 1 -p $port -c 192.168.2.${i}" +done +tmux attach -t bigtest diff --git a/contrib/wgserver.service b/contrib/wgserver.service new file mode 100644 index 0000000..dfce1e9 --- /dev/null +++ b/contrib/wgserver.service @@ -0,0 +1,15 @@ +[Unit] +Description=WireGuard Server + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/bin/ip link add dev wgserver type wireguard +ExecStart=/bin/ip address add 192.168.177.1/24 dev wgserver +ExecStart=/usr/bin/wg setconf wgserver /etc/wireguard-server.conf +ExecStart=/bin/ip link set up dev wgserver +ExecStop=/bin/sh -c 'umask 077; /usr/bin/wg showconf wgserver > /etc/wireguard-server.conf.tmp && mv /etc/wireguard-server.conf.tmp /etc/wireguard-server.conf' +ExecStop=/bin/ip link del dev wgserver + +[Install] +WantedBy=multi-user.target diff --git a/src/Kconfig b/src/Kconfig new file mode 100644 index 0000000..ca9f6d0 --- /dev/null +++ b/src/Kconfig @@ -0,0 +1,35 @@ +config WIREGUARD + bool "WireGuard secure VPN tunnel" + select NET_UDP_TUNNEL + default y + ---help--- + WireGuard is a secure, fast, and easy to use replacement for IPSec + that uses modern cryptography and clever networking tricks. It's + designed to be fairly general purpose and abstract enough to fit most + use cases, while at the same time remaining extremely simple to + configure. See www.wireguard.io for more info. 
+ + It's safe to say Y or M here, as the driver is very lightweight and + is only in use when an administrator chooses to add an interface. + +config WIREGUARD_DEBUG + bool "Debugging checks and verbose messages for WireGuard" + depends on WIREGUARD + ---help--- + This will write log messages for handshake and other events + that occur for a WireGuard interface. It will also perform some + extra validation checks and unit tests at various points. This is + only useful for debugging. + + Say N here unless you know what you're doing. + +config WIREGUARD_PARALLEL + bool "Enable parallel engine" + depends on SMP && WIREGUARD + default y + ---help--- + This will allow WireGuard to utilize all CPU cores when encrypting + and decrypting packets. + + It's safe to say Y here, and you probably should, as the performance + improvements are substantial. diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..f658968 --- /dev/null +++ b/src/Makefile @@ -0,0 +1,60 @@ +ifneq ($(KERNELRELEASE),) +ifneq ($(KBUILD_EXTMOD),) +CONFIG_WIREGUARD := m +endif + +obj-$(CONFIG_WIREGUARD) := wireguard.o +ccflags-y := -O3 -fvisibility=hidden +ccflags-$(CONFIG_WIREGUARD_DEBUG) := -DDEBUG -g +ifneq ($(KBUILD_EXTMOD),) +ifeq ($(CONFIG_WIREGUARD_PARALLEL),) +ifneq (,$(filter $(CONFIG_PADATA),y m)) +ccflags-y += -DCONFIG_WIREGUARD_PARALLEL=y +endif +endif +endif + +wireguard-y := main.o noise.o device.o peer.o timers.o data.o send.o receive.o socket.o config.o hashtables.o routing-table.o ratelimiter.o cookie.o +wireguard-y += crypto/curve25519.o crypto/chacha20poly1305.o crypto/blake2s.o crypto/siphash24.o +ifeq ($(CONFIG_X86_64),y) + wireguard-y += crypto/chacha20-ssse3-x86_64.o crypto/poly1305-sse2-x86_64.o +avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1$(comma)4)$(comma)%ymm2,yes,no) +ifeq ($(avx2_supported),yes) + wireguard-y += crypto/chacha20-avx2-x86_64.o crypto/poly1305-avx2-x86_64.o +endif +endif +else +KERNELDIR ?= /lib/modules/$(shell 
uname -r)/build +PWD := $(shell pwd) + +all: module tools +debug: module-debug tools + +module: + $(MAKE) -C $(KERNELDIR) M=$(PWD) modules + +module-debug: + $(MAKE) -C $(KERNELDIR) M=$(PWD) V=1 CONFIG_WIREGUARD_DEBUG=y modules + +clean: + $(MAKE) -C $(KERNELDIR) M=$(PWD) clean + $(MAKE) -C tools clean + +install: + $(MAKE) -C $(KERNELDIR) M=$(PWD) modules_install + depmod -a + $(MAKE) -C tools install + +tools: + $(MAKE) -C tools + +core-cloc: clean + cloc ./*.c ./*.h + +check: + $(MAKE) -C $(KERNELDIR) M=$(PWD) C=2 CF="-D__CHECK_ENDIAN__" CONFIG_WIREGUARD_DEBUG=y + +include debug.mk + +.PHONY: all module module-debug tools install clean core-cloc check +endif diff --git a/src/config.c b/src/config.c new file mode 100644 index 0000000..3df5d9e --- /dev/null +++ b/src/config.c @@ -0,0 +1,314 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#include "wireguard.h" +#include "config.h" +#include "device.h" +#include "socket.h" +#include "packets.h" +#include "timers.h" +#include "hashtables.h" +#include "peer.h" +#include "uapi.h" + +static int set_peer_dst(struct wireguard_peer *peer, void *data) +{ + socket_set_peer_dst(peer); + return 0; +} + +static int set_device_port(struct wireguard_device *wg, u16 port) +{ + if (!port) + return -EINVAL; + socket_uninit(wg); + wg->incoming_port = port; + if (netdev_pub(wg)->flags & IFF_UP) { + peer_for_each_unlocked(wg, set_peer_dst, NULL); + return socket_init(wg); + } + return 0; +} + +static int set_ipmask(struct wireguard_peer *peer, void __user *user_ipmask) +{ + int ret = 0; + struct wgipmask in_ipmask; + + ret = copy_from_user(&in_ipmask, user_ipmask, sizeof(in_ipmask)); + if (ret) { + ret = -EFAULT; + return ret; + } + + if (in_ipmask.family == AF_INET && in_ipmask.cidr <= 32) + ret = routing_table_insert_v4(&peer->device->peer_routing_table, &in_ipmask.ip4, in_ipmask.cidr, peer); + else if (in_ipmask.family == AF_INET6 && in_ipmask.cidr <= 128) + ret = 
routing_table_insert_v6(&peer->device->peer_routing_table, &in_ipmask.ip6, in_ipmask.cidr, peer); + else + ret = -EINVAL; /* Unknown address family or out-of-range CIDR: report it instead of silently ignoring the entry. */ + + return ret; +} + +static const uint8_t zeros[WG_KEY_LEN] = { 0 }; + +/* Applies one userspace struct wgpeer, and the struct wgipmask array that follows it, to the device: the peer matching in_peer.public_key is created, updated or (when remove_me is set) removed. On success *len receives the number of bytes this entry occupies at user_peer, which the caller uses to find the next entry. Returns 0 or a negative errno. */ +static int set_peer(struct wireguard_device *wg, void __user *user_peer, size_t *len) +{ + int ret = 0; + size_t i; + struct wgpeer in_peer; + void __user *user_ipmask; + struct wireguard_peer *peer = NULL; + + ret = copy_from_user(&in_peer, user_peer, sizeof(in_peer)); + if (ret) { + ret = -EFAULT; + return ret; + } + + if (!memcmp(zeros, in_peer.public_key, NOISE_PUBLIC_KEY_LEN)) + return -EINVAL; /* Can't add a peer with no public key. */ + + peer = pubkey_hashtable_lookup(&wg->peer_hashtable, in_peer.public_key); + if (!peer) { /* Peer doesn't exist yet. Add a new one. */ + if (in_peer.remove_me) + return -ENODEV; /* Tried to remove a non existing peer. */ + peer = peer_create(wg, in_peer.public_key); + if (!peer) + return -ENOMEM; + rcu_read_lock(); + peer = peer_get(peer); + rcu_read_unlock(); + if (!peer) { + pr_err("Peer disappeared while creating\n"); + return -EAGAIN; + } + if (netdev_pub(wg)->flags & IFF_UP) + timers_init_peer(peer); + } else + pr_debug("Peer %Lu (%pISpfsc) modified\n", peer->internal_id, &peer->endpoint_addr); + + if (in_peer.remove_me) { + peer_put(peer); + peer_remove(peer); + goto out; /* ret is 0 here; the entry length must still be reported so the caller can step past this entry. */ + } + + if (in_peer.endpoint.ss_family == AF_INET || in_peer.endpoint.ss_family == AF_INET6) + socket_set_peer_addr(peer, &in_peer.endpoint); + + if (in_peer.replace_ipmasks) + routing_table_remove_by_peer(&wg->peer_routing_table, peer); + for (i = 0, user_ipmask = user_peer + sizeof(struct wgpeer); i < in_peer.num_ipmasks; ++i, user_ipmask += sizeof(struct wgipmask)) { + ret = set_ipmask(peer, user_ipmask); + if (ret) + break; + } + + if (netdev_pub(wg)->flags & IFF_UP) + packet_send_queue(peer); + + peer_put(peer); + +out: + if (!ret) + *len = sizeof(struct wgpeer) + (in_peer.num_ipmasks * sizeof(struct wgipmask)); + + return ret; +} + +int config_set_device(struct wireguard_device *wg, 
void __user *user_device) +{ + int ret = 0; + size_t i, offset; + struct wgdevice in_device; + void __user *user_peer; + + BUILD_BUG_ON(WG_KEY_LEN != NOISE_PUBLIC_KEY_LEN); + BUILD_BUG_ON(WG_KEY_LEN != NOISE_SYMMETRIC_KEY_LEN); + + mutex_lock(&wg->device_update_lock); + + ret = copy_from_user(&in_device, user_device, sizeof(in_device)); + if (ret) { + ret = -EFAULT; + goto out; + } + + if (in_device.port) { + ret = set_device_port(wg, in_device.port); + if (ret) + goto out; + } + + if (in_device.replace_peer_list) + peer_remove_all(wg); + + /* An all-zero key means "leave unchanged"; removal is requested through the explicit remove_* flags. */ + if (in_device.remove_private_key) + noise_set_static_identity_private_key(&wg->static_identity, NULL); + else if (memcmp(zeros, in_device.private_key, WG_KEY_LEN)) + noise_set_static_identity_private_key(&wg->static_identity, in_device.private_key); + + if (in_device.remove_preshared_key) + noise_set_static_identity_preshared_key(&wg->static_identity, NULL); + else if (memcmp(zeros, in_device.preshared_key, WG_KEY_LEN)) + noise_set_static_identity_preshared_key(&wg->static_identity, in_device.preshared_key); + + /* Peer entries follow the device header back to back; set_peer reports each entry's size through offset. */ + for (i = 0, offset = 0, user_peer = user_device + sizeof(struct wgdevice); i < in_device.num_peers; ++i, user_peer += offset) { + ret = set_peer(wg, user_peer, &offset); + if (ret) + break; + } + +out: + mutex_unlock(&wg->device_update_lock); + /* in_device holds both the private key and the preshared key, so wipe the whole stack copy rather than just private_key. */ + memzero_explicit(&in_device, sizeof(in_device)); + return ret; +} + +struct data_remaining { + void __user *data; + size_t out_len; + size_t count; +}; + +static inline int use_data(struct data_remaining *data, size_t size) +{ + if (data->out_len < size) + return -EMSGSIZE; + data->out_len -= size; + data->data += size; + ++data->count; + return 0; +} + +static int calculate_ipmasks_size(void *ctx, struct wireguard_peer *peer, union nf_inet_addr ip, uint8_t cidr, int family) +{ + size_t *count = ctx; + *count += sizeof(struct wgipmask); + return 0; +} + +static size_t calculate_peers_size(struct wireguard_device *wg) +{ + size_t len = peer_total_count(wg) * 
sizeof(struct wgpeer); + routing_table_walk_ips(&wg->peer_routing_table, &len, calculate_ipmasks_size); + return len; +} + +static int populate_ipmask(void *ctx, union nf_inet_addr ip, uint8_t cidr, int family) +{ + int ret; + struct data_remaining *data = ctx; + void __user *uipmask = data->data; + struct wgipmask out_ipmask; + + memset(&out_ipmask, 0, sizeof(struct wgipmask)); + + ret = use_data(data, sizeof(struct wgipmask)); + if (ret) + return ret; + + out_ipmask.cidr = cidr; + out_ipmask.family = family; + if (family == AF_INET) + out_ipmask.ip4 = ip.in; + else if (family == AF_INET6) + out_ipmask.ip6 = ip.in6; + + ret = copy_to_user(uipmask, &out_ipmask, sizeof(out_ipmask)); + if (ret) + ret = -EFAULT; + return ret; +} + + +static int populate_peer(struct wireguard_peer *peer, void *ctx) +{ + int ret = 0; + struct data_remaining *data = ctx; + void __user *upeer = data->data; + struct wgpeer out_peer; + struct data_remaining ipmasks_data = { NULL }; + + memset(&out_peer, 0, sizeof(struct wgpeer)); + + ret = use_data(data, sizeof(struct wgpeer)); + if (ret) + return ret; + + memcpy(out_peer.public_key, peer->handshake.remote_static, NOISE_PUBLIC_KEY_LEN); + read_lock_bh(&peer->endpoint_lock); + out_peer.endpoint = peer->endpoint_addr; + read_unlock_bh(&peer->endpoint_lock); + out_peer.last_handshake_time = peer->walltime_last_handshake; + out_peer.tx_bytes = peer->tx_bytes; + out_peer.rx_bytes = peer->rx_bytes; + + ipmasks_data.out_len = data->out_len; + ipmasks_data.data = data->data; + ret = routing_table_walk_ips_by_peer_sleepable(&peer->device->peer_routing_table, &ipmasks_data, peer, populate_ipmask); + if (ret) + return ret; + data->out_len = ipmasks_data.out_len; + data->data = ipmasks_data.data; + out_peer.num_ipmasks = ipmasks_data.count; + + ret = copy_to_user(upeer, &out_peer, sizeof(out_peer)); + if (ret) + ret = -EFAULT; + return ret; +} + + +int config_get_device(struct wireguard_device *wg, void __user *udevice) +{ + int ret = 0; + struct 
net_device *dev = netdev_pub(wg); + struct data_remaining peer_data = { NULL }; + struct wgdevice out_device; + struct wgdevice in_device; + + BUILD_BUG_ON(WG_KEY_LEN != NOISE_PUBLIC_KEY_LEN); + BUILD_BUG_ON(WG_KEY_LEN != NOISE_SYMMETRIC_KEY_LEN); + + memset(&out_device, 0, sizeof(struct wgdevice)); + + mutex_lock(&wg->device_update_lock); + + /* A NULL buffer is a size query: return the number of bytes needed for all peers and their ipmasks. */ + if (!udevice) { + ret = calculate_peers_size(wg); + goto out; + } + + /* Only peers_size is consumed from the caller's copy; it bounds how much peer data may be written back. */ + ret = copy_from_user(&in_device, udevice, sizeof(in_device)); + if (ret) { + ret = -EFAULT; + goto out; + } + + out_device.port = wg->incoming_port; + strncpy(out_device.interface, dev->name, IFNAMSIZ - 1); + out_device.interface[IFNAMSIZ - 1] = 0; + + down_read(&wg->static_identity.lock); + if (wg->static_identity.has_identity) { + memcpy(out_device.private_key, wg->static_identity.static_private, WG_KEY_LEN); + memcpy(out_device.public_key, wg->static_identity.static_public, WG_KEY_LEN); + memcpy(out_device.preshared_key, wg->static_identity.preshared_key, WG_KEY_LEN); + } + up_read(&wg->static_identity.lock); + + peer_data.out_len = in_device.peers_size; + peer_data.data = udevice + sizeof(struct wgdevice); + ret = peer_for_each_unlocked(wg, populate_peer, &peer_data); + if (ret) + goto out; + out_device.num_peers = peer_data.count; + + ret = copy_to_user(udevice, &out_device, sizeof(out_device)); + if (ret) + ret = -EFAULT; + +out: + mutex_unlock(&wg->device_update_lock); + /* out_device carries the private key and the preshared key; wipe the whole stack copy, not just private_key. */ + memzero_explicit(&out_device, sizeof(out_device)); + return ret; +} diff --git a/src/config.h b/src/config.h new file mode 100644 index 0000000..9678595 --- /dev/null +++ b/src/config.h @@ -0,0 +1,11 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. 
*/ + +#ifndef WGCONFIG_H +#define WGCONFIG_H + +struct wireguard_device; + +int config_get_device(struct wireguard_device *wg, void __user *udevice); +int config_set_device(struct wireguard_device *wg, void __user *udevice); + +#endif diff --git a/src/cookie.c b/src/cookie.c new file mode 100644 index 0000000..f02a7e8 --- /dev/null +++ b/src/cookie.c @@ -0,0 +1,237 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#include "wireguard.h" +#include "cookie.h" +#include "messages.h" +#include "crypto/blake2s.h" +#include "crypto/chacha20poly1305.h" +#include +#include +#include + +int cookie_checker_init(struct cookie_checker *checker, struct wireguard_device *wg) +{ + int ret = ratelimiter_init(&checker->ratelimiter, wg); + if (ret) + return ret; + init_rwsem(&checker->secret_lock); + checker->secret_birthdate = get_jiffies_64(); + get_random_bytes(checker->secret, NOISE_HASH_LEN); + checker->device = wg; + return 0; +} + +void cookie_checker_uninit(struct cookie_checker *checker) +{ + ratelimiter_uninit(&checker->ratelimiter); +} + +void cookie_init(struct cookie *cookie) +{ + memset(cookie, 0, sizeof(struct cookie)); + init_rwsem(&cookie->lock); +} + +static void compute_mac1(u8 mac1[COOKIE_LEN], const void *message, size_t len, const u8 pubkey[NOISE_PUBLIC_KEY_LEN], const u8 psk[NOISE_SYMMETRIC_KEY_LEN]) +{ + struct blake2s_state state; + len = len - sizeof(struct message_macs) + offsetof(struct message_macs, mac1); + + if (psk) + blake2s_init_key(&state, COOKIE_LEN, psk, NOISE_SYMMETRIC_KEY_LEN); + else + blake2s_init(&state, COOKIE_LEN); + blake2s_update(&state, pubkey, NOISE_PUBLIC_KEY_LEN); + blake2s_update(&state, message, len); + blake2s_final(&state, mac1, COOKIE_LEN); +} + +static void compute_mac2(u8 mac2[COOKIE_LEN], const void *message, size_t len, const u8 cookie[COOKIE_LEN]) +{ + len = len - sizeof(struct message_macs) + offsetof(struct message_macs, mac2); + blake2s(mac2, message, cookie, COOKIE_LEN, len, COOKIE_LEN); +} + 
+static inline const u8 *get_secret(struct cookie_checker *checker) +{ + if (!time_is_after_jiffies64(checker->secret_birthdate + COOKIE_SECRET_MAX_AGE)) { + down_write(&checker->secret_lock); + checker->secret_birthdate = get_jiffies_64(); + get_random_bytes(checker->secret, NOISE_HASH_LEN); + up_write(&checker->secret_lock); + } + down_read(&checker->secret_lock); + return checker->secret; +} + +static inline void put_secret(struct cookie_checker *checker) +{ + up_read(&checker->secret_lock); +} + +static void make_cookie(u8 cookie[COOKIE_LEN], struct sk_buff *skb, struct cookie_checker *checker) +{ + struct blake2s_state state; + const u8 *secret; + + secret = get_secret(checker); + + blake2s_init_key(&state, COOKIE_LEN, secret, NOISE_HASH_LEN); + if (ip_hdr(skb)->version == 4) + blake2s_update(&state, (u8 *)&ip_hdr(skb)->saddr, sizeof(struct in_addr)); + else if (ip_hdr(skb)->version == 6) + blake2s_update(&state, (u8 *)&ipv6_hdr(skb)->saddr, sizeof(struct in6_addr)); + blake2s_update(&state, (u8 *)&udp_hdr(skb)->source, sizeof(__be16)); + blake2s_final(&state, cookie, COOKIE_LEN); + + put_secret(checker); +} + +enum cookie_mac_state cookie_validate_packet(struct cookie_checker *checker, struct sk_buff *skb, void *data_start, size_t data_len, bool check_cookie) +{ + u8 computed_mac[COOKIE_LEN]; + u8 cookie[COOKIE_LEN]; + enum cookie_mac_state ret; + struct message_macs *macs = (struct message_macs *)(data_start + data_len - sizeof(struct message_macs)); + + ret = INVALID_MAC; + down_read(&checker->device->static_identity.lock); + if (unlikely(!checker->device->static_identity.has_identity)) { + up_read(&checker->device->static_identity.lock); + goto out; + } + compute_mac1(computed_mac, data_start, data_len, checker->device->static_identity.static_public, checker->device->static_identity.has_psk ? 
checker->device->static_identity.preshared_key : NULL); + up_read(&checker->device->static_identity.lock); + if (crypto_memneq(computed_mac, macs->mac1, COOKIE_LEN)) + goto out; + + ret = VALID_MAC_BUT_NO_COOKIE; + + if (!check_cookie) + goto out; + + make_cookie(cookie, skb, checker); + + compute_mac2(computed_mac, data_start, data_len, cookie); + if (crypto_memneq(computed_mac, macs->mac2, COOKIE_LEN)) + goto out; + + ret = VALID_MAC_WITH_COOKIE_BUT_RATELIMITED; + if (!ratelimiter_allow(&checker->ratelimiter, skb)) + goto out; + + ret = VALID_MAC_WITH_COOKIE; + +out: + memzero_explicit(computed_mac, COOKIE_LEN); + memzero_explicit(cookie, COOKIE_LEN); + return ret; +} + +void cookie_add_mac_to_packet(void *message, size_t len, struct wireguard_peer *peer) +{ + struct message_macs *macs = message + len - sizeof(struct message_macs); + + down_read(&peer->device->static_identity.lock); + if (unlikely(!peer->device->static_identity.has_identity)) { + memset(macs, 0, sizeof(struct message_macs)); + up_read(&peer->device->static_identity.lock); + return; + } + compute_mac1(macs->mac1, message, len, peer->handshake.remote_static, peer->device->static_identity.has_psk ? 
peer->device->static_identity.preshared_key : NULL); + up_read(&peer->device->static_identity.lock); + + down_write(&peer->latest_cookie.lock); + memcpy(peer->latest_cookie.last_mac1_sent, macs->mac1, COOKIE_LEN); + peer->latest_cookie.have_sent_mac1 = true; + up_write(&peer->latest_cookie.lock); + + down_read(&peer->latest_cookie.lock); + if (peer->latest_cookie.is_valid && time_is_after_jiffies64(peer->latest_cookie.birthdate + COOKIE_SECRET_MAX_AGE - COOKIE_SECRET_LATENCY)) + compute_mac2(macs->mac2, message, len, peer->latest_cookie.cookie); + else + memset(macs->mac2, 0, COOKIE_LEN); + up_read(&peer->latest_cookie.lock); +} + +void cookie_message_create(struct message_handshake_cookie *dst, struct sk_buff *skb, void *data_start, size_t data_len, __le32 index, struct cookie_checker *checker) +{ + struct message_macs *macs = (struct message_macs *)(data_start + data_len - sizeof(struct message_macs)); + struct blake2s_state state; + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + u8 cookie[COOKIE_LEN]; + + dst->header.type = MESSAGE_HANDSHAKE_COOKIE; + dst->receiver_index = index; + get_random_bytes(dst->salt, COOKIE_SALT_LEN); + + down_read(&checker->device->static_identity.lock); + if (unlikely(!checker->device->static_identity.has_identity)) { + memset(dst, 0, sizeof(struct message_handshake_cookie)); + up_read(&checker->device->static_identity.lock); + return; + } + if (checker->device->static_identity.has_psk) + blake2s_init_key(&state, NOISE_SYMMETRIC_KEY_LEN, checker->device->static_identity.preshared_key, NOISE_SYMMETRIC_KEY_LEN); + else + blake2s_init(&state, NOISE_SYMMETRIC_KEY_LEN); + blake2s_update(&state, checker->device->static_identity.static_public, NOISE_PUBLIC_KEY_LEN); + up_read(&checker->device->static_identity.lock); + blake2s_update(&state, dst->salt, COOKIE_SALT_LEN); + blake2s_final(&state, key, NOISE_SYMMETRIC_KEY_LEN); + + make_cookie(cookie, skb, checker); + chacha20poly1305_encrypt(dst->encrypted_cookie, cookie, COOKIE_LEN, macs->mac1, COOKIE_LEN, 
0, key); + + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); + memzero_explicit(cookie, COOKIE_LEN); +} + +/* Processes a received cookie reply. Looks up the handshake/keypair entry for src->receiver_index, derives the decryption key from the preshared key (if set), the peer's static public key and the message salt, and on successful decryption stores the result as the peer's latest cookie. Does nothing if no mac1 has been sent to that peer or the device has no identity. */ +void cookie_message_consume(struct message_handshake_cookie *src, struct wireguard_device *wg) +{ + struct blake2s_state state; + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + u8 cookie[COOKIE_LEN]; + struct index_hashtable_entry *entry; + + entry = index_hashtable_lookup(&wg->index_hashtable, INDEX_HASHTABLE_HANDSHAKE | INDEX_HASHTABLE_KEYPAIR, src->receiver_index); + if (unlikely(!entry)) /* the hint belongs on the failure condition, not on the pointer */ + return; + + down_read(&entry->peer->latest_cookie.lock); + if (unlikely(!entry->peer->latest_cookie.have_sent_mac1)) { + up_read(&entry->peer->latest_cookie.lock); + goto out; + } + up_read(&entry->peer->latest_cookie.lock); + + down_read(&wg->static_identity.lock); + if (unlikely(!wg->static_identity.has_identity)) { + up_read(&wg->static_identity.lock); + goto out; + } + if (wg->static_identity.has_psk) + blake2s_init_key(&state, NOISE_SYMMETRIC_KEY_LEN, wg->static_identity.preshared_key, NOISE_SYMMETRIC_KEY_LEN); + else + blake2s_init(&state, NOISE_SYMMETRIC_KEY_LEN); + up_read(&wg->static_identity.lock); + + blake2s_update(&state, entry->peer->handshake.remote_static, NOISE_PUBLIC_KEY_LEN); + blake2s_update(&state, src->salt, COOKIE_SALT_LEN); + blake2s_final(&state, key, NOISE_SYMMETRIC_KEY_LEN); + + down_write(&entry->peer->latest_cookie.lock); + /* The last mac1 we sent is the additional data, which ties the reply to our own message. */ + if (chacha20poly1305_decrypt(cookie, src->encrypted_cookie, sizeof(src->encrypted_cookie), entry->peer->latest_cookie.last_mac1_sent, COOKIE_LEN, 0, key)) { + memcpy(entry->peer->latest_cookie.cookie, cookie, COOKIE_LEN); + entry->peer->latest_cookie.birthdate = get_jiffies_64(); + entry->peer->latest_cookie.is_valid = true; + entry->peer->latest_cookie.have_sent_mac1 = false; + } else + net_dbg_ratelimited("Could not decrypt invalid cookie response\n"); + up_write(&entry->peer->latest_cookie.lock); + +out: + peer_put(entry->peer); + memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); /* matches the declared size of key */ + memzero_explicit(cookie, COOKIE_LEN); +} diff --git a/src/cookie.h 
b/src/cookie.h new file mode 100644 index 0000000..b9524e6 --- /dev/null +++ b/src/cookie.h @@ -0,0 +1,55 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#ifndef WGCOOKIE +#define WGCOOKIE + +#include "noise.h" +#include "peer.h" +#include "ratelimiter.h" +#include + +enum { + COOKIE_SECRET_MAX_AGE = 2 * 60 * HZ, + COOKIE_SECRET_LATENCY = 5 * HZ, + COOKIE_SALT_LEN = 32, + COOKIE_LEN = 16 +}; + +struct wireguard_device; +struct sk_buff; + +struct cookie_checker { + u8 secret[NOISE_HASH_LEN]; + uint64_t secret_birthdate; + struct rw_semaphore secret_lock; + struct ratelimiter ratelimiter; + struct wireguard_device *device; +}; + +struct cookie { + uint64_t birthdate; + bool is_valid; + u8 cookie[COOKIE_LEN]; + bool have_sent_mac1; + u8 last_mac1_sent[COOKIE_LEN]; + struct rw_semaphore lock; +}; + +enum cookie_mac_state { + INVALID_MAC, + VALID_MAC_BUT_NO_COOKIE, + VALID_MAC_WITH_COOKIE_BUT_RATELIMITED, + VALID_MAC_WITH_COOKIE +}; + +int cookie_checker_init(struct cookie_checker *checker, struct wireguard_device *wg); +void cookie_checker_uninit(struct cookie_checker *checker); +void cookie_init(struct cookie *cookie); + +enum cookie_mac_state cookie_validate_packet(struct cookie_checker *checker, struct sk_buff *skb, void *data_start, size_t data_len, bool check_cookie); +void cookie_add_mac_to_packet(void *message, size_t len, struct wireguard_peer *peer); + +void cookie_message_create(struct message_handshake_cookie *src, struct sk_buff *skb, void *data_start, size_t data_len, __le32 index, struct cookie_checker *checker); +void cookie_message_consume(struct message_handshake_cookie *src, struct wireguard_device *wg); + +#endif diff --git a/src/crypto/blake2s.c b/src/crypto/blake2s.c new file mode 100644 index 0000000..0a7170e --- /dev/null +++ b/src/crypto/blake2s.c @@ -0,0 +1,840 @@ +/* Original author: Samuel Neves + * + * Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. 
+ */ + +#include "../wireguard.h" +#include "blake2s.h" +#include +#include +#include + /* BLAKE2s parameter block. The packed fields total 32 bytes, which blake2s_init_param reads as eight 32-bit little-endian words. NOTE(review): the three bare #include lines above have lost their header names in this copy of the patch - restore them from the original tree. */ +typedef struct { + uint8_t digest_length; + uint8_t key_length; + uint8_t fanout; + uint8_t depth; + uint32_t leaf_length; + uint8_t node_offset[6]; + uint8_t node_depth; + uint8_t inner_length; + uint8_t salt[8]; + uint8_t personal[8]; +} __packed blake2s_param; + /* Initial values copied into state->h[0..7] by blake2s_init_param and mixed into v[8..15] by blake2s_compress. */ +static const uint32_t blake2s_iv[8] = { + 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, + 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL +}; + /* Message-word selection table: blake2s_compress indexes it as blake2s_sigma[r][2 * i] and blake2s_sigma[r][2 * i + 1] to pick the two message words of each G step in round r. */ +static const uint8_t blake2s_sigma[10][16] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, + {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, + {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, + {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, + {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, + {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, + {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, + {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, +}; + /* Rotate w right by c bits. c must be in 1..31: with c == 0 the left shift would be by 32, which is undefined for a 32-bit operand. The callers in this file pass 16, 12, 8 and 7. */ +static inline uint32_t rotr32(const uint32_t w, const uint8_t c) +{ + return (w >> c) | (w << (32 - c)); +} + /* void-pointer wrapper around le32_to_cpup so callers can pass byte pointers without a cast. */ +static inline u32 le32_to_cpuvp(const void *p) +{ + return le32_to_cpup(p); +} + /* Set the finalisation flags: f[0] becomes all ones, and f[1] as well when state->last_node is set. */ +static inline void blake2s_set_lastblock(struct blake2s_state *state) +{ + if (state->last_node) + state->f[1] = -1; + state->f[0] = -1; +} + /* Add inc to the counter kept in t[0] (low word) and t[1] (high word); the comparison yields 1 exactly when t[0] wrapped, which carries into t[1]. */ +static inline void blake2s_increment_counter(struct blake2s_state *state, const uint32_t inc) +{ + state->t[0] += inc; + state->t[1] += (state->t[0] < inc); +} + +/* init2 xors IV with input parameter block */ +__attribute__((optimize("unroll-loops"))) +static inline void blake2s_init_param(struct blake2s_state *state, const blake2s_param *param) +{ + const uint32_t *p; + int i; + /* the whole state is zeroed before h[] is loaded from the IV */ memset(state, 0, sizeof(struct blake2s_state)); + for (i = 0; i < 8; ++i) + state->h[i] = blake2s_iv[i]; + p = (const uint32_t
*)param; + /* IV XOR ParamBlock */ + for (i = 0; i < 8; ++i) + state->h[i] ^= le32_to_cpuvp(&p[i]); +} + /* Start an unkeyed hash producing outlen bytes: builds a parameter block with digest_length = outlen, fanout = 1 and depth = 1 and passes its address to blake2s_init_param. outlen is only range-checked when DEBUG is defined. */ +void blake2s_init(struct blake2s_state *state, const uint8_t outlen) +{ + blake2s_param param = { + .digest_length = outlen, + .fanout = 1, + .depth = 1 + }; + +#ifdef DEBUG + BUG_ON(!outlen || outlen > BLAKE2S_OUTBYTES); +#endif + blake2s_init_param(state, &param); +} + /* Start a keyed hash: same as blake2s_init but with key_length = keylen in the parameter block, after which the key is absorbed as one zero-padded block. NOTE(review): keylen is only range-checked when DEBUG is defined; without DEBUG a keylen above BLAKE2S_BLOCKBYTES would overrun block in the memcpy below. */ +void blake2s_init_key(struct blake2s_state *state, const uint8_t outlen, const void *key, const uint8_t keylen) +{ + blake2s_param param = { + .digest_length = outlen, + .key_length = keylen, + .fanout = 1, + .depth = 1 + }; + uint8_t block[BLAKE2S_BLOCKBYTES] = { 0 }; + +#ifdef DEBUG + BUG_ON(!outlen || outlen > BLAKE2S_OUTBYTES || !key || !keylen || keylen > BLAKE2S_KEYBYTES); +#endif + blake2s_init_param(state, &param); + memcpy(block, key, keylen); + blake2s_update(state, block, BLAKE2S_BLOCKBYTES); + /* the stack copy of the key is wiped before returning */ memzero_explicit(block, BLAKE2S_BLOCKBYTES); +} + /* One compression step: loads 16 little-endian 32-bit words from block into m, seeds the working vector v from h, the IV, the counter t and the flags f, then applies the G and ROUND macros defined below. */ +__attribute__((optimize("unroll-loops"))) +static inline void blake2s_compress(struct blake2s_state *state, const uint8_t block[BLAKE2S_BLOCKBYTES]) +{ + uint32_t m[16]; + uint32_t v[16]; + int i; + + for (i = 0; i < 16; ++i) + m[i] = le32_to_cpuvp(block + i * sizeof(m[i])); + + for (i = 0; i < 8; ++i) + v[i] = state->h[i]; + + v[8] = blake2s_iv[0]; + v[9] = blake2s_iv[1]; + v[10] = blake2s_iv[2]; + v[11] = blake2s_iv[3]; + v[12] = state->t[0] ^ blake2s_iv[4]; + v[13] = state->t[1] ^ blake2s_iv[5]; + v[14] = state->f[0] ^ blake2s_iv[6]; + v[15] = state->f[1] ^ blake2s_iv[7]; +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2s_sigma[r][2 * i + 0]]; \ + d = rotr32(d ^ a, 16); \ + c = c + d; \ + b = rotr32(b ^ c, 12); \ + a = a + b + m[blake2s_sigma[r][2 * i + 1]]; \ + d = rotr32(d ^ a, 8); \ + c = c + d; \ + b = rotr32(b ^ c, 7); \ + } while(0) +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[
5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ +} while(0) + ROUND(0); + ROUND(1); + ROUND(2); + ROUND(3); + ROUND(4); + ROUND(5); + ROUND(6); + ROUND(7); + ROUND(8); + ROUND(9); + /* fold both halves of the working vector back into the chaining value */ + for (i = 0; i < 8; ++i) + state->h[i] = state->h[i] ^ v[i] ^ v[i + 8]; +#undef G +#undef ROUND +} + /* Absorb inlen bytes from in. state->buf holds up to two blocks; a block is compressed only when more input arrives than the buffer can still take, so the most recent data always stays buffered for blake2s_final. A call with inlen == 0 does nothing. */ +void blake2s_update(struct blake2s_state *state, const uint8_t *in, uint64_t inlen) +{ + size_t left, fill; + while (inlen > 0) { + left = state->buflen; + fill = 2 * BLAKE2S_BLOCKBYTES - left; + + if (inlen > fill) { + memcpy(state->buf + left, in, fill); // Fill buffer + state->buflen += fill; + blake2s_increment_counter(state, BLAKE2S_BLOCKBYTES); + blake2s_compress(state, state->buf); // Compress + memcpy(state->buf, state->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES);// Shift buffer left + state->buflen -= BLAKE2S_BLOCKBYTES; + in += fill; + inlen -= fill; + } else { // inlen <= fill + memcpy(state->buf + left, in, inlen); + state->buflen += inlen; // Be lazy, do not compress + in += inlen; + inlen -= inlen; + } + } +} + /* Finish the hash. If more than one block is buffered the first is compressed and the remainder moved to the front; the counter is then advanced by the remaining byte count, the last-block flags are set, the buffer is zero-padded and compressed, and h[0..7] is serialised little-endian into a temporary buffer. NOTE(review): the serialisation stores through a __le32 pointer cast of a uint8_t array - confirm alignment and aliasing are acceptable for the target build. */ +__attribute__((optimize("unroll-loops"))) +void blake2s_final(struct blake2s_state *state, uint8_t *out, uint8_t outlen) +{ + uint8_t buffer[BLAKE2S_OUTBYTES] = { 0 }; + int i; + +#ifdef DEBUG + BUG_ON(!out || !outlen || outlen > BLAKE2S_OUTBYTES); +#endif + + if (state->buflen > BLAKE2S_BLOCKBYTES) { + blake2s_increment_counter(state, BLAKE2S_BLOCKBYTES); + blake2s_compress(state, state->buf); + state->buflen -= BLAKE2S_BLOCKBYTES; + memcpy(state->buf, state->buf + BLAKE2S_BLOCKBYTES, state->buflen); + } + + blake2s_increment_counter(state, (uint32_t) state->buflen); + blake2s_set_lastblock(state); + memset(state->buf + state->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - state->buflen); /* Padding */ + blake2s_compress(state, state->buf); + + for (i = 0; i < 8; ++i) /* output full hash to temp buffer */ + *(__le32 *)(buffer + sizeof(state->h[i]) * i) = cpu_to_le32(state->h[i]); +
memcpy(out, buffer, outlen); + + /* Burn state from stack */ + memzero_explicit(buffer, BLAKE2S_OUTBYTES); + memzero_explicit(state, sizeof(struct blake2s_state)); +} + /* One-shot hash of inlen bytes at in into outlen bytes at out. Uses the keyed initialisation only when keylen > 0 and key is non-NULL; otherwise hashes unkeyed. Arguments are range-checked only when DEBUG is defined. */ +void blake2s(uint8_t *out, const uint8_t *in, const uint8_t *key, const uint8_t outlen, uint64_t inlen, const uint8_t keylen) +{ + struct blake2s_state state; + +#ifdef DEBUG + BUG_ON((!in && inlen > 0) || !out || !outlen || outlen > BLAKE2S_OUTBYTES || keylen > BLAKE2S_KEYBYTES); +#endif + + if (keylen > 0 && key) + blake2s_init_key(&state, outlen, key, keylen); + else + blake2s_init(&state, outlen); + + blake2s_update(&state, in, inlen); + blake2s_final(&state, out, outlen); +} + /* HMAC construction over unkeyed BLAKE2s. A key longer than one block is first hashed down to BLAKE2S_OUTBYTES; the (zero-padded) key is XORed with 0x36 for the inner pad and 0x5c for the outer pad; out receives the first outlen bytes of H(o_key, H(i_key, in)). The pads and the intermediate hash are wiped before returning. NOTE(review): outlen is not range-checked here, so memcpy(out, i_hash, outlen) reads past i_hash when outlen > BLAKE2S_OUTBYTES - confirm callers never pass more. */ +__attribute__((optimize("unroll-loops"))) +void blake2s_hmac(uint8_t *out, const uint8_t *in, const uint8_t *key, const uint8_t outlen, const uint64_t inlen, const uint64_t keylen) +{ + struct blake2s_state state; + uint8_t o_key[BLAKE2S_BLOCKBYTES] = { 0 }; + uint8_t i_key[BLAKE2S_BLOCKBYTES] = { 0 }; + uint8_t i_hash[BLAKE2S_OUTBYTES]; + int i; + + if (keylen > BLAKE2S_BLOCKBYTES) { + blake2s_init(&state, BLAKE2S_OUTBYTES); + blake2s_update(&state, key, keylen); + blake2s_final(&state, o_key, BLAKE2S_OUTBYTES); + memcpy(i_key, o_key, BLAKE2S_OUTBYTES); + } else { + memcpy(o_key, key, keylen); + memcpy(i_key, key, keylen); + } + + for (i = 0; i < BLAKE2S_BLOCKBYTES; ++i) { + o_key[i] ^= 0x5c; + i_key[i] ^= 0x36; + } + /* inner hash: i_key block followed by the message */ + blake2s_init(&state, BLAKE2S_OUTBYTES); + blake2s_update(&state, i_key, BLAKE2S_BLOCKBYTES); + blake2s_update(&state, in, inlen); + blake2s_final(&state, i_hash, BLAKE2S_OUTBYTES); + /* outer hash: o_key block followed by the inner digest; the result overwrites i_hash */ + blake2s_init(&state, BLAKE2S_OUTBYTES); + blake2s_update(&state, o_key, BLAKE2S_BLOCKBYTES); + blake2s_update(&state, i_hash, BLAKE2S_OUTBYTES); + blake2s_final(&state, i_hash, BLAKE2S_OUTBYTES); + + memcpy(out, i_hash, outlen); + memzero_explicit(o_key, BLAKE2S_BLOCKBYTES); + memzero_explicit(i_key, BLAKE2S_BLOCKBYTES); + memzero_explicit(i_hash, BLAKE2S_OUTBYTES); +} + +#ifdef DEBUG +static const uint8_t
blake2s_testvecs[][BLAKE2S_OUTBYTES] = { + { 0x69, 0x21, 0x7A, 0x30, 0x79, 0x90, 0x80, 0x94, 0xE1, 0x11, 0x21, 0xD0, 0x42, 0x35, 0x4A, 0x7C, 0x1F, 0x55, 0xB6, 0x48, 0x2C, 0xA1, 0xA5, 0x1E, 0x1B, 0x25, 0x0D, 0xFD, 0x1E, 0xD0, 0xEE, 0xF9 }, + { 0xE3, 0x4D, 0x74, 0xDB, 0xAF, 0x4F, 0xF4, 0xC6, 0xAB, 0xD8, 0x71, 0xCC, 0x22, 0x04, 0x51, 0xD2, 0xEA, 0x26, 0x48, 0x84, 0x6C, 0x77, 0x57, 0xFB, 0xAA, 0xC8, 0x2F, 0xE5, 0x1A, 0xD6, 0x4B, 0xEA }, + { 0xDD, 0xAD, 0x9A, 0xB1, 0x5D, 0xAC, 0x45, 0x49, 0xBA, 0x42, 0xF4, 0x9D, 0x26, 0x24, 0x96, 0xBE, 0xF6, 0xC0, 0xBA, 0xE1, 0xDD, 0x34, 0x2A, 0x88, 0x08, 0xF8, 0xEA, 0x26, 0x7C, 0x6E, 0x21, 0x0C }, + { 0xE8, 0xF9, 0x1C, 0x6E, 0xF2, 0x32, 0xA0, 0x41, 0x45, 0x2A, 0xB0, 0xE1, 0x49, 0x07, 0x0C, 0xDD, 0x7D, 0xD1, 0x76, 0x9E, 0x75, 0xB3, 0xA5, 0x92, 0x1B, 0xE3, 0x78, 0x76, 0xC4, 0x5C, 0x99, 0x00 }, + { 0x0C, 0xC7, 0x0E, 0x00, 0x34, 0x8B, 0x86, 0xBA, 0x29, 0x44, 0xD0, 0xC3, 0x20, 0x38, 0xB2, 0x5C, 0x55, 0x58, 0x4F, 0x90, 0xDF, 0x23, 0x04, 0xF5, 0x5F, 0xA3, 0x32, 0xAF, 0x5F, 0xB0, 0x1E, 0x20 }, + { 0xEC, 0x19, 0x64, 0x19, 0x10, 0x87, 0xA4, 0xFE, 0x9D, 0xF1, 0xC7, 0x95, 0x34, 0x2A, 0x02, 0xFF, 0xC1, 0x91, 0xA5, 0xB2, 0x51, 0x76, 0x48, 0x56, 0xAE, 0x5B, 0x8B, 0x57, 0x69, 0xF0, 0xC6, 0xCD }, + { 0xE1, 0xFA, 0x51, 0x61, 0x8D, 0x7D, 0xF4, 0xEB, 0x70, 0xCF, 0x0D, 0x5A, 0x9E, 0x90, 0x6F, 0x80, 0x6E, 0x9D, 0x19, 0xF7, 0xF4, 0xF0, 0x1E, 0x3B, 0x62, 0x12, 0x88, 0xE4, 0x12, 0x04, 0x05, 0xD6 }, + { 0x59, 0x80, 0x01, 0xFA, 0xFB, 0xE8, 0xF9, 0x4E, 0xC6, 0x6D, 0xC8, 0x27, 0xD0, 0x12, 0xCF, 0xCB, 0xBA, 0x22, 0x28, 0x56, 0x9F, 0x44, 0x8E, 0x89, 0xEA, 0x22, 0x08, 0xC8, 0xBF, 0x76, 0x92, 0x93 }, + { 0xC7, 0xE8, 0x87, 0xB5, 0x46, 0x62, 0x36, 0x35, 0xE9, 0x3E, 0x04, 0x95, 0x59, 0x8F, 0x17, 0x26, 0x82, 0x19, 0x96, 0xC2, 0x37, 0x77, 0x05, 0xB9, 0x3A, 0x1F, 0x63, 0x6F, 0x87, 0x2B, 0xFA, 0x2D }, + { 0xC3, 0x15, 0xA4, 0x37, 0xDD, 0x28, 0x06, 0x2A, 0x77, 0x0D, 0x48, 0x19, 0x67, 0x13, 0x6B, 0x1B, 0x5E, 0xB8, 0x8B, 0x21, 0xEE, 0x53, 0xD0, 0x32, 0x9C, 0x58, 0x97, 0x12, 
0x6E, 0x9D, 0xB0, 0x2C }, + { 0xBB, 0x47, 0x3D, 0xED, 0xDC, 0x05, 0x5F, 0xEA, 0x62, 0x28, 0xF2, 0x07, 0xDA, 0x57, 0x53, 0x47, 0xBB, 0x00, 0x40, 0x4C, 0xD3, 0x49, 0xD3, 0x8C, 0x18, 0x02, 0x63, 0x07, 0xA2, 0x24, 0xCB, 0xFF }, + { 0x68, 0x7E, 0x18, 0x73, 0xA8, 0x27, 0x75, 0x91, 0xBB, 0x33, 0xD9, 0xAD, 0xF9, 0xA1, 0x39, 0x12, 0xEF, 0xEF, 0xE5, 0x57, 0xCA, 0xFC, 0x39, 0xA7, 0x95, 0x26, 0x23, 0xE4, 0x72, 0x55, 0xF1, 0x6D }, + { 0x1A, 0xC7, 0xBA, 0x75, 0x4D, 0x6E, 0x2F, 0x94, 0xE0, 0xE8, 0x6C, 0x46, 0xBF, 0xB2, 0x62, 0xAB, 0xBB, 0x74, 0xF4, 0x50, 0xEF, 0x45, 0x6D, 0x6B, 0x4D, 0x97, 0xAA, 0x80, 0xCE, 0x6D, 0xA7, 0x67 }, + { 0x01, 0x2C, 0x97, 0x80, 0x96, 0x14, 0x81, 0x6B, 0x5D, 0x94, 0x94, 0x47, 0x7D, 0x4B, 0x68, 0x7D, 0x15, 0xB9, 0x6E, 0xB6, 0x9C, 0x0E, 0x80, 0x74, 0xA8, 0x51, 0x6F, 0x31, 0x22, 0x4B, 0x5C, 0x98 }, + { 0x91, 0xFF, 0xD2, 0x6C, 0xFA, 0x4D, 0xA5, 0x13, 0x4C, 0x7E, 0xA2, 0x62, 0xF7, 0x88, 0x9C, 0x32, 0x9F, 0x61, 0xF6, 0xA6, 0x57, 0x22, 0x5C, 0xC2, 0x12, 0xF4, 0x00, 0x56, 0xD9, 0x86, 0xB3, 0xF4 }, + { 0xD9, 0x7C, 0x82, 0x8D, 0x81, 0x82, 0xA7, 0x21, 0x80, 0xA0, 0x6A, 0x78, 0x26, 0x83, 0x30, 0x67, 0x3F, 0x7C, 0x4E, 0x06, 0x35, 0x94, 0x7C, 0x04, 0xC0, 0x23, 0x23, 0xFD, 0x45, 0xC0, 0xA5, 0x2D }, + { 0xEF, 0xC0, 0x4C, 0xDC, 0x39, 0x1C, 0x7E, 0x91, 0x19, 0xBD, 0x38, 0x66, 0x8A, 0x53, 0x4E, 0x65, 0xFE, 0x31, 0x03, 0x6D, 0x6A, 0x62, 0x11, 0x2E, 0x44, 0xEB, 0xEB, 0x11, 0xF9, 0xC5, 0x70, 0x80 }, + { 0x99, 0x2C, 0xF5, 0xC0, 0x53, 0x44, 0x2A, 0x5F, 0xBC, 0x4F, 0xAF, 0x58, 0x3E, 0x04, 0xE5, 0x0B, 0xB7, 0x0D, 0x2F, 0x39, 0xFB, 0xB6, 0xA5, 0x03, 0xF8, 0x9E, 0x56, 0xA6, 0x3E, 0x18, 0x57, 0x8A }, + { 0x38, 0x64, 0x0E, 0x9F, 0x21, 0x98, 0x3E, 0x67, 0xB5, 0x39, 0xCA, 0xCC, 0xAE, 0x5E, 0xCF, 0x61, 0x5A, 0xE2, 0x76, 0x4F, 0x75, 0xA0, 0x9C, 0x9C, 0x59, 0xB7, 0x64, 0x83, 0xC1, 0xFB, 0xC7, 0x35 }, + { 0x21, 0x3D, 0xD3, 0x4C, 0x7E, 0xFE, 0x4F, 0xB2, 0x7A, 0x6B, 0x35, 0xF6, 0xB4, 0x00, 0x0D, 0x1F, 0xE0, 0x32, 0x81, 0xAF, 0x3C, 0x72, 0x3E, 0x5C, 0x9F, 0x94, 0x74, 0x7A, 0x5F, 0x31, 0xCD, 
0x3B }, + { 0xEC, 0x24, 0x6E, 0xEE, 0xB9, 0xCE, 0xD3, 0xF7, 0xAD, 0x33, 0xED, 0x28, 0x66, 0x0D, 0xD9, 0xBB, 0x07, 0x32, 0x51, 0x3D, 0xB4, 0xE2, 0xFA, 0x27, 0x8B, 0x60, 0xCD, 0xE3, 0x68, 0x2A, 0x4C, 0xCD }, + { 0xAC, 0x9B, 0x61, 0xD4, 0x46, 0x64, 0x8C, 0x30, 0x05, 0xD7, 0x89, 0x2B, 0xF3, 0xA8, 0x71, 0x9F, 0x4C, 0x81, 0x81, 0xCF, 0xDC, 0xBC, 0x2B, 0x79, 0xFE, 0xF1, 0x0A, 0x27, 0x9B, 0x91, 0x10, 0x95 }, + { 0x7B, 0xF8, 0xB2, 0x29, 0x59, 0xE3, 0x4E, 0x3A, 0x43, 0xF7, 0x07, 0x92, 0x23, 0xE8, 0x3A, 0x97, 0x54, 0x61, 0x7D, 0x39, 0x1E, 0x21, 0x3D, 0xFD, 0x80, 0x8E, 0x41, 0xB9, 0xBE, 0xAD, 0x4C, 0xE7 }, + { 0x68, 0xD4, 0xB5, 0xD4, 0xFA, 0x0E, 0x30, 0x2B, 0x64, 0xCC, 0xC5, 0xAF, 0x79, 0x29, 0x13, 0xAC, 0x4C, 0x88, 0xEC, 0x95, 0xC0, 0x7D, 0xDF, 0x40, 0x69, 0x42, 0x56, 0xEB, 0x88, 0xCE, 0x9F, 0x3D }, + { 0xB2, 0xC2, 0x42, 0x0F, 0x05, 0xF9, 0xAB, 0xE3, 0x63, 0x15, 0x91, 0x93, 0x36, 0xB3, 0x7E, 0x4E, 0x0F, 0xA3, 0x3F, 0xF7, 0xE7, 0x6A, 0x49, 0x27, 0x67, 0x00, 0x6F, 0xDB, 0x5D, 0x93, 0x54, 0x62 }, + { 0x13, 0x4F, 0x61, 0xBB, 0xD0, 0xBB, 0xB6, 0x9A, 0xED, 0x53, 0x43, 0x90, 0x45, 0x51, 0xA3, 0xE6, 0xC1, 0xAA, 0x7D, 0xCD, 0xD7, 0x7E, 0x90, 0x3E, 0x70, 0x23, 0xEB, 0x7C, 0x60, 0x32, 0x0A, 0xA7 }, + { 0x46, 0x93, 0xF9, 0xBF, 0xF7, 0xD4, 0xF3, 0x98, 0x6A, 0x7D, 0x17, 0x6E, 0x6E, 0x06, 0xF7, 0x2A, 0xD1, 0x49, 0x0D, 0x80, 0x5C, 0x99, 0xE2, 0x53, 0x47, 0xB8, 0xDE, 0x77, 0xB4, 0xDB, 0x6D, 0x9B }, + { 0x85, 0x3E, 0x26, 0xF7, 0x41, 0x95, 0x3B, 0x0F, 0xD5, 0xBD, 0xB4, 0x24, 0xE8, 0xAB, 0x9E, 0x8B, 0x37, 0x50, 0xEA, 0xA8, 0xEF, 0x61, 0xE4, 0x79, 0x02, 0xC9, 0x1E, 0x55, 0x4E, 0x9C, 0x73, 0xB9 }, + { 0xF7, 0xDE, 0x53, 0x63, 0x61, 0xAB, 0xAA, 0x0E, 0x15, 0x81, 0x56, 0xCF, 0x0E, 0xA4, 0xF6, 0x3A, 0x99, 0xB5, 0xE4, 0x05, 0x4F, 0x8F, 0xA4, 0xC9, 0xD4, 0x5F, 0x62, 0x85, 0xCA, 0xD5, 0x56, 0x94 }, + { 0x4C, 0x23, 0x06, 0x08, 0x86, 0x0A, 0x99, 0xAE, 0x8D, 0x7B, 0xD5, 0xC2, 0xCC, 0x17, 0xFA, 0x52, 0x09, 0x6B, 0x9A, 0x61, 0xBE, 0xDB, 0x17, 0xCB, 0x76, 0x17, 0x86, 0x4A, 0xD2, 0x9C, 0xA7, 0xA6 }, + { 0xAE, 
0xB9, 0x20, 0xEA, 0x87, 0x95, 0x2D, 0xAD, 0xB1, 0xFB, 0x75, 0x92, 0x91, 0xE3, 0x38, 0x81, 0x39, 0xA8, 0x72, 0x86, 0x50, 0x01, 0x88, 0x6E, 0xD8, 0x47, 0x52, 0xE9, 0x3C, 0x25, 0x0C, 0x2A }, + { 0xAB, 0xA4, 0xAD, 0x9B, 0x48, 0x0B, 0x9D, 0xF3, 0xD0, 0x8C, 0xA5, 0xE8, 0x7B, 0x0C, 0x24, 0x40, 0xD4, 0xE4, 0xEA, 0x21, 0x22, 0x4C, 0x2E, 0xB4, 0x2C, 0xBA, 0xE4, 0x69, 0xD0, 0x89, 0xB9, 0x31 }, + { 0x05, 0x82, 0x56, 0x07, 0xD7, 0xFD, 0xF2, 0xD8, 0x2E, 0xF4, 0xC3, 0xC8, 0xC2, 0xAE, 0xA9, 0x61, 0xAD, 0x98, 0xD6, 0x0E, 0xDF, 0xF7, 0xD0, 0x18, 0x98, 0x3E, 0x21, 0x20, 0x4C, 0x0D, 0x93, 0xD1 }, + { 0xA7, 0x42, 0xF8, 0xB6, 0xAF, 0x82, 0xD8, 0xA6, 0xCA, 0x23, 0x57, 0xC5, 0xF1, 0xCF, 0x91, 0xDE, 0xFB, 0xD0, 0x66, 0x26, 0x7D, 0x75, 0xC0, 0x48, 0xB3, 0x52, 0x36, 0x65, 0x85, 0x02, 0x59, 0x62 }, + { 0x2B, 0xCA, 0xC8, 0x95, 0x99, 0x00, 0x0B, 0x42, 0xC9, 0x5A, 0xE2, 0x38, 0x35, 0xA7, 0x13, 0x70, 0x4E, 0xD7, 0x97, 0x89, 0xC8, 0x4F, 0xEF, 0x14, 0x9A, 0x87, 0x4F, 0xF7, 0x33, 0xF0, 0x17, 0xA2 }, + { 0xAC, 0x1E, 0xD0, 0x7D, 0x04, 0x8F, 0x10, 0x5A, 0x9E, 0x5B, 0x7A, 0xB8, 0x5B, 0x09, 0xA4, 0x92, 0xD5, 0xBA, 0xFF, 0x14, 0xB8, 0xBF, 0xB0, 0xE9, 0xFD, 0x78, 0x94, 0x86, 0xEE, 0xA2, 0xB9, 0x74 }, + { 0xE4, 0x8D, 0x0E, 0xCF, 0xAF, 0x49, 0x7D, 0x5B, 0x27, 0xC2, 0x5D, 0x99, 0xE1, 0x56, 0xCB, 0x05, 0x79, 0xD4, 0x40, 0xD6, 0xE3, 0x1F, 0xB6, 0x24, 0x73, 0x69, 0x6D, 0xBF, 0x95, 0xE0, 0x10, 0xE4 }, + { 0x12, 0xA9, 0x1F, 0xAD, 0xF8, 0xB2, 0x16, 0x44, 0xFD, 0x0F, 0x93, 0x4F, 0x3C, 0x4A, 0x8F, 0x62, 0xBA, 0x86, 0x2F, 0xFD, 0x20, 0xE8, 0xE9, 0x61, 0x15, 0x4C, 0x15, 0xC1, 0x38, 0x84, 0xED, 0x3D }, + { 0x7C, 0xBE, 0xE9, 0x6E, 0x13, 0x98, 0x97, 0xDC, 0x98, 0xFB, 0xEF, 0x3B, 0xE8, 0x1A, 0xD4, 0xD9, 0x64, 0xD2, 0x35, 0xCB, 0x12, 0x14, 0x1F, 0xB6, 0x67, 0x27, 0xE6, 0xE5, 0xDF, 0x73, 0xA8, 0x78 }, + { 0xEB, 0xF6, 0x6A, 0xBB, 0x59, 0x7A, 0xE5, 0x72, 0xA7, 0x29, 0x7C, 0xB0, 0x87, 0x1E, 0x35, 0x5A, 0xCC, 0xAF, 0xAD, 0x83, 0x77, 0xB8, 0xE7, 0x8B, 0xF1, 0x64, 0xCE, 0x2A, 0x18, 0xDE, 0x4B, 0xAF }, + { 0x71, 0xB9, 0x33, 0xB0, 
0x7E, 0x4F, 0xF7, 0x81, 0x8C, 0xE0, 0x59, 0xD0, 0x08, 0x82, 0x9E, 0x45, 0x3C, 0x6F, 0xF0, 0x2E, 0xC0, 0xA7, 0xDB, 0x39, 0x3F, 0xC2, 0xD8, 0x70, 0xF3, 0x7A, 0x72, 0x86 }, + { 0x7C, 0xF7, 0xC5, 0x13, 0x31, 0x22, 0x0B, 0x8D, 0x3E, 0xBA, 0xED, 0x9C, 0x29, 0x39, 0x8A, 0x16, 0xD9, 0x81, 0x56, 0xE2, 0x61, 0x3C, 0xB0, 0x88, 0xF2, 0xB0, 0xE0, 0x8A, 0x1B, 0xE4, 0xCF, 0x4F }, + { 0x3E, 0x41, 0xA1, 0x08, 0xE0, 0xF6, 0x4A, 0xD2, 0x76, 0xB9, 0x79, 0xE1, 0xCE, 0x06, 0x82, 0x79, 0xE1, 0x6F, 0x7B, 0xC7, 0xE4, 0xAA, 0x1D, 0x21, 0x1E, 0x17, 0xB8, 0x11, 0x61, 0xDF, 0x16, 0x02 }, + { 0x88, 0x65, 0x02, 0xA8, 0x2A, 0xB4, 0x7B, 0xA8, 0xD8, 0x67, 0x10, 0xAA, 0x9D, 0xE3, 0xD4, 0x6E, 0xA6, 0x5C, 0x47, 0xAF, 0x6E, 0xE8, 0xDE, 0x45, 0x0C, 0xCE, 0xB8, 0xB1, 0x1B, 0x04, 0x5F, 0x50 }, + { 0xC0, 0x21, 0xBC, 0x5F, 0x09, 0x54, 0xFE, 0xE9, 0x4F, 0x46, 0xEA, 0x09, 0x48, 0x7E, 0x10, 0xA8, 0x48, 0x40, 0xD0, 0x2F, 0x64, 0x81, 0x0B, 0xC0, 0x8D, 0x9E, 0x55, 0x1F, 0x7D, 0x41, 0x68, 0x14 }, + { 0x20, 0x30, 0x51, 0x6E, 0x8A, 0x5F, 0xE1, 0x9A, 0xE7, 0x9C, 0x33, 0x6F, 0xCE, 0x26, 0x38, 0x2A, 0x74, 0x9D, 0x3F, 0xD0, 0xEC, 0x91, 0xE5, 0x37, 0xD4, 0xBD, 0x23, 0x58, 0xC1, 0x2D, 0xFB, 0x22 }, + { 0x55, 0x66, 0x98, 0xDA, 0xC8, 0x31, 0x7F, 0xD3, 0x6D, 0xFB, 0xDF, 0x25, 0xA7, 0x9C, 0xB1, 0x12, 0xD5, 0x42, 0x58, 0x60, 0x60, 0x5C, 0xBA, 0xF5, 0x07, 0xF2, 0x3B, 0xF7, 0xE9, 0xF4, 0x2A, 0xFE }, + { 0x2F, 0x86, 0x7B, 0xA6, 0x77, 0x73, 0xFD, 0xC3, 0xE9, 0x2F, 0xCE, 0xD9, 0x9A, 0x64, 0x09, 0xAD, 0x39, 0xD0, 0xB8, 0x80, 0xFD, 0xE8, 0xF1, 0x09, 0xA8, 0x17, 0x30, 0xC4, 0x45, 0x1D, 0x01, 0x78 }, + { 0x17, 0x2E, 0xC2, 0x18, 0xF1, 0x19, 0xDF, 0xAE, 0x98, 0x89, 0x6D, 0xFF, 0x29, 0xDD, 0x98, 0x76, 0xC9, 0x4A, 0xF8, 0x74, 0x17, 0xF9, 0xAE, 0x4C, 0x70, 0x14, 0xBB, 0x4E, 0x4B, 0x96, 0xAF, 0xC7 }, + { 0x3F, 0x85, 0x81, 0x4A, 0x18, 0x19, 0x5F, 0x87, 0x9A, 0xA9, 0x62, 0xF9, 0x5D, 0x26, 0xBD, 0x82, 0xA2, 0x78, 0xF2, 0xB8, 0x23, 0x20, 0x21, 0x8F, 0x6B, 0x3B, 0xD6, 0xF7, 0xF6, 0x67, 0xA6, 0xD9 }, + { 0x1B, 0x61, 0x8F, 0xBA, 0xA5, 0x66, 0xB3, 
0xD4, 0x98, 0xC1, 0x2E, 0x98, 0x2C, 0x9E, 0xC5, 0x2E, 0x4D, 0xA8, 0x5A, 0x8C, 0x54, 0xF3, 0x8F, 0x34, 0xC0, 0x90, 0x39, 0x4F, 0x23, 0xC1, 0x84, 0xC1 }, + { 0x0C, 0x75, 0x8F, 0xB5, 0x69, 0x2F, 0xFD, 0x41, 0xA3, 0x57, 0x5D, 0x0A, 0xF0, 0x0C, 0xC7, 0xFB, 0xF2, 0xCB, 0xE5, 0x90, 0x5A, 0x58, 0x32, 0x3A, 0x88, 0xAE, 0x42, 0x44, 0xF6, 0xE4, 0xC9, 0x93 }, + { 0xA9, 0x31, 0x36, 0x0C, 0xAD, 0x62, 0x8C, 0x7F, 0x12, 0xA6, 0xC1, 0xC4, 0xB7, 0x53, 0xB0, 0xF4, 0x06, 0x2A, 0xEF, 0x3C, 0xE6, 0x5A, 0x1A, 0xE3, 0xF1, 0x93, 0x69, 0xDA, 0xDF, 0x3A, 0xE2, 0x3D }, + { 0xCB, 0xAC, 0x7D, 0x77, 0x3B, 0x1E, 0x3B, 0x3C, 0x66, 0x91, 0xD7, 0xAB, 0xB7, 0xE9, 0xDF, 0x04, 0x5C, 0x8B, 0xA1, 0x92, 0x68, 0xDE, 0xD1, 0x53, 0x20, 0x7F, 0x5E, 0x80, 0x43, 0x52, 0xEC, 0x5D }, + { 0x23, 0xA1, 0x96, 0xD3, 0x80, 0x2E, 0xD3, 0xC1, 0xB3, 0x84, 0x01, 0x9A, 0x82, 0x32, 0x58, 0x40, 0xD3, 0x2F, 0x71, 0x95, 0x0C, 0x45, 0x80, 0xB0, 0x34, 0x45, 0xE0, 0x89, 0x8E, 0x14, 0x05, 0x3C }, + { 0xF4, 0x49, 0x54, 0x70, 0xF2, 0x26, 0xC8, 0xC2, 0x14, 0xBE, 0x08, 0xFD, 0xFA, 0xD4, 0xBC, 0x4A, 0x2A, 0x9D, 0xBE, 0xA9, 0x13, 0x6A, 0x21, 0x0D, 0xF0, 0xD4, 0xB6, 0x49, 0x29, 0xE6, 0xFC, 0x14 }, + { 0xE2, 0x90, 0xDD, 0x27, 0x0B, 0x46, 0x7F, 0x34, 0xAB, 0x1C, 0x00, 0x2D, 0x34, 0x0F, 0xA0, 0x16, 0x25, 0x7F, 0xF1, 0x9E, 0x58, 0x33, 0xFD, 0xBB, 0xF2, 0xCB, 0x40, 0x1C, 0x3B, 0x28, 0x17, 0xDE }, + { 0x9F, 0xC7, 0xB5, 0xDE, 0xD3, 0xC1, 0x50, 0x42, 0xB2, 0xA6, 0x58, 0x2D, 0xC3, 0x9B, 0xE0, 0x16, 0xD2, 0x4A, 0x68, 0x2D, 0x5E, 0x61, 0xAD, 0x1E, 0xFF, 0x9C, 0x63, 0x30, 0x98, 0x48, 0xF7, 0x06 }, + { 0x8C, 0xCA, 0x67, 0xA3, 0x6D, 0x17, 0xD5, 0xE6, 0x34, 0x1C, 0xB5, 0x92, 0xFD, 0x7B, 0xEF, 0x99, 0x26, 0xC9, 0xE3, 0xAA, 0x10, 0x27, 0xEA, 0x11, 0xA7, 0xD8, 0xBD, 0x26, 0x0B, 0x57, 0x6E, 0x04 }, + { 0x40, 0x93, 0x92, 0xF5, 0x60, 0xF8, 0x68, 0x31, 0xDA, 0x43, 0x73, 0xEE, 0x5E, 0x00, 0x74, 0x26, 0x05, 0x95, 0xD7, 0xBC, 0x24, 0x18, 0x3B, 0x60, 0xED, 0x70, 0x0D, 0x45, 0x83, 0xD3, 0xF6, 0xF0 }, + { 0x28, 0x02, 0x16, 0x5D, 0xE0, 0x90, 0x91, 0x55, 0x46, 0xF3, 
0x39, 0x8C, 0xD8, 0x49, 0x16, 0x4A, 0x19, 0xF9, 0x2A, 0xDB, 0xC3, 0x61, 0xAD, 0xC9, 0x9B, 0x0F, 0x20, 0xC8, 0xEA, 0x07, 0x10, 0x54 }, + { 0xAD, 0x83, 0x91, 0x68, 0xD9, 0xF8, 0xA4, 0xBE, 0x95, 0xBA, 0x9E, 0xF9, 0xA6, 0x92, 0xF0, 0x72, 0x56, 0xAE, 0x43, 0xFE, 0x6F, 0x98, 0x64, 0xE2, 0x90, 0x69, 0x1B, 0x02, 0x56, 0xCE, 0x50, 0xA9 }, + { 0x75, 0xFD, 0xAA, 0x50, 0x38, 0xC2, 0x84, 0xB8, 0x6D, 0x6E, 0x8A, 0xFF, 0xE8, 0xB2, 0x80, 0x7E, 0x46, 0x7B, 0x86, 0x60, 0x0E, 0x79, 0xAF, 0x36, 0x89, 0xFB, 0xC0, 0x63, 0x28, 0xCB, 0xF8, 0x94 }, + { 0xE5, 0x7C, 0xB7, 0x94, 0x87, 0xDD, 0x57, 0x90, 0x24, 0x32, 0xB2, 0x50, 0x73, 0x38, 0x13, 0xBD, 0x96, 0xA8, 0x4E, 0xFC, 0xE5, 0x9F, 0x65, 0x0F, 0xAC, 0x26, 0xE6, 0x69, 0x6A, 0xEF, 0xAF, 0xC3 }, + { 0x56, 0xF3, 0x4E, 0x8B, 0x96, 0x55, 0x7E, 0x90, 0xC1, 0xF2, 0x4B, 0x52, 0xD0, 0xC8, 0x9D, 0x51, 0x08, 0x6A, 0xCF, 0x1B, 0x00, 0xF6, 0x34, 0xCF, 0x1D, 0xDE, 0x92, 0x33, 0xB8, 0xEA, 0xAA, 0x3E }, + { 0x1B, 0x53, 0xEE, 0x94, 0xAA, 0xF3, 0x4E, 0x4B, 0x15, 0x9D, 0x48, 0xDE, 0x35, 0x2C, 0x7F, 0x06, 0x61, 0xD0, 0xA4, 0x0E, 0xDF, 0xF9, 0x5A, 0x0B, 0x16, 0x39, 0xB4, 0x09, 0x0E, 0x97, 0x44, 0x72 }, + { 0x05, 0x70, 0x5E, 0x2A, 0x81, 0x75, 0x7C, 0x14, 0xBD, 0x38, 0x3E, 0xA9, 0x8D, 0xDA, 0x54, 0x4E, 0xB1, 0x0E, 0x6B, 0xC0, 0x7B, 0xAE, 0x43, 0x5E, 0x25, 0x18, 0xDB, 0xE1, 0x33, 0x52, 0x53, 0x75 }, + { 0xD8, 0xB2, 0x86, 0x6E, 0x8A, 0x30, 0x9D, 0xB5, 0x3E, 0x52, 0x9E, 0xC3, 0x29, 0x11, 0xD8, 0x2F, 0x5C, 0xA1, 0x6C, 0xFF, 0x76, 0x21, 0x68, 0x91, 0xA9, 0x67, 0x6A, 0xA3, 0x1A, 0xAA, 0x6C, 0x42 }, + { 0xF5, 0x04, 0x1C, 0x24, 0x12, 0x70, 0xEB, 0x04, 0xC7, 0x1E, 0xC2, 0xC9, 0x5D, 0x4C, 0x38, 0xD8, 0x03, 0xB1, 0x23, 0x7B, 0x0F, 0x29, 0xFD, 0x4D, 0xB3, 0xEB, 0x39, 0x76, 0x69, 0xE8, 0x86, 0x99 }, + { 0x9A, 0x4C, 0xE0, 0x77, 0xC3, 0x49, 0x32, 0x2F, 0x59, 0x5E, 0x0E, 0xE7, 0x9E, 0xD0, 0xDA, 0x5F, 0xAB, 0x66, 0x75, 0x2C, 0xBF, 0xEF, 0x8F, 0x87, 0xD0, 0xE9, 0xD0, 0x72, 0x3C, 0x75, 0x30, 0xDD }, + { 0x65, 0x7B, 0x09, 0xF3, 0xD0, 0xF5, 0x2B, 0x5B, 0x8F, 0x2F, 0x97, 0x16, 0x3A, 
0x0E, 0xDF, 0x0C, 0x04, 0xF0, 0x75, 0x40, 0x8A, 0x07, 0xBB, 0xEB, 0x3A, 0x41, 0x01, 0xA8, 0x91, 0x99, 0x0D, 0x62 }, + { 0x1E, 0x3F, 0x7B, 0xD5, 0xA5, 0x8F, 0xA5, 0x33, 0x34, 0x4A, 0xA8, 0xED, 0x3A, 0xC1, 0x22, 0xBB, 0x9E, 0x70, 0xD4, 0xEF, 0x50, 0xD0, 0x04, 0x53, 0x08, 0x21, 0x94, 0x8F, 0x5F, 0xE6, 0x31, 0x5A }, + { 0x80, 0xDC, 0xCF, 0x3F, 0xD8, 0x3D, 0xFD, 0x0D, 0x35, 0xAA, 0x28, 0x58, 0x59, 0x22, 0xAB, 0x89, 0xD5, 0x31, 0x39, 0x97, 0x67, 0x3E, 0xAF, 0x90, 0x5C, 0xEA, 0x9C, 0x0B, 0x22, 0x5C, 0x7B, 0x5F }, + { 0x8A, 0x0D, 0x0F, 0xBF, 0x63, 0x77, 0xD8, 0x3B, 0xB0, 0x8B, 0x51, 0x4B, 0x4B, 0x1C, 0x43, 0xAC, 0xC9, 0x5D, 0x75, 0x17, 0x14, 0xF8, 0x92, 0x56, 0x45, 0xCB, 0x6B, 0xC8, 0x56, 0xCA, 0x15, 0x0A }, + { 0x9F, 0xA5, 0xB4, 0x87, 0x73, 0x8A, 0xD2, 0x84, 0x4C, 0xC6, 0x34, 0x8A, 0x90, 0x19, 0x18, 0xF6, 0x59, 0xA3, 0xB8, 0x9E, 0x9C, 0x0D, 0xFE, 0xEA, 0xD3, 0x0D, 0xD9, 0x4B, 0xCF, 0x42, 0xEF, 0x8E }, + { 0x80, 0x83, 0x2C, 0x4A, 0x16, 0x77, 0xF5, 0xEA, 0x25, 0x60, 0xF6, 0x68, 0xE9, 0x35, 0x4D, 0xD3, 0x69, 0x97, 0xF0, 0x37, 0x28, 0xCF, 0xA5, 0x5E, 0x1B, 0x38, 0x33, 0x7C, 0x0C, 0x9E, 0xF8, 0x18 }, + { 0xAB, 0x37, 0xDD, 0xB6, 0x83, 0x13, 0x7E, 0x74, 0x08, 0x0D, 0x02, 0x6B, 0x59, 0x0B, 0x96, 0xAE, 0x9B, 0xB4, 0x47, 0x72, 0x2F, 0x30, 0x5A, 0x5A, 0xC5, 0x70, 0xEC, 0x1D, 0xF9, 0xB1, 0x74, 0x3C }, + { 0x3E, 0xE7, 0x35, 0xA6, 0x94, 0xC2, 0x55, 0x9B, 0x69, 0x3A, 0xA6, 0x86, 0x29, 0x36, 0x1E, 0x15, 0xD1, 0x22, 0x65, 0xAD, 0x6A, 0x3D, 0xED, 0xF4, 0x88, 0xB0, 0xB0, 0x0F, 0xAC, 0x97, 0x54, 0xBA }, + { 0xD6, 0xFC, 0xD2, 0x32, 0x19, 0xB6, 0x47, 0xE4, 0xCB, 0xD5, 0xEB, 0x2D, 0x0A, 0xD0, 0x1E, 0xC8, 0x83, 0x8A, 0x4B, 0x29, 0x01, 0xFC, 0x32, 0x5C, 0xC3, 0x70, 0x19, 0x81, 0xCA, 0x6C, 0x88, 0x8B }, + { 0x05, 0x20, 0xEC, 0x2F, 0x5B, 0xF7, 0xA7, 0x55, 0xDA, 0xCB, 0x50, 0xC6, 0xBF, 0x23, 0x3E, 0x35, 0x15, 0x43, 0x47, 0x63, 0xDB, 0x01, 0x39, 0xCC, 0xD9, 0xFA, 0xEF, 0xBB, 0x82, 0x07, 0x61, 0x2D }, + { 0xAF, 0xF3, 0xB7, 0x5F, 0x3F, 0x58, 0x12, 0x64, 0xD7, 0x66, 0x16, 0x62, 0xB9, 0x2F, 0x5A, 0xD3, 
0x7C, 0x1D, 0x32, 0xBD, 0x45, 0xFF, 0x81, 0xA4, 0xED, 0x8A, 0xDC, 0x9E, 0xF3, 0x0D, 0xD9, 0x89 }, + { 0xD0, 0xDD, 0x65, 0x0B, 0xEF, 0xD3, 0xBA, 0x63, 0xDC, 0x25, 0x10, 0x2C, 0x62, 0x7C, 0x92, 0x1B, 0x9C, 0xBE, 0xB0, 0xB1, 0x30, 0x68, 0x69, 0x35, 0xB5, 0xC9, 0x27, 0xCB, 0x7C, 0xCD, 0x5E, 0x3B }, + { 0xE1, 0x14, 0x98, 0x16, 0xB1, 0x0A, 0x85, 0x14, 0xFB, 0x3E, 0x2C, 0xAB, 0x2C, 0x08, 0xBE, 0xE9, 0xF7, 0x3C, 0xE7, 0x62, 0x21, 0x70, 0x12, 0x46, 0xA5, 0x89, 0xBB, 0xB6, 0x73, 0x02, 0xD8, 0xA9 }, + { 0x7D, 0xA3, 0xF4, 0x41, 0xDE, 0x90, 0x54, 0x31, 0x7E, 0x72, 0xB5, 0xDB, 0xF9, 0x79, 0xDA, 0x01, 0xE6, 0xBC, 0xEE, 0xBB, 0x84, 0x78, 0xEA, 0xE6, 0xA2, 0x28, 0x49, 0xD9, 0x02, 0x92, 0x63, 0x5C }, + { 0x12, 0x30, 0xB1, 0xFC, 0x8A, 0x7D, 0x92, 0x15, 0xED, 0xC2, 0xD4, 0xA2, 0xDE, 0xCB, 0xDD, 0x0A, 0x6E, 0x21, 0x6C, 0x92, 0x42, 0x78, 0xC9, 0x1F, 0xC5, 0xD1, 0x0E, 0x7D, 0x60, 0x19, 0x2D, 0x94 }, + { 0x57, 0x50, 0xD7, 0x16, 0xB4, 0x80, 0x8F, 0x75, 0x1F, 0xEB, 0xC3, 0x88, 0x06, 0xBA, 0x17, 0x0B, 0xF6, 0xD5, 0x19, 0x9A, 0x78, 0x16, 0xBE, 0x51, 0x4E, 0x3F, 0x93, 0x2F, 0xBE, 0x0C, 0xB8, 0x71 }, + { 0x6F, 0xC5, 0x9B, 0x2F, 0x10, 0xFE, 0xBA, 0x95, 0x4A, 0xA6, 0x82, 0x0B, 0x3C, 0xA9, 0x87, 0xEE, 0x81, 0xD5, 0xCC, 0x1D, 0xA3, 0xC6, 0x3C, 0xE8, 0x27, 0x30, 0x1C, 0x56, 0x9D, 0xFB, 0x39, 0xCE }, + { 0xC7, 0xC3, 0xFE, 0x1E, 0xEB, 0xDC, 0x7B, 0x5A, 0x93, 0x93, 0x26, 0xE8, 0xDD, 0xB8, 0x3E, 0x8B, 0xF2, 0xB7, 0x80, 0xB6, 0x56, 0x78, 0xCB, 0x62, 0xF2, 0x08, 0xB0, 0x40, 0xAB, 0xDD, 0x35, 0xE2 }, + { 0x0C, 0x75, 0xC1, 0xA1, 0x5C, 0xF3, 0x4A, 0x31, 0x4E, 0xE4, 0x78, 0xF4, 0xA5, 0xCE, 0x0B, 0x8A, 0x6B, 0x36, 0x52, 0x8E, 0xF7, 0xA8, 0x20, 0x69, 0x6C, 0x3E, 0x42, 0x46, 0xC5, 0xA1, 0x58, 0x64 }, + { 0x21, 0x6D, 0xC1, 0x2A, 0x10, 0x85, 0x69, 0xA3, 0xC7, 0xCD, 0xDE, 0x4A, 0xED, 0x43, 0xA6, 0xC3, 0x30, 0x13, 0x9D, 0xDA, 0x3C, 0xCC, 0x4A, 0x10, 0x89, 0x05, 0xDB, 0x38, 0x61, 0x89, 0x90, 0x50 }, + { 0xA5, 0x7B, 0xE6, 0xAE, 0x67, 0x56, 0xF2, 0x8B, 0x02, 0xF5, 0x9D, 0xAD, 0xF7, 0xE0, 0xD7, 0xD8, 0x80, 0x7F, 0x10, 
0xFA, 0x15, 0xCE, 0xD1, 0xAD, 0x35, 0x85, 0x52, 0x1A, 0x1D, 0x99, 0x5A, 0x89 }, + { 0x81, 0x6A, 0xEF, 0x87, 0x59, 0x53, 0x71, 0x6C, 0xD7, 0xA5, 0x81, 0xF7, 0x32, 0xF5, 0x3D, 0xD4, 0x35, 0xDA, 0xB6, 0x6D, 0x09, 0xC3, 0x61, 0xD2, 0xD6, 0x59, 0x2D, 0xE1, 0x77, 0x55, 0xD8, 0xA8 }, + { 0x9A, 0x76, 0x89, 0x32, 0x26, 0x69, 0x3B, 0x6E, 0xA9, 0x7E, 0x6A, 0x73, 0x8F, 0x9D, 0x10, 0xFB, 0x3D, 0x0B, 0x43, 0xAE, 0x0E, 0x8B, 0x7D, 0x81, 0x23, 0xEA, 0x76, 0xCE, 0x97, 0x98, 0x9C, 0x7E }, + { 0x8D, 0xAE, 0xDB, 0x9A, 0x27, 0x15, 0x29, 0xDB, 0xB7, 0xDC, 0x3B, 0x60, 0x7F, 0xE5, 0xEB, 0x2D, 0x32, 0x11, 0x77, 0x07, 0x58, 0xDD, 0x3B, 0x0A, 0x35, 0x93, 0xD2, 0xD7, 0x95, 0x4E, 0x2D, 0x5B }, + { 0x16, 0xDB, 0xC0, 0xAA, 0x5D, 0xD2, 0xC7, 0x74, 0xF5, 0x05, 0x10, 0x0F, 0x73, 0x37, 0x86, 0xD8, 0xA1, 0x75, 0xFC, 0xBB, 0xB5, 0x9C, 0x43, 0xE1, 0xFB, 0xFF, 0x3E, 0x1E, 0xAF, 0x31, 0xCB, 0x4A }, + { 0x86, 0x06, 0xCB, 0x89, 0x9C, 0x6A, 0xEA, 0xF5, 0x1B, 0x9D, 0xB0, 0xFE, 0x49, 0x24, 0xA9, 0xFD, 0x5D, 0xAB, 0xC1, 0x9F, 0x88, 0x26, 0xF2, 0xBC, 0x1C, 0x1D, 0x7D, 0xA1, 0x4D, 0x2C, 0x2C, 0x99 }, + { 0x84, 0x79, 0x73, 0x1A, 0xED, 0xA5, 0x7B, 0xD3, 0x7E, 0xAD, 0xB5, 0x1A, 0x50, 0x7E, 0x30, 0x7F, 0x3B, 0xD9, 0x5E, 0x69, 0xDB, 0xCA, 0x94, 0xF3, 0xBC, 0x21, 0x72, 0x60, 0x66, 0xAD, 0x6D, 0xFD }, + { 0x58, 0x47, 0x3A, 0x9E, 0xA8, 0x2E, 0xFA, 0x3F, 0x3B, 0x3D, 0x8F, 0xC8, 0x3E, 0xD8, 0x86, 0x31, 0x27, 0xB3, 0x3A, 0xE8, 0xDE, 0xAE, 0x63, 0x07, 0x20, 0x1E, 0xDB, 0x6D, 0xDE, 0x61, 0xDE, 0x29 }, + { 0x9A, 0x92, 0x55, 0xD5, 0x3A, 0xF1, 0x16, 0xDE, 0x8B, 0xA2, 0x7C, 0xE3, 0x5B, 0x4C, 0x7E, 0x15, 0x64, 0x06, 0x57, 0xA0, 0xFC, 0xB8, 0x88, 0xC7, 0x0D, 0x95, 0x43, 0x1D, 0xAC, 0xD8, 0xF8, 0x30 }, + { 0x9E, 0xB0, 0x5F, 0xFB, 0xA3, 0x9F, 0xD8, 0x59, 0x6A, 0x45, 0x49, 0x3E, 0x18, 0xD2, 0x51, 0x0B, 0xF3, 0xEF, 0x06, 0x5C, 0x51, 0xD6, 0xE1, 0x3A, 0xBE, 0x66, 0xAA, 0x57, 0xE0, 0x5C, 0xFD, 0xB7 }, + { 0x81, 0xDC, 0xC3, 0xA5, 0x05, 0xEA, 0xCE, 0x3F, 0x87, 0x9D, 0x8F, 0x70, 0x27, 0x76, 0x77, 0x0F, 0x9D, 0xF5, 0x0E, 0x52, 0x1D, 0x14, 
0x28, 0xA8, 0x5D, 0xAF, 0x04, 0xF9, 0xAD, 0x21, 0x50, 0xE0 }, + { 0xE3, 0xE3, 0xC4, 0xAA, 0x3A, 0xCB, 0xBC, 0x85, 0x33, 0x2A, 0xF9, 0xD5, 0x64, 0xBC, 0x24, 0x16, 0x5E, 0x16, 0x87, 0xF6, 0xB1, 0xAD, 0xCB, 0xFA, 0xE7, 0x7A, 0x8F, 0x03, 0xC7, 0x2A, 0xC2, 0x8C }, + { 0x67, 0x46, 0xC8, 0x0B, 0x4E, 0xB5, 0x6A, 0xEA, 0x45, 0xE6, 0x4E, 0x72, 0x89, 0xBB, 0xA3, 0xED, 0xBF, 0x45, 0xEC, 0xF8, 0x20, 0x64, 0x81, 0xFF, 0x63, 0x02, 0x12, 0x29, 0x84, 0xCD, 0x52, 0x6A }, + { 0x2B, 0x62, 0x8E, 0x52, 0x76, 0x4D, 0x7D, 0x62, 0xC0, 0x86, 0x8B, 0x21, 0x23, 0x57, 0xCD, 0xD1, 0x2D, 0x91, 0x49, 0x82, 0x2F, 0x4E, 0x98, 0x45, 0xD9, 0x18, 0xA0, 0x8D, 0x1A, 0xE9, 0x90, 0xC0 }, + { 0xE4, 0xBF, 0xE8, 0x0D, 0x58, 0xC9, 0x19, 0x94, 0x61, 0x39, 0x09, 0xDC, 0x4B, 0x1A, 0x12, 0x49, 0x68, 0x96, 0xC0, 0x04, 0xAF, 0x7B, 0x57, 0x01, 0x48, 0x3D, 0xE4, 0x5D, 0x28, 0x23, 0xD7, 0x8E }, + { 0xEB, 0xB4, 0xBA, 0x15, 0x0C, 0xEF, 0x27, 0x34, 0x34, 0x5B, 0x5D, 0x64, 0x1B, 0xBE, 0xD0, 0x3A, 0x21, 0xEA, 0xFA, 0xE9, 0x33, 0xC9, 0x9E, 0x00, 0x92, 0x12, 0xEF, 0x04, 0x57, 0x4A, 0x85, 0x30 }, + { 0x39, 0x66, 0xEC, 0x73, 0xB1, 0x54, 0xAC, 0xC6, 0x97, 0xAC, 0x5C, 0xF5, 0xB2, 0x4B, 0x40, 0xBD, 0xB0, 0xDB, 0x9E, 0x39, 0x88, 0x36, 0xD7, 0x6D, 0x4B, 0x88, 0x0E, 0x3B, 0x2A, 0xF1, 0xAA, 0x27 }, + { 0xEF, 0x7E, 0x48, 0x31, 0xB3, 0xA8, 0x46, 0x36, 0x51, 0x8D, 0x6E, 0x4B, 0xFC, 0xE6, 0x4A, 0x43, 0xDB, 0x2A, 0x5D, 0xDA, 0x9C, 0xCA, 0x2B, 0x44, 0xF3, 0x90, 0x33, 0xBD, 0xC4, 0x0D, 0x62, 0x43 }, + { 0x7A, 0xBF, 0x6A, 0xCF, 0x5C, 0x8E, 0x54, 0x9D, 0xDB, 0xB1, 0x5A, 0xE8, 0xD8, 0xB3, 0x88, 0xC1, 0xC1, 0x97, 0xE6, 0x98, 0x73, 0x7C, 0x97, 0x85, 0x50, 0x1E, 0xD1, 0xF9, 0x49, 0x30, 0xB7, 0xD9 }, + { 0x88, 0x01, 0x8D, 0xED, 0x66, 0x81, 0x3F, 0x0C, 0xA9, 0x5D, 0xEF, 0x47, 0x4C, 0x63, 0x06, 0x92, 0x01, 0x99, 0x67, 0xB9, 0xE3, 0x68, 0x88, 0xDA, 0xDD, 0x94, 0x12, 0x47, 0x19, 0xB6, 0x82, 0xF6 }, + { 0x39, 0x30, 0x87, 0x6B, 0x9F, 0xC7, 0x52, 0x90, 0x36, 0xB0, 0x08, 0xB1, 0xB8, 0xBB, 0x99, 0x75, 0x22, 0xA4, 0x41, 0x63, 0x5A, 0x0C, 0x25, 0xEC, 0x02, 
0xFB, 0x6D, 0x90, 0x26, 0xE5, 0x5A, 0x97 }, + { 0x0A, 0x40, 0x49, 0xD5, 0x7E, 0x83, 0x3B, 0x56, 0x95, 0xFA, 0xC9, 0x3D, 0xD1, 0xFB, 0xEF, 0x31, 0x66, 0xB4, 0x4B, 0x12, 0xAD, 0x11, 0x24, 0x86, 0x62, 0x38, 0x3A, 0xE0, 0x51, 0xE1, 0x58, 0x27 }, + { 0x81, 0xDC, 0xC0, 0x67, 0x8B, 0xB6, 0xA7, 0x65, 0xE4, 0x8C, 0x32, 0x09, 0x65, 0x4F, 0xE9, 0x00, 0x89, 0xCE, 0x44, 0xFF, 0x56, 0x18, 0x47, 0x7E, 0x39, 0xAB, 0x28, 0x64, 0x76, 0xDF, 0x05, 0x2B }, + { 0xE6, 0x9B, 0x3A, 0x36, 0xA4, 0x46, 0x19, 0x12, 0xDC, 0x08, 0x34, 0x6B, 0x11, 0xDD, 0xCB, 0x9D, 0xB7, 0x96, 0xF8, 0x85, 0xFD, 0x01, 0x93, 0x6E, 0x66, 0x2F, 0xE2, 0x92, 0x97, 0xB0, 0x99, 0xA4 }, + { 0x5A, 0xC6, 0x50, 0x3B, 0x0D, 0x8D, 0xA6, 0x91, 0x76, 0x46, 0xE6, 0xDC, 0xC8, 0x7E, 0xDC, 0x58, 0xE9, 0x42, 0x45, 0x32, 0x4C, 0xC2, 0x04, 0xF4, 0xDD, 0x4A, 0xF0, 0x15, 0x63, 0xAC, 0xD4, 0x27 }, + { 0xDF, 0x6D, 0xDA, 0x21, 0x35, 0x9A, 0x30, 0xBC, 0x27, 0x17, 0x80, 0x97, 0x1C, 0x1A, 0xBD, 0x56, 0xA6, 0xEF, 0x16, 0x7E, 0x48, 0x08, 0x87, 0x88, 0x8E, 0x73, 0xA8, 0x6D, 0x3B, 0xF6, 0x05, 0xE9 }, + { 0xE8, 0xE6, 0xE4, 0x70, 0x71, 0xE7, 0xB7, 0xDF, 0x25, 0x80, 0xF2, 0x25, 0xCF, 0xBB, 0xED, 0xF8, 0x4C, 0xE6, 0x77, 0x46, 0x62, 0x66, 0x28, 0xD3, 0x30, 0x97, 0xE4, 0xB7, 0xDC, 0x57, 0x11, 0x07 }, + { 0x53, 0xE4, 0x0E, 0xAD, 0x62, 0x05, 0x1E, 0x19, 0xCB, 0x9B, 0xA8, 0x13, 0x3E, 0x3E, 0x5C, 0x1C, 0xE0, 0x0D, 0xDC, 0xAD, 0x8A, 0xCF, 0x34, 0x2A, 0x22, 0x43, 0x60, 0xB0, 0xAC, 0xC1, 0x47, 0x77 }, + { 0x9C, 0xCD, 0x53, 0xFE, 0x80, 0xBE, 0x78, 0x6A, 0xA9, 0x84, 0x63, 0x84, 0x62, 0xFB, 0x28, 0xAF, 0xDF, 0x12, 0x2B, 0x34, 0xD7, 0x8F, 0x46, 0x87, 0xEC, 0x63, 0x2B, 0xB1, 0x9D, 0xE2, 0x37, 0x1A }, + { 0xCB, 0xD4, 0x80, 0x52, 0xC4, 0x8D, 0x78, 0x84, 0x66, 0xA3, 0xE8, 0x11, 0x8C, 0x56, 0xC9, 0x7F, 0xE1, 0x46, 0xE5, 0x54, 0x6F, 0xAA, 0xF9, 0x3E, 0x2B, 0xC3, 0xC4, 0x7E, 0x45, 0x93, 0x97, 0x53 }, + { 0x25, 0x68, 0x83, 0xB1, 0x4E, 0x2A, 0xF4, 0x4D, 0xAD, 0xB2, 0x8E, 0x1B, 0x34, 0xB2, 0xAC, 0x0F, 0x0F, 0x4C, 0x91, 0xC3, 0x4E, 0xC9, 0x16, 0x9E, 0x29, 0x03, 0x61, 0x58, 
0xAC, 0xAA, 0x95, 0xB9 }, + { 0x44, 0x71, 0xB9, 0x1A, 0xB4, 0x2D, 0xB7, 0xC4, 0xDD, 0x84, 0x90, 0xAB, 0x95, 0xA2, 0xEE, 0x8D, 0x04, 0xE3, 0xEF, 0x5C, 0x3D, 0x6F, 0xC7, 0x1A, 0xC7, 0x4B, 0x2B, 0x26, 0x91, 0x4D, 0x16, 0x41 }, + { 0xA5, 0xEB, 0x08, 0x03, 0x8F, 0x8F, 0x11, 0x55, 0xED, 0x86, 0xE6, 0x31, 0x90, 0x6F, 0xC1, 0x30, 0x95, 0xF6, 0xBB, 0xA4, 0x1D, 0xE5, 0xD4, 0xE7, 0x95, 0x75, 0x8E, 0xC8, 0xC8, 0xDF, 0x8A, 0xF1 }, + { 0xDC, 0x1D, 0xB6, 0x4E, 0xD8, 0xB4, 0x8A, 0x91, 0x0E, 0x06, 0x0A, 0x6B, 0x86, 0x63, 0x74, 0xC5, 0x78, 0x78, 0x4E, 0x9A, 0xC4, 0x9A, 0xB2, 0x77, 0x40, 0x92, 0xAC, 0x71, 0x50, 0x19, 0x34, 0xAC }, + { 0x28, 0x54, 0x13, 0xB2, 0xF2, 0xEE, 0x87, 0x3D, 0x34, 0x31, 0x9E, 0xE0, 0xBB, 0xFB, 0xB9, 0x0F, 0x32, 0xDA, 0x43, 0x4C, 0xC8, 0x7E, 0x3D, 0xB5, 0xED, 0x12, 0x1B, 0xB3, 0x98, 0xED, 0x96, 0x4B }, + { 0x02, 0x16, 0xE0, 0xF8, 0x1F, 0x75, 0x0F, 0x26, 0xF1, 0x99, 0x8B, 0xC3, 0x93, 0x4E, 0x3E, 0x12, 0x4C, 0x99, 0x45, 0xE6, 0x85, 0xA6, 0x0B, 0x25, 0xE8, 0xFB, 0xD9, 0x62, 0x5A, 0xB6, 0xB5, 0x99 }, + { 0x38, 0xC4, 0x10, 0xF5, 0xB9, 0xD4, 0x07, 0x20, 0x50, 0x75, 0x5B, 0x31, 0xDC, 0xA8, 0x9F, 0xD5, 0x39, 0x5C, 0x67, 0x85, 0xEE, 0xB3, 0xD7, 0x90, 0xF3, 0x20, 0xFF, 0x94, 0x1C, 0x5A, 0x93, 0xBF }, + { 0xF1, 0x84, 0x17, 0xB3, 0x9D, 0x61, 0x7A, 0xB1, 0xC1, 0x8F, 0xDF, 0x91, 0xEB, 0xD0, 0xFC, 0x6D, 0x55, 0x16, 0xBB, 0x34, 0xCF, 0x39, 0x36, 0x40, 0x37, 0xBC, 0xE8, 0x1F, 0xA0, 0x4C, 0xEC, 0xB1 }, + { 0x1F, 0xA8, 0x77, 0xDE, 0x67, 0x25, 0x9D, 0x19, 0x86, 0x3A, 0x2A, 0x34, 0xBC, 0xC6, 0x96, 0x2A, 0x2B, 0x25, 0xFC, 0xBF, 0x5C, 0xBE, 0xCD, 0x7E, 0xDE, 0x8F, 0x1F, 0xA3, 0x66, 0x88, 0xA7, 0x96 }, + { 0x5B, 0xD1, 0x69, 0xE6, 0x7C, 0x82, 0xC2, 0xC2, 0xE9, 0x8E, 0xF7, 0x00, 0x8B, 0xDF, 0x26, 0x1F, 0x2D, 0xDF, 0x30, 0xB1, 0xC0, 0x0F, 0x9E, 0x7F, 0x27, 0x5B, 0xB3, 0xE8, 0xA2, 0x8D, 0xC9, 0xA2 }, + { 0xC8, 0x0A, 0xBE, 0xEB, 0xB6, 0x69, 0xAD, 0x5D, 0xEE, 0xB5, 0xF5, 0xEC, 0x8E, 0xA6, 0xB7, 0xA0, 0x5D, 0xDF, 0x7D, 0x31, 0xEC, 0x4C, 0x0A, 0x2E, 0xE2, 0x0B, 0x0B, 0x98, 0xCA, 0xEC, 0x67, 
0x46 }, + { 0xE7, 0x6D, 0x3F, 0xBD, 0xA5, 0xBA, 0x37, 0x4E, 0x6B, 0xF8, 0xE5, 0x0F, 0xAD, 0xC3, 0xBB, 0xB9, 0xBA, 0x5C, 0x20, 0x6E, 0xBD, 0xEC, 0x89, 0xA3, 0xA5, 0x4C, 0xF3, 0xDD, 0x84, 0xA0, 0x70, 0x16 }, + { 0x7B, 0xBA, 0x9D, 0xC5, 0xB5, 0xDB, 0x20, 0x71, 0xD1, 0x77, 0x52, 0xB1, 0x04, 0x4C, 0x1E, 0xCE, 0xD9, 0x6A, 0xAF, 0x2D, 0xD4, 0x6E, 0x9B, 0x43, 0x37, 0x50, 0xE8, 0xEA, 0x0D, 0xCC, 0x18, 0x70 }, + { 0xF2, 0x9B, 0x1B, 0x1A, 0xB9, 0xBA, 0xB1, 0x63, 0x01, 0x8E, 0xE3, 0xDA, 0x15, 0x23, 0x2C, 0xCA, 0x78, 0xEC, 0x52, 0xDB, 0xC3, 0x4E, 0xDA, 0x5B, 0x82, 0x2E, 0xC1, 0xD8, 0x0F, 0xC2, 0x1B, 0xD0 }, + { 0x9E, 0xE3, 0xE3, 0xE7, 0xE9, 0x00, 0xF1, 0xE1, 0x1D, 0x30, 0x8C, 0x4B, 0x2B, 0x30, 0x76, 0xD2, 0x72, 0xCF, 0x70, 0x12, 0x4F, 0x9F, 0x51, 0xE1, 0xDA, 0x60, 0xF3, 0x78, 0x46, 0xCD, 0xD2, 0xF4 }, + { 0x70, 0xEA, 0x3B, 0x01, 0x76, 0x92, 0x7D, 0x90, 0x96, 0xA1, 0x85, 0x08, 0xCD, 0x12, 0x3A, 0x29, 0x03, 0x25, 0x92, 0x0A, 0x9D, 0x00, 0xA8, 0x9B, 0x5D, 0xE0, 0x42, 0x73, 0xFB, 0xC7, 0x6B, 0x85 }, + { 0x67, 0xDE, 0x25, 0xC0, 0x2A, 0x4A, 0xAB, 0xA2, 0x3B, 0xDC, 0x97, 0x3C, 0x8B, 0xB0, 0xB5, 0x79, 0x6D, 0x47, 0xCC, 0x06, 0x59, 0xD4, 0x3D, 0xFF, 0x1F, 0x97, 0xDE, 0x17, 0x49, 0x63, 0xB6, 0x8E }, + { 0xB2, 0x16, 0x8E, 0x4E, 0x0F, 0x18, 0xB0, 0xE6, 0x41, 0x00, 0xB5, 0x17, 0xED, 0x95, 0x25, 0x7D, 0x73, 0xF0, 0x62, 0x0D, 0xF8, 0x85, 0xC1, 0x3D, 0x2E, 0xCF, 0x79, 0x36, 0x7B, 0x38, 0x4C, 0xEE }, + { 0x2E, 0x7D, 0xEC, 0x24, 0x28, 0x85, 0x3B, 0x2C, 0x71, 0x76, 0x07, 0x45, 0x54, 0x1F, 0x7A, 0xFE, 0x98, 0x25, 0xB5, 0xDD, 0x77, 0xDF, 0x06, 0x51, 0x1D, 0x84, 0x41, 0xA9, 0x4B, 0xAC, 0xC9, 0x27 }, + { 0xCA, 0x9F, 0xFA, 0xC4, 0xC4, 0x3F, 0x0B, 0x48, 0x46, 0x1D, 0xC5, 0xC2, 0x63, 0xBE, 0xA3, 0xF6, 0xF0, 0x06, 0x11, 0xCE, 0xAC, 0xAB, 0xF6, 0xF8, 0x95, 0xBA, 0x2B, 0x01, 0x01, 0xDB, 0xB6, 0x8D }, + { 0x74, 0x10, 0xD4, 0x2D, 0x8F, 0xD1, 0xD5, 0xE9, 0xD2, 0xF5, 0x81, 0x5C, 0xB9, 0x34, 0x17, 0x99, 0x88, 0x28, 0xEF, 0x3C, 0x42, 0x30, 0xBF, 0xBD, 0x41, 0x2D, 0xF0, 0xA4, 0xA7, 0xA2, 0x50, 0x7A }, + { 0x50, 
0x10, 0xF6, 0x84, 0x51, 0x6D, 0xCC, 0xD0, 0xB6, 0xEE, 0x08, 0x52, 0xC2, 0x51, 0x2B, 0x4D, 0xC0, 0x06, 0x6C, 0xF0, 0xD5, 0x6F, 0x35, 0x30, 0x29, 0x78, 0xDB, 0x8A, 0xE3, 0x2C, 0x6A, 0x81 }, + { 0xAC, 0xAA, 0xB5, 0x85, 0xF7, 0xB7, 0x9B, 0x71, 0x99, 0x35, 0xCE, 0xB8, 0x95, 0x23, 0xDD, 0xC5, 0x48, 0x27, 0xF7, 0x5C, 0x56, 0x88, 0x38, 0x56, 0x15, 0x4A, 0x56, 0xCD, 0xCD, 0x5E, 0xE9, 0x88 }, + { 0x66, 0x6D, 0xE5, 0xD1, 0x44, 0x0F, 0xEE, 0x73, 0x31, 0xAA, 0xF0, 0x12, 0x3A, 0x62, 0xEF, 0x2D, 0x8B, 0xA5, 0x74, 0x53, 0xA0, 0x76, 0x96, 0x35, 0xAC, 0x6C, 0xD0, 0x1E, 0x63, 0x3F, 0x77, 0x12 }, + { 0xA6, 0xF9, 0x86, 0x58, 0xF6, 0xEA, 0xBA, 0xF9, 0x02, 0xD8, 0xB3, 0x87, 0x1A, 0x4B, 0x10, 0x1D, 0x16, 0x19, 0x6E, 0x8A, 0x4B, 0x24, 0x1E, 0x15, 0x58, 0xFE, 0x29, 0x96, 0x6E, 0x10, 0x3E, 0x8D }, + { 0x89, 0x15, 0x46, 0xA8, 0xB2, 0x9F, 0x30, 0x47, 0xDD, 0xCF, 0xE5, 0xB0, 0x0E, 0x45, 0xFD, 0x55, 0x75, 0x63, 0x73, 0x10, 0x5E, 0xA8, 0x63, 0x7D, 0xFC, 0xFF, 0x54, 0x7B, 0x6E, 0xA9, 0x53, 0x5F }, + { 0x18, 0xDF, 0xBC, 0x1A, 0xC5, 0xD2, 0x5B, 0x07, 0x61, 0x13, 0x7D, 0xBD, 0x22, 0xC1, 0x7C, 0x82, 0x9D, 0x0F, 0x0E, 0xF1, 0xD8, 0x23, 0x44, 0xE9, 0xC8, 0x9C, 0x28, 0x66, 0x94, 0xDA, 0x24, 0xE8 }, + { 0xB5, 0x4B, 0x9B, 0x67, 0xF8, 0xFE, 0xD5, 0x4B, 0xBF, 0x5A, 0x26, 0x66, 0xDB, 0xDF, 0x4B, 0x23, 0xCF, 0xF1, 0xD1, 0xB6, 0xF4, 0xAF, 0xC9, 0x85, 0xB2, 0xE6, 0xD3, 0x30, 0x5A, 0x9F, 0xF8, 0x0F }, + { 0x7D, 0xB4, 0x42, 0xE1, 0x32, 0xBA, 0x59, 0xBC, 0x12, 0x89, 0xAA, 0x98, 0xB0, 0xD3, 0xE8, 0x06, 0x00, 0x4F, 0x8E, 0xC1, 0x28, 0x11, 0xAF, 0x1E, 0x2E, 0x33, 0xC6, 0x9B, 0xFD, 0xE7, 0x29, 0xE1 }, + { 0x25, 0x0F, 0x37, 0xCD, 0xC1, 0x5E, 0x81, 0x7D, 0x2F, 0x16, 0x0D, 0x99, 0x56, 0xC7, 0x1F, 0xE3, 0xEB, 0x5D, 0xB7, 0x45, 0x56, 0xE4, 0xAD, 0xF9, 0xA4, 0xFF, 0xAF, 0xBA, 0x74, 0x01, 0x03, 0x96 }, + { 0x4A, 0xB8, 0xA3, 0xDD, 0x1D, 0xDF, 0x8A, 0xD4, 0x3D, 0xAB, 0x13, 0xA2, 0x7F, 0x66, 0xA6, 0x54, 0x4F, 0x29, 0x05, 0x97, 0xFA, 0x96, 0x04, 0x0E, 0x0E, 0x1D, 0xB9, 0x26, 0x3A, 0xA4, 0x79, 0xF8 }, + { 0xEE, 0x61, 0x72, 0x7A, 
0x07, 0x66, 0xDF, 0x93, 0x9C, 0xCD, 0xC8, 0x60, 0x33, 0x40, 0x44, 0xC7, 0x9A, 0x3C, 0x9B, 0x15, 0x62, 0x00, 0xBC, 0x3A, 0xA3, 0x29, 0x73, 0x48, 0x3D, 0x83, 0x41, 0xAE }, + { 0x3F, 0x68, 0xC7, 0xEC, 0x63, 0xAC, 0x11, 0xEB, 0xB9, 0x8F, 0x94, 0xB3, 0x39, 0xB0, 0x5C, 0x10, 0x49, 0x84, 0xFD, 0xA5, 0x01, 0x03, 0x06, 0x01, 0x44, 0xE5, 0xA2, 0xBF, 0xCC, 0xC9, 0xDA, 0x95 }, + { 0x05, 0x6F, 0x29, 0x81, 0x6B, 0x8A, 0xF8, 0xF5, 0x66, 0x82, 0xBC, 0x4D, 0x7C, 0xF0, 0x94, 0x11, 0x1D, 0xA7, 0x73, 0x3E, 0x72, 0x6C, 0xD1, 0x3D, 0x6B, 0x3E, 0x8E, 0xA0, 0x3E, 0x92, 0xA0, 0xD5 }, + { 0xF5, 0xEC, 0x43, 0xA2, 0x8A, 0xCB, 0xEF, 0xF1, 0xF3, 0x31, 0x8A, 0x5B, 0xCA, 0xC7, 0xC6, 0x6D, 0xDB, 0x52, 0x30, 0xB7, 0x9D, 0xB2, 0xD1, 0x05, 0xBC, 0xBE, 0x15, 0xF3, 0xC1, 0x14, 0x8D, 0x69 }, + { 0x2A, 0x69, 0x60, 0xAD, 0x1D, 0x8D, 0xD5, 0x47, 0x55, 0x5C, 0xFB, 0xD5, 0xE4, 0x60, 0x0F, 0x1E, 0xAA, 0x1C, 0x8E, 0xDA, 0x34, 0xDE, 0x03, 0x74, 0xEC, 0x4A, 0x26, 0xEA, 0xAA, 0xA3, 0x3B, 0x4E }, + { 0xDC, 0xC1, 0xEA, 0x7B, 0xAA, 0xB9, 0x33, 0x84, 0xF7, 0x6B, 0x79, 0x68, 0x66, 0x19, 0x97, 0x54, 0x74, 0x2F, 0x7B, 0x96, 0xD6, 0xB4, 0xC1, 0x20, 0x16, 0x5C, 0x04, 0xA6, 0xC4, 0xF5, 0xCE, 0x10 }, + { 0x13, 0xD5, 0xDF, 0x17, 0x92, 0x21, 0x37, 0x9C, 0x6A, 0x78, 0xC0, 0x7C, 0x79, 0x3F, 0xF5, 0x34, 0x87, 0xCA, 0xE6, 0xBF, 0x9F, 0xE8, 0x82, 0x54, 0x1A, 0xB0, 0xE7, 0x35, 0xE3, 0xEA, 0xDA, 0x3B }, + { 0x8C, 0x59, 0xE4, 0x40, 0x76, 0x41, 0xA0, 0x1E, 0x8F, 0xF9, 0x1F, 0x99, 0x80, 0xDC, 0x23, 0x6F, 0x4E, 0xCD, 0x6F, 0xCF, 0x52, 0x58, 0x9A, 0x09, 0x9A, 0x96, 0x16, 0x33, 0x96, 0x77, 0x14, 0xE1 }, + { 0x83, 0x3B, 0x1A, 0xC6, 0xA2, 0x51, 0xFD, 0x08, 0xFD, 0x6D, 0x90, 0x8F, 0xEA, 0x2A, 0x4E, 0xE1, 0xE0, 0x40, 0xBC, 0xA9, 0x3F, 0xC1, 0xA3, 0x8E, 0xC3, 0x82, 0x0E, 0x0C, 0x10, 0xBD, 0x82, 0xEA }, + { 0xA2, 0x44, 0xF9, 0x27, 0xF3, 0xB4, 0x0B, 0x8F, 0x6C, 0x39, 0x15, 0x70, 0xC7, 0x65, 0x41, 0x8F, 0x2F, 0x6E, 0x70, 0x8E, 0xAC, 0x90, 0x06, 0xC5, 0x1A, 0x7F, 0xEF, 0xF4, 0xAF, 0x3B, 0x2B, 0x9E }, + { 0x3D, 0x99, 0xED, 0x95, 0x50, 0xCF, 0x11, 
0x96, 0xE6, 0xC4, 0xD2, 0x0C, 0x25, 0x96, 0x20, 0xF8, 0x58, 0xC3, 0xD7, 0x03, 0x37, 0x4C, 0x12, 0x8C, 0xE7, 0xB5, 0x90, 0x31, 0x0C, 0x83, 0x04, 0x6D }, + { 0x2B, 0x35, 0xC4, 0x7D, 0x7B, 0x87, 0x76, 0x1F, 0x0A, 0xE4, 0x3A, 0xC5, 0x6A, 0xC2, 0x7B, 0x9F, 0x25, 0x83, 0x03, 0x67, 0xB5, 0x95, 0xBE, 0x8C, 0x24, 0x0E, 0x94, 0x60, 0x0C, 0x6E, 0x33, 0x12 }, + { 0x5D, 0x11, 0xED, 0x37, 0xD2, 0x4D, 0xC7, 0x67, 0x30, 0x5C, 0xB7, 0xE1, 0x46, 0x7D, 0x87, 0xC0, 0x65, 0xAC, 0x4B, 0xC8, 0xA4, 0x26, 0xDE, 0x38, 0x99, 0x1F, 0xF5, 0x9A, 0xA8, 0x73, 0x5D, 0x02 }, + { 0xB8, 0x36, 0x47, 0x8E, 0x1C, 0xA0, 0x64, 0x0D, 0xCE, 0x6F, 0xD9, 0x10, 0xA5, 0x09, 0x62, 0x72, 0xC8, 0x33, 0x09, 0x90, 0xCD, 0x97, 0x86, 0x4A, 0xC2, 0xBF, 0x14, 0xEF, 0x6B, 0x23, 0x91, 0x4A }, + { 0x91, 0x00, 0xF9, 0x46, 0xD6, 0xCC, 0xDE, 0x3A, 0x59, 0x7F, 0x90, 0xD3, 0x9F, 0xC1, 0x21, 0x5B, 0xAD, 0xDC, 0x74, 0x13, 0x64, 0x3D, 0x85, 0xC2, 0x1C, 0x3E, 0xEE, 0x5D, 0x2D, 0xD3, 0x28, 0x94 }, + { 0xDA, 0x70, 0xEE, 0xDD, 0x23, 0xE6, 0x63, 0xAA, 0x1A, 0x74, 0xB9, 0x76, 0x69, 0x35, 0xB4, 0x79, 0x22, 0x2A, 0x72, 0xAF, 0xBA, 0x5C, 0x79, 0x51, 0x58, 0xDA, 0xD4, 0x1A, 0x3B, 0xD7, 0x7E, 0x40 }, + { 0xF0, 0x67, 0xED, 0x6A, 0x0D, 0xBD, 0x43, 0xAA, 0x0A, 0x92, 0x54, 0xE6, 0x9F, 0xD6, 0x6B, 0xDD, 0x8A, 0xCB, 0x87, 0xDE, 0x93, 0x6C, 0x25, 0x8C, 0xFB, 0x02, 0x28, 0x5F, 0x2C, 0x11, 0xFA, 0x79 }, + { 0x71, 0x5C, 0x99, 0xC7, 0xD5, 0x75, 0x80, 0xCF, 0x97, 0x53, 0xB4, 0xC1, 0xD7, 0x95, 0xE4, 0x5A, 0x83, 0xFB, 0xB2, 0x28, 0xC0, 0xD3, 0x6F, 0xBE, 0x20, 0xFA, 0xF3, 0x9B, 0xDD, 0x6D, 0x4E, 0x85 }, + { 0xE4, 0x57, 0xD6, 0xAD, 0x1E, 0x67, 0xCB, 0x9B, 0xBD, 0x17, 0xCB, 0xD6, 0x98, 0xFA, 0x6D, 0x7D, 0xAE, 0x0C, 0x9B, 0x7A, 0xD6, 0xCB, 0xD6, 0x53, 0x96, 0x34, 0xE3, 0x2A, 0x71, 0x9C, 0x84, 0x92 }, + { 0xEC, 0xE3, 0xEA, 0x81, 0x03, 0xE0, 0x24, 0x83, 0xC6, 0x4A, 0x70, 0xA4, 0xBD, 0xCE, 0xE8, 0xCE, 0xB6, 0x27, 0x8F, 0x25, 0x33, 0xF3, 0xF4, 0x8D, 0xBE, 0xED, 0xFB, 0xA9, 0x45, 0x31, 0xD4, 0xAE }, + { 0x38, 0x8A, 0xA5, 0xD3, 0x66, 0x7A, 0x97, 0xC6, 0x8D, 0x3D, 
0x56, 0xF8, 0xF3, 0xEE, 0x8D, 0x3D, 0x36, 0x09, 0x1F, 0x17, 0xFE, 0x5D, 0x1B, 0x0D, 0x5D, 0x84, 0xC9, 0x3B, 0x2F, 0xFE, 0x40, 0xBD }, + { 0x8B, 0x6B, 0x31, 0xB9, 0xAD, 0x7C, 0x3D, 0x5C, 0xD8, 0x4B, 0xF9, 0x89, 0x47, 0xB9, 0xCD, 0xB5, 0x9D, 0xF8, 0xA2, 0x5F, 0xF7, 0x38, 0x10, 0x10, 0x13, 0xBE, 0x4F, 0xD6, 0x5E, 0x1D, 0xD1, 0xA3 }, + { 0x06, 0x62, 0x91, 0xF6, 0xBB, 0xD2, 0x5F, 0x3C, 0x85, 0x3D, 0xB7, 0xD8, 0xB9, 0x5C, 0x9A, 0x1C, 0xFB, 0x9B, 0xF1, 0xC1, 0xC9, 0x9F, 0xB9, 0x5A, 0x9B, 0x78, 0x69, 0xD9, 0x0F, 0x1C, 0x29, 0x03 }, + { 0xA7, 0x07, 0xEF, 0xBC, 0xCD, 0xCE, 0xED, 0x42, 0x96, 0x7A, 0x66, 0xF5, 0x53, 0x9B, 0x93, 0xED, 0x75, 0x60, 0xD4, 0x67, 0x30, 0x40, 0x16, 0xC4, 0x78, 0x0D, 0x77, 0x55, 0xA5, 0x65, 0xD4, 0xC4 }, + { 0x38, 0xC5, 0x3D, 0xFB, 0x70, 0xBE, 0x7E, 0x79, 0x2B, 0x07, 0xA6, 0xA3, 0x5B, 0x8A, 0x6A, 0x0A, 0xBA, 0x02, 0xC5, 0xC5, 0xF3, 0x8B, 0xAF, 0x5C, 0x82, 0x3F, 0xDF, 0xD9, 0xE4, 0x2D, 0x65, 0x7E }, + { 0xF2, 0x91, 0x13, 0x86, 0x50, 0x1D, 0x9A, 0xB9, 0xD7, 0x20, 0xCF, 0x8A, 0xD1, 0x05, 0x03, 0xD5, 0x63, 0x4B, 0xF4, 0xB7, 0xD1, 0x2B, 0x56, 0xDF, 0xB7, 0x4F, 0xEC, 0xC6, 0xE4, 0x09, 0x3F, 0x68 }, + { 0xC6, 0xF2, 0xBD, 0xD5, 0x2B, 0x81, 0xE6, 0xE4, 0xF6, 0x59, 0x5A, 0xBD, 0x4D, 0x7F, 0xB3, 0x1F, 0x65, 0x11, 0x69, 0xD0, 0x0F, 0xF3, 0x26, 0x92, 0x6B, 0x34, 0x94, 0x7B, 0x28, 0xA8, 0x39, 0x59 }, + { 0x29, 0x3D, 0x94, 0xB1, 0x8C, 0x98, 0xBB, 0x32, 0x23, 0x36, 0x6B, 0x8C, 0xE7, 0x4C, 0x28, 0xFB, 0xDF, 0x28, 0xE1, 0xF8, 0x4A, 0x33, 0x50, 0xB0, 0xEB, 0x2D, 0x18, 0x04, 0xA5, 0x77, 0x57, 0x9B }, + { 0x2C, 0x2F, 0xA5, 0xC0, 0xB5, 0x15, 0x33, 0x16, 0x5B, 0xC3, 0x75, 0xC2, 0x2E, 0x27, 0x81, 0x76, 0x82, 0x70, 0xA3, 0x83, 0x98, 0x5D, 0x13, 0xBD, 0x6B, 0x67, 0xB6, 0xFD, 0x67, 0xF8, 0x89, 0xEB }, + { 0xCA, 0xA0, 0x9B, 0x82, 0xB7, 0x25, 0x62, 0xE4, 0x3F, 0x4B, 0x22, 0x75, 0xC0, 0x91, 0x91, 0x8E, 0x62, 0x4D, 0x91, 0x16, 0x61, 0xCC, 0x81, 0x1B, 0xB5, 0xFA, 0xEC, 0x51, 0xF6, 0x08, 0x8E, 0xF7 }, + { 0x24, 0x76, 0x1E, 0x45, 0xE6, 0x74, 0x39, 0x53, 0x79, 0xFB, 0x17, 0x72, 0x9C, 
0x78, 0xCB, 0x93, 0x9E, 0x6F, 0x74, 0xC5, 0xDF, 0xFB, 0x9C, 0x96, 0x1F, 0x49, 0x59, 0x82, 0xC3, 0xED, 0x1F, 0xE3 }, + { 0x55, 0xB7, 0x0A, 0x82, 0x13, 0x1E, 0xC9, 0x48, 0x88, 0xD7, 0xAB, 0x54, 0xA7, 0xC5, 0x15, 0x25, 0x5C, 0x39, 0x38, 0xBB, 0x10, 0xBC, 0x78, 0x4D, 0xC9, 0xB6, 0x7F, 0x07, 0x6E, 0x34, 0x1A, 0x73 }, + { 0x6A, 0xB9, 0x05, 0x7B, 0x97, 0x7E, 0xBC, 0x3C, 0xA4, 0xD4, 0xCE, 0x74, 0x50, 0x6C, 0x25, 0xCC, 0xCD, 0xC5, 0x66, 0x49, 0x7C, 0x45, 0x0B, 0x54, 0x15, 0xA3, 0x94, 0x86, 0xF8, 0x65, 0x7A, 0x03 }, + { 0x24, 0x06, 0x6D, 0xEE, 0xE0, 0xEC, 0xEE, 0x15, 0xA4, 0x5F, 0x0A, 0x32, 0x6D, 0x0F, 0x8D, 0xBC, 0x79, 0x76, 0x1E, 0xBB, 0x93, 0xCF, 0x8C, 0x03, 0x77, 0xAF, 0x44, 0x09, 0x78, 0xFC, 0xF9, 0x94 }, + { 0x20, 0x00, 0x0D, 0x3F, 0x66, 0xBA, 0x76, 0x86, 0x0D, 0x5A, 0x95, 0x06, 0x88, 0xB9, 0xAA, 0x0D, 0x76, 0xCF, 0xEA, 0x59, 0xB0, 0x05, 0xD8, 0x59, 0x91, 0x4B, 0x1A, 0x46, 0x65, 0x3A, 0x93, 0x9B }, + { 0xB9, 0x2D, 0xAA, 0x79, 0x60, 0x3E, 0x3B, 0xDB, 0xC3, 0xBF, 0xE0, 0xF4, 0x19, 0xE4, 0x09, 0xB2, 0xEA, 0x10, 0xDC, 0x43, 0x5B, 0xEE, 0xFE, 0x29, 0x59, 0xDA, 0x16, 0x89, 0x5D, 0x5D, 0xCA, 0x1C }, + { 0xE9, 0x47, 0x94, 0x87, 0x05, 0xB2, 0x06, 0xD5, 0x72, 0xB0, 0xE8, 0xF6, 0x2F, 0x66, 0xA6, 0x55, 0x1C, 0xBD, 0x6B, 0xC3, 0x05, 0xD2, 0x6C, 0xE7, 0x53, 0x9A, 0x12, 0xF9, 0xAA, 0xDF, 0x75, 0x71 }, + { 0x3D, 0x67, 0xC1, 0xB3, 0xF9, 0xB2, 0x39, 0x10, 0xE3, 0xD3, 0x5E, 0x6B, 0x0F, 0x2C, 0xCF, 0x44, 0xA0, 0xB5, 0x40, 0xA4, 0x5C, 0x18, 0xBA, 0x3C, 0x36, 0x26, 0x4D, 0xD4, 0x8E, 0x96, 0xAF, 0x6A }, + { 0xC7, 0x55, 0x8B, 0xAB, 0xDA, 0x04, 0xBC, 0xCB, 0x76, 0x4D, 0x0B, 0xBF, 0x33, 0x58, 0x42, 0x51, 0x41, 0x90, 0x2D, 0x22, 0x39, 0x1D, 0x9F, 0x8C, 0x59, 0x15, 0x9F, 0xEC, 0x9E, 0x49, 0xB1, 0x51 }, + { 0x0B, 0x73, 0x2B, 0xB0, 0x35, 0x67, 0x5A, 0x50, 0xFF, 0x58, 0xF2, 0xC2, 0x42, 0xE4, 0x71, 0x0A, 0xEC, 0xE6, 0x46, 0x70, 0x07, 0x9C, 0x13, 0x04, 0x4C, 0x79, 0xC9, 0xB7, 0x49, 0x1F, 0x70, 0x00 }, + { 0xD1, 0x20, 0xB5, 0xEF, 0x6D, 0x57, 0xEB, 0xF0, 0x6E, 0xAF, 0x96, 0xBC, 0x93, 0x3C, 0x96, 0x7B, 
0x16, 0xCB, 0xE6, 0xE2, 0xBF, 0x00, 0x74, 0x1C, 0x30, 0xAA, 0x1C, 0x54, 0xBA, 0x64, 0x80, 0x1F }, + { 0x58, 0xD2, 0x12, 0xAD, 0x6F, 0x58, 0xAE, 0xF0, 0xF8, 0x01, 0x16, 0xB4, 0x41, 0xE5, 0x7F, 0x61, 0x95, 0xBF, 0xEF, 0x26, 0xB6, 0x14, 0x63, 0xED, 0xEC, 0x11, 0x83, 0xCD, 0xB0, 0x4F, 0xE7, 0x6D }, + { 0xB8, 0x83, 0x6F, 0x51, 0xD1, 0xE2, 0x9B, 0xDF, 0xDB, 0xA3, 0x25, 0x56, 0x53, 0x60, 0x26, 0x8B, 0x8F, 0xAD, 0x62, 0x74, 0x73, 0xED, 0xEC, 0xEF, 0x7E, 0xAE, 0xFE, 0xE8, 0x37, 0xC7, 0x40, 0x03 }, + { 0xC5, 0x47, 0xA3, 0xC1, 0x24, 0xAE, 0x56, 0x85, 0xFF, 0xA7, 0xB8, 0xED, 0xAF, 0x96, 0xEC, 0x86, 0xF8, 0xB2, 0xD0, 0xD5, 0x0C, 0xEE, 0x8B, 0xE3, 0xB1, 0xF0, 0xC7, 0x67, 0x63, 0x06, 0x9D, 0x9C }, + { 0x5D, 0x16, 0x8B, 0x76, 0x9A, 0x2F, 0x67, 0x85, 0x3D, 0x62, 0x95, 0xF7, 0x56, 0x8B, 0xE4, 0x0B, 0xB7, 0xA1, 0x6B, 0x8D, 0x65, 0xBA, 0x87, 0x63, 0x5D, 0x19, 0x78, 0xD2, 0xAB, 0x11, 0xBA, 0x2A }, + { 0xA2, 0xF6, 0x75, 0xDC, 0x73, 0x02, 0x63, 0x8C, 0xB6, 0x02, 0x01, 0x06, 0x4C, 0xA5, 0x50, 0x77, 0x71, 0x4D, 0x71, 0xFE, 0x09, 0x6A, 0x31, 0x5F, 0x2F, 0xE7, 0x40, 0x12, 0x77, 0xCA, 0xA5, 0xAF }, + { 0xC8, 0xAA, 0xB5, 0xCD, 0x01, 0x60, 0xAE, 0x78, 0xCD, 0x2E, 0x8A, 0xC5, 0xFB, 0x0E, 0x09, 0x3C, 0xDB, 0x5C, 0x4B, 0x60, 0x52, 0xA0, 0xA9, 0x7B, 0xB0, 0x42, 0x16, 0x82, 0x6F, 0xA7, 0xA4, 0x37 }, + { 0xFF, 0x68, 0xCA, 0x40, 0x35, 0xBF, 0xEB, 0x43, 0xFB, 0xF1, 0x45, 0xFD, 0xDD, 0x5E, 0x43, 0xF1, 0xCE, 0xA5, 0x4F, 0x11, 0xF7, 0xBE, 0xE1, 0x30, 0x58, 0xF0, 0x27, 0x32, 0x9A, 0x4A, 0x5F, 0xA4 }, + { 0x1D, 0x4E, 0x54, 0x87, 0xAE, 0x3C, 0x74, 0x0F, 0x2B, 0xA6, 0xE5, 0x41, 0xAC, 0x91, 0xBC, 0x2B, 0xFC, 0xD2, 0x99, 0x9C, 0x51, 0x8D, 0x80, 0x7B, 0x42, 0x67, 0x48, 0x80, 0x3A, 0x35, 0x0F, 0xD4 }, + { 0x6D, 0x24, 0x4E, 0x1A, 0x06, 0xCE, 0x4E, 0xF5, 0x78, 0xDD, 0x0F, 0x63, 0xAF, 0xF0, 0x93, 0x67, 0x06, 0x73, 0x51, 0x19, 0xCA, 0x9C, 0x8D, 0x22, 0xD8, 0x6C, 0x80, 0x14, 0x14, 0xAB, 0x97, 0x41 }, + { 0xDE, 0xCF, 0x73, 0x29, 0xDB, 0xCC, 0x82, 0x7B, 0x8F, 0xC5, 0x24, 0xC9, 0x43, 0x1E, 0x89, 0x98, 0x02, 0x9E, 0xCE, 
0x12, 0xCE, 0x93, 0xB7, 0xB2, 0xF3, 0xE7, 0x69, 0xA9, 0x41, 0xFB, 0x8C, 0xEA }, + { 0x2F, 0xAF, 0xCC, 0x0F, 0x2E, 0x63, 0xCB, 0xD0, 0x77, 0x55, 0xBE, 0x7B, 0x75, 0xEC, 0xEA, 0x0A, 0xDF, 0xF9, 0xAA, 0x5E, 0xDE, 0x2A, 0x52, 0xFD, 0xAB, 0x4D, 0xFD, 0x03, 0x74, 0xCD, 0x48, 0x3F }, + { 0xAA, 0x85, 0x01, 0x0D, 0xD4, 0x6A, 0x54, 0x6B, 0x53, 0x5E, 0xF4, 0xCF, 0x5F, 0x07, 0xD6, 0x51, 0x61, 0xE8, 0x98, 0x28, 0xF3, 0xA7, 0x7D, 0xB7, 0xB9, 0xB5, 0x6F, 0x0D, 0xF5, 0x9A, 0xAE, 0x45 }, + { 0x07, 0xE8, 0xE1, 0xEE, 0x73, 0x2C, 0xB0, 0xD3, 0x56, 0xC9, 0xC0, 0xD1, 0x06, 0x9C, 0x89, 0xD1, 0x7A, 0xDF, 0x6A, 0x9A, 0x33, 0x4F, 0x74, 0x5E, 0xC7, 0x86, 0x73, 0x32, 0x54, 0x8C, 0xA8, 0xE9 }, + { 0x0E, 0x01, 0xE8, 0x1C, 0xAD, 0xA8, 0x16, 0x2B, 0xFD, 0x5F, 0x8A, 0x8C, 0x81, 0x8A, 0x6C, 0x69, 0xFE, 0xDF, 0x02, 0xCE, 0xB5, 0x20, 0x85, 0x23, 0xCB, 0xE5, 0x31, 0x3B, 0x89, 0xCA, 0x10, 0x53 }, + { 0x6B, 0xB6, 0xC6, 0x47, 0x26, 0x55, 0x08, 0x43, 0x99, 0x85, 0x2E, 0x00, 0x24, 0x9F, 0x8C, 0xB2, 0x47, 0x89, 0x6D, 0x39, 0x2B, 0x02, 0xD7, 0x3B, 0x7F, 0x0D, 0xD8, 0x18, 0xE1, 0xE2, 0x9B, 0x07 }, + { 0x42, 0xD4, 0x63, 0x6E, 0x20, 0x60, 0xF0, 0x8F, 0x41, 0xC8, 0x82, 0xE7, 0x6B, 0x39, 0x6B, 0x11, 0x2E, 0xF6, 0x27, 0xCC, 0x24, 0xC4, 0x3D, 0xD5, 0xF8, 0x3A, 0x1D, 0x1A, 0x7E, 0xAD, 0x71, 0x1A }, + { 0x48, 0x58, 0xC9, 0xA1, 0x88, 0xB0, 0x23, 0x4F, 0xB9, 0xA8, 0xD4, 0x7D, 0x0B, 0x41, 0x33, 0x65, 0x0A, 0x03, 0x0B, 0xD0, 0x61, 0x1B, 0x87, 0xC3, 0x89, 0x2E, 0x94, 0x95, 0x1F, 0x8D, 0xF8, 0x52 }, + { 0x3F, 0xAB, 0x3E, 0x36, 0x98, 0x8D, 0x44, 0x5A, 0x51, 0xC8, 0x78, 0x3E, 0x53, 0x1B, 0xE3, 0xA0, 0x2B, 0xE4, 0x0C, 0xD0, 0x47, 0x96, 0xCF, 0xB6, 0x1D, 0x40, 0x34, 0x74, 0x42, 0xD3, 0xF7, 0x94 }, + { 0xEB, 0xAB, 0xC4, 0x96, 0x36, 0xBD, 0x43, 0x3D, 0x2E, 0xC8, 0xF0, 0xE5, 0x18, 0x73, 0x2E, 0xF8, 0xFA, 0x21, 0xD4, 0xD0, 0x71, 0xCC, 0x3B, 0xC4, 0x6C, 0xD7, 0x9F, 0xA3, 0x8A, 0x28, 0xB8, 0x10 }, + { 0xA1, 0xD0, 0x34, 0x35, 0x23, 0xB8, 0x93, 0xFC, 0xA8, 0x4F, 0x47, 0xFE, 0xB4, 0xA6, 0x4D, 0x35, 0x0A, 0x17, 0xD8, 0xEE, 0xF5, 0x49, 
0x7E, 0xCE, 0x69, 0x7D, 0x02, 0xD7, 0x91, 0x78, 0xB5, 0x91 }, + { 0x26, 0x2E, 0xBF, 0xD9, 0x13, 0x0B, 0x7D, 0x28, 0x76, 0x0D, 0x08, 0xEF, 0x8B, 0xFD, 0x3B, 0x86, 0xCD, 0xD3, 0xB2, 0x11, 0x3D, 0x2C, 0xAE, 0xF7, 0xEA, 0x95, 0x1A, 0x30, 0x3D, 0xFA, 0x38, 0x46 }, + { 0xF7, 0x61, 0x58, 0xED, 0xD5, 0x0A, 0x15, 0x4F, 0xA7, 0x82, 0x03, 0xED, 0x23, 0x62, 0x93, 0x2F, 0xCB, 0x82, 0x53, 0xAA, 0xE3, 0x78, 0x90, 0x3E, 0xDE, 0xD1, 0xE0, 0x3F, 0x70, 0x21, 0xA2, 0x57 }, + { 0x26, 0x17, 0x8E, 0x95, 0x0A, 0xC7, 0x22, 0xF6, 0x7A, 0xE5, 0x6E, 0x57, 0x1B, 0x28, 0x4C, 0x02, 0x07, 0x68, 0x4A, 0x63, 0x34, 0xA1, 0x77, 0x48, 0xA9, 0x4D, 0x26, 0x0B, 0xC5, 0xF5, 0x52, 0x74 }, + { 0xC3, 0x78, 0xD1, 0xE4, 0x93, 0xB4, 0x0E, 0xF1, 0x1F, 0xE6, 0xA1, 0x5D, 0x9C, 0x27, 0x37, 0xA3, 0x78, 0x09, 0x63, 0x4C, 0x5A, 0xBA, 0xD5, 0xB3, 0x3D, 0x7E, 0x39, 0x3B, 0x4A, 0xE0, 0x5D, 0x03 }, + { 0x98, 0x4B, 0xD8, 0x37, 0x91, 0x01, 0xBE, 0x8F, 0xD8, 0x06, 0x12, 0xD8, 0xEA, 0x29, 0x59, 0xA7, 0x86, 0x5E, 0xC9, 0x71, 0x85, 0x23, 0x55, 0x01, 0x07, 0xAE, 0x39, 0x38, 0xDF, 0x32, 0x01, 0x1B }, + { 0xC6, 0xF2, 0x5A, 0x81, 0x2A, 0x14, 0x48, 0x58, 0xAC, 0x5C, 0xED, 0x37, 0xA9, 0x3A, 0x9F, 0x47, 0x59, 0xBA, 0x0B, 0x1C, 0x0F, 0xDC, 0x43, 0x1D, 0xCE, 0x35, 0xF9, 0xEC, 0x1F, 0x1F, 0x4A, 0x99 }, + { 0x92, 0x4C, 0x75, 0xC9, 0x44, 0x24, 0xFF, 0x75, 0xE7, 0x4B, 0x8B, 0x4E, 0x94, 0x35, 0x89, 0x58, 0xB0, 0x27, 0xB1, 0x71, 0xDF, 0x5E, 0x57, 0x89, 0x9A, 0xD0, 0xD4, 0xDA, 0xC3, 0x73, 0x53, 0xB6 }, + { 0x0A, 0xF3, 0x58, 0x92, 0xA6, 0x3F, 0x45, 0x93, 0x1F, 0x68, 0x46, 0xED, 0x19, 0x03, 0x61, 0xCD, 0x07, 0x30, 0x89, 0xE0, 0x77, 0x16, 0x57, 0x14, 0xB5, 0x0B, 0x81, 0xA2, 0xE3, 0xDD, 0x9B, 0xA1 }, + { 0xCC, 0x80, 0xCE, 0xFB, 0x26, 0xC3, 0xB2, 0xB0, 0xDA, 0xEF, 0x23, 0x3E, 0x60, 0x6D, 0x5F, 0xFC, 0x80, 0xFA, 0x17, 0x42, 0x7D, 0x18, 0xE3, 0x04, 0x89, 0x67, 0x3E, 0x06, 0xEF, 0x4B, 0x87, 0xF7 }, + { 0xC2, 0xF8, 0xC8, 0x11, 0x74, 0x47, 0xF3, 0x97, 0x8B, 0x08, 0x18, 0xDC, 0xF6, 0xF7, 0x01, 0x16, 0xAC, 0x56, 0xFD, 0x18, 0x4D, 0xD1, 0x27, 0x84, 0x94, 
0xE1, 0x03, 0xFC, 0x6D, 0x74, 0xA8, 0x87 }, + { 0xBD, 0xEC, 0xF6, 0xBF, 0xC1, 0xBA, 0x0D, 0xF6, 0xE8, 0x62, 0xC8, 0x31, 0x99, 0x22, 0x07, 0x79, 0x6A, 0xCC, 0x79, 0x79, 0x68, 0x35, 0x88, 0x28, 0xC0, 0x6E, 0x7A, 0x51, 0xE0, 0x90, 0x09, 0x8F }, + { 0x24, 0xD1, 0xA2, 0x6E, 0x3D, 0xAB, 0x02, 0xFE, 0x45, 0x72, 0xD2, 0xAA, 0x7D, 0xBD, 0x3E, 0xC3, 0x0F, 0x06, 0x93, 0xDB, 0x26, 0xF2, 0x73, 0xD0, 0xAB, 0x2C, 0xB0, 0xC1, 0x3B, 0x5E, 0x64, 0x51 }, + { 0xEC, 0x56, 0xF5, 0x8B, 0x09, 0x29, 0x9A, 0x30, 0x0B, 0x14, 0x05, 0x65, 0xD7, 0xD3, 0xE6, 0x87, 0x82, 0xB6, 0xE2, 0xFB, 0xEB, 0x4B, 0x7E, 0xA9, 0x7A, 0xC0, 0x57, 0x98, 0x90, 0x61, 0xDD, 0x3F }, + { 0x11, 0xA4, 0x37, 0xC1, 0xAB, 0xA3, 0xC1, 0x19, 0xDD, 0xFA, 0xB3, 0x1B, 0x3E, 0x8C, 0x84, 0x1D, 0xEE, 0xEB, 0x91, 0x3E, 0xF5, 0x7F, 0x7E, 0x48, 0xF2, 0xC9, 0xCF, 0x5A, 0x28, 0xFA, 0x42, 0xBC }, + { 0x53, 0xC7, 0xE6, 0x11, 0x4B, 0x85, 0x0A, 0x2C, 0xB4, 0x96, 0xC9, 0xB3, 0xC6, 0x9A, 0x62, 0x3E, 0xAE, 0xA2, 0xCB, 0x1D, 0x33, 0xDD, 0x81, 0x7E, 0x47, 0x65, 0xED, 0xAA, 0x68, 0x23, 0xC2, 0x28 }, + { 0x15, 0x4C, 0x3E, 0x96, 0xFE, 0xE5, 0xDB, 0x14, 0xF8, 0x77, 0x3E, 0x18, 0xAF, 0x14, 0x85, 0x79, 0x13, 0x50, 0x9D, 0xA9, 0x99, 0xB4, 0x6C, 0xDD, 0x3D, 0x4C, 0x16, 0x97, 0x60, 0xC8, 0x3A, 0xD2 }, + { 0x40, 0xB9, 0x91, 0x6F, 0x09, 0x3E, 0x02, 0x7A, 0x87, 0x86, 0x64, 0x18, 0x18, 0x92, 0x06, 0x20, 0x47, 0x2F, 0xBC, 0xF6, 0x8F, 0x70, 0x1D, 0x1B, 0x68, 0x06, 0x32, 0xE6, 0x99, 0x6B, 0xDE, 0xD3 }, + { 0x24, 0xC4, 0xCB, 0xBA, 0x07, 0x11, 0x98, 0x31, 0xA7, 0x26, 0xB0, 0x53, 0x05, 0xD9, 0x6D, 0xA0, 0x2F, 0xF8, 0xB1, 0x48, 0xF0, 0xDA, 0x44, 0x0F, 0xE2, 0x33, 0xBC, 0xAA, 0x32, 0xC7, 0x2F, 0x6F }, + { 0x5D, 0x20, 0x15, 0x10, 0x25, 0x00, 0x20, 0xB7, 0x83, 0x68, 0x96, 0x88, 0xAB, 0xBF, 0x8E, 0xCF, 0x25, 0x94, 0xA9, 0x6A, 0x08, 0xF2, 0xBF, 0xEC, 0x6C, 0xE0, 0x57, 0x44, 0x65, 0xDD, 0xED, 0x71 }, + { 0x04, 0x3B, 0x97, 0xE3, 0x36, 0xEE, 0x6F, 0xDB, 0xBE, 0x2B, 0x50, 0xF2, 0x2A, 0xF8, 0x32, 0x75, 0xA4, 0x08, 0x48, 0x05, 0xD2, 0xD5, 0x64, 0x59, 0x62, 0x45, 0x4B, 0x6C, 
0x9B, 0x80, 0x53, 0xA0 }, + { 0x56, 0x48, 0x35, 0xCB, 0xAE, 0xA7, 0x74, 0x94, 0x85, 0x68, 0xBE, 0x36, 0xCF, 0x52, 0xFC, 0xDD, 0x83, 0x93, 0x4E, 0xB0, 0xA2, 0x75, 0x12, 0xDB, 0xE3, 0xE2, 0xDB, 0x47, 0xB9, 0xE6, 0x63, 0x5A }, + { 0xF2, 0x1C, 0x33, 0xF4, 0x7B, 0xDE, 0x40, 0xA2, 0xA1, 0x01, 0xC9, 0xCD, 0xE8, 0x02, 0x7A, 0xAF, 0x61, 0xA3, 0x13, 0x7D, 0xE2, 0x42, 0x2B, 0x30, 0x03, 0x5A, 0x04, 0xC2, 0x70, 0x89, 0x41, 0x83 }, + { 0x9D, 0xB0, 0xEF, 0x74, 0xE6, 0x6C, 0xBB, 0x84, 0x2E, 0xB0, 0xE0, 0x73, 0x43, 0xA0, 0x3C, 0x5C, 0x56, 0x7E, 0x37, 0x2B, 0x3F, 0x23, 0xB9, 0x43, 0xC7, 0x88, 0xA4, 0xF2, 0x50, 0xF6, 0x78, 0x91 }, + { 0xAB, 0x8D, 0x08, 0x65, 0x5F, 0xF1, 0xD3, 0xFE, 0x87, 0x58, 0xD5, 0x62, 0x23, 0x5F, 0xD2, 0x3E, 0x7C, 0xF9, 0xDC, 0xAA, 0xD6, 0x58, 0x87, 0x2A, 0x49, 0xE5, 0xD3, 0x18, 0x3B, 0x6C, 0xCE, 0xBD }, + { 0x6F, 0x27, 0xF7, 0x7E, 0x7B, 0xCF, 0x46, 0xA1, 0xE9, 0x63, 0xAD, 0xE0, 0x30, 0x97, 0x33, 0x54, 0x30, 0x31, 0xDC, 0xCD, 0xD4, 0x7C, 0xAA, 0xC1, 0x74, 0xD7, 0xD2, 0x7C, 0xE8, 0x07, 0x7E, 0x8B }, + { 0xE3, 0xCD, 0x54, 0xDA, 0x7E, 0x44, 0x4C, 0xAA, 0x62, 0x07, 0x56, 0x95, 0x25, 0xA6, 0x70, 0xEB, 0xAE, 0x12, 0x78, 0xDE, 0x4E, 0x3F, 0xE2, 0x68, 0x4B, 0x3E, 0x33, 0xF5, 0xEF, 0x90, 0xCC, 0x1B }, + { 0xB2, 0xC3, 0xE3, 0x3A, 0x51, 0xD2, 0x2C, 0x4C, 0x08, 0xFC, 0x09, 0x89, 0xC8, 0x73, 0xC9, 0xCC, 0x41, 0x50, 0x57, 0x9B, 0x1E, 0x61, 0x63, 0xFA, 0x69, 0x4A, 0xD5, 0x1D, 0x53, 0xD7, 0x12, 0xDC }, + { 0xBE, 0x7F, 0xDA, 0x98, 0x3E, 0x13, 0x18, 0x9B, 0x4C, 0x77, 0xE0, 0xA8, 0x09, 0x20, 0xB6, 0xE0, 0xE0, 0xEA, 0x80, 0xC3, 0xB8, 0x4D, 0xBE, 0x7E, 0x71, 0x17, 0xD2, 0x53, 0xF4, 0x81, 0x12, 0xF4 }, + { 0xB6, 0x00, 0x8C, 0x28, 0xFA, 0xE0, 0x8A, 0xA4, 0x27, 0xE5, 0xBD, 0x3A, 0xAD, 0x36, 0xF1, 0x00, 0x21, 0xF1, 0x6C, 0x77, 0xCF, 0xEA, 0xBE, 0xD0, 0x7F, 0x97, 0xCC, 0x7D, 0xC1, 0xF1, 0x28, 0x4A }, + { 0x6E, 0x4E, 0x67, 0x60, 0xC5, 0x38, 0xF2, 0xE9, 0x7B, 0x3A, 0xDB, 0xFB, 0xBC, 0xDE, 0x57, 0xF8, 0x96, 0x6B, 0x7E, 0xA8, 0xFC, 0xB5, 0xBF, 0x7E, 0xFE, 0xC9, 0x13, 0xFD, 0x2A, 0x2B, 0x0C, 
0x55 }, + { 0x4A, 0xE5, 0x1F, 0xD1, 0x83, 0x4A, 0xA5, 0xBD, 0x9A, 0x6F, 0x7E, 0xC3, 0x9F, 0xC6, 0x63, 0x33, 0x8D, 0xC5, 0xD2, 0xE2, 0x07, 0x61, 0x56, 0x6D, 0x90, 0xCC, 0x68, 0xB1, 0xCB, 0x87, 0x5E, 0xD8 }, + { 0xB6, 0x73, 0xAA, 0xD7, 0x5A, 0xB1, 0xFD, 0xB5, 0x40, 0x1A, 0xBF, 0xA1, 0xBF, 0x89, 0xF3, 0xAD, 0xD2, 0xEB, 0xC4, 0x68, 0xDF, 0x36, 0x24, 0xA4, 0x78, 0xF4, 0xFE, 0x85, 0x9D, 0x8D, 0x55, 0xE2 }, + { 0x13, 0xC9, 0x47, 0x1A, 0x98, 0x55, 0x91, 0x35, 0x39, 0x83, 0x66, 0x60, 0x39, 0x8D, 0xA0, 0xF3, 0xF9, 0x9A, 0xDA, 0x08, 0x47, 0x9C, 0x69, 0xD1, 0xB7, 0xFC, 0xAA, 0x34, 0x61, 0xDD, 0x7E, 0x59 }, + { 0x2C, 0x11, 0xF4, 0xA7, 0xF9, 0x9A, 0x1D, 0x23, 0xA5, 0x8B, 0xB6, 0x36, 0x35, 0x0F, 0xE8, 0x49, 0xF2, 0x9C, 0xBA, 0xC1, 0xB2, 0xA1, 0x11, 0x2D, 0x9F, 0x1E, 0xD5, 0xBC, 0x5B, 0x31, 0x3C, 0xCD }, + { 0xC7, 0xD3, 0xC0, 0x70, 0x6B, 0x11, 0xAE, 0x74, 0x1C, 0x05, 0xA1, 0xEF, 0x15, 0x0D, 0xD6, 0x5B, 0x54, 0x94, 0xD6, 0xD5, 0x4C, 0x9A, 0x86, 0xE2, 0x61, 0x78, 0x54, 0xE6, 0xAE, 0xEE, 0xBB, 0xD9 }, + { 0x19, 0x4E, 0x10, 0xC9, 0x38, 0x93, 0xAF, 0xA0, 0x64, 0xC3, 0xAC, 0x04, 0xC0, 0xDD, 0x80, 0x8D, 0x79, 0x1C, 0x3D, 0x4B, 0x75, 0x56, 0xE8, 0x9D, 0x8D, 0x9C, 0xB2, 0x25, 0xC4, 0xB3, 0x33, 0x39 }, + { 0x6F, 0xC4, 0x98, 0x8B, 0x8F, 0x78, 0x54, 0x6B, 0x16, 0x88, 0x99, 0x18, 0x45, 0x90, 0x8F, 0x13, 0x4B, 0x6A, 0x48, 0x2E, 0x69, 0x94, 0xB3, 0xD4, 0x83, 0x17, 0xBF, 0x08, 0xDB, 0x29, 0x21, 0x85 }, + { 0x56, 0x65, 0xBE, 0xB8, 0xB0, 0x95, 0x55, 0x25, 0x81, 0x3B, 0x59, 0x81, 0xCD, 0x14, 0x2E, 0xD4, 0xD0, 0x3F, 0xBA, 0x38, 0xA6, 0xF3, 0xE5, 0xAD, 0x26, 0x8E, 0x0C, 0xC2, 0x70, 0xD1, 0xCD, 0x11 }, + { 0xB8, 0x83, 0xD6, 0x8F, 0x5F, 0xE5, 0x19, 0x36, 0x43, 0x1B, 0xA4, 0x25, 0x67, 0x38, 0x05, 0x3B, 0x1D, 0x04, 0x26, 0xD4, 0xCB, 0x64, 0xB1, 0x6E, 0x83, 0xBA, 0xDC, 0x5E, 0x9F, 0xBE, 0x3B, 0x81 }, + { 0x53, 0xE7, 0xB2, 0x7E, 0xA5, 0x9C, 0x2F, 0x6D, 0xBB, 0x50, 0x76, 0x9E, 0x43, 0x55, 0x4D, 0xF3, 0x5A, 0xF8, 0x9F, 0x48, 0x22, 0xD0, 0x46, 0x6B, 0x00, 0x7D, 0xD6, 0xF6, 0xDE, 0xAF, 0xFF, 0x02 }, + { 0x1F, 
0x1A, 0x02, 0x29, 0xD4, 0x64, 0x0F, 0x01, 0x90, 0x15, 0x88, 0xD9, 0xDE, 0xC2, 0x2D, 0x13, 0xFC, 0x3E, 0xB3, 0x4A, 0x61, 0xB3, 0x29, 0x38, 0xEF, 0xBF, 0x53, 0x34, 0xB2, 0x80, 0x0A, 0xFA }, + { 0xC2, 0xB4, 0x05, 0xAF, 0xA0, 0xFA, 0x66, 0x68, 0x85, 0x2A, 0xEE, 0x4D, 0x88, 0x04, 0x08, 0x53, 0xFA, 0xB8, 0x00, 0xE7, 0x2B, 0x57, 0x58, 0x14, 0x18, 0xE5, 0x50, 0x6F, 0x21, 0x4C, 0x7D, 0x1F }, + { 0xC0, 0x8A, 0xA1, 0xC2, 0x86, 0xD7, 0x09, 0xFD, 0xC7, 0x47, 0x37, 0x44, 0x97, 0x71, 0x88, 0xC8, 0x95, 0xBA, 0x01, 0x10, 0x14, 0x24, 0x7E, 0x4E, 0xFA, 0x8D, 0x07, 0xE7, 0x8F, 0xEC, 0x69, 0x5C }, + { 0xF0, 0x3F, 0x57, 0x89, 0xD3, 0x33, 0x6B, 0x80, 0xD0, 0x02, 0xD5, 0x9F, 0xDF, 0x91, 0x8B, 0xDB, 0x77, 0x5B, 0x00, 0x95, 0x6E, 0xD5, 0x52, 0x8E, 0x86, 0xAA, 0x99, 0x4A, 0xCB, 0x38, 0xFE, 0x2D } +}; + +static const uint8_t blake2s_keyed_testvecs[][BLAKE2S_OUTBYTES] = { + { 0x48, 0xA8, 0x99, 0x7D, 0xA4, 0x07, 0x87, 0x6B, 0x3D, 0x79, 0xC0, 0xD9, 0x23, 0x25, 0xAD, 0x3B, 0x89, 0xCB, 0xB7, 0x54, 0xD8, 0x6A, 0xB7, 0x1A, 0xEE, 0x04, 0x7A, 0xD3, 0x45, 0xFD, 0x2C, 0x49 }, + { 0x40, 0xD1, 0x5F, 0xEE, 0x7C, 0x32, 0x88, 0x30, 0x16, 0x6A, 0xC3, 0xF9, 0x18, 0x65, 0x0F, 0x80, 0x7E, 0x7E, 0x01, 0xE1, 0x77, 0x25, 0x8C, 0xDC, 0x0A, 0x39, 0xB1, 0x1F, 0x59, 0x80, 0x66, 0xF1 }, + { 0x6B, 0xB7, 0x13, 0x00, 0x64, 0x4C, 0xD3, 0x99, 0x1B, 0x26, 0xCC, 0xD4, 0xD2, 0x74, 0xAC, 0xD1, 0xAD, 0xEA, 0xB8, 0xB1, 0xD7, 0x91, 0x45, 0x46, 0xC1, 0x19, 0x8B, 0xBE, 0x9F, 0xC9, 0xD8, 0x03 }, + { 0x1D, 0x22, 0x0D, 0xBE, 0x2E, 0xE1, 0x34, 0x66, 0x1F, 0xDF, 0x6D, 0x9E, 0x74, 0xB4, 0x17, 0x04, 0x71, 0x05, 0x56, 0xF2, 0xF6, 0xE5, 0xA0, 0x91, 0xB2, 0x27, 0x69, 0x74, 0x45, 0xDB, 0xEA, 0x6B }, + { 0xF6, 0xC3, 0xFB, 0xAD, 0xB4, 0xCC, 0x68, 0x7A, 0x00, 0x64, 0xA5, 0xBE, 0x6E, 0x79, 0x1B, 0xEC, 0x63, 0xB8, 0x68, 0xAD, 0x62, 0xFB, 0xA6, 0x1B, 0x37, 0x57, 0xEF, 0x9C, 0xA5, 0x2E, 0x05, 0xB2 }, + { 0x49, 0xC1, 0xF2, 0x11, 0x88, 0xDF, 0xD7, 0x69, 0xAE, 0xA0, 0xE9, 0x11, 0xDD, 0x6B, 0x41, 0xF1, 0x4D, 0xAB, 0x10, 0x9D, 0x2B, 0x85, 0x97, 0x7A, 0xA3, 
0x08, 0x8B, 0x5C, 0x70, 0x7E, 0x85, 0x98 }, + { 0xFD, 0xD8, 0x99, 0x3D, 0xCD, 0x43, 0xF6, 0x96, 0xD4, 0x4F, 0x3C, 0xEA, 0x0F, 0xF3, 0x53, 0x45, 0x23, 0x4E, 0xC8, 0xEE, 0x08, 0x3E, 0xB3, 0xCA, 0xDA, 0x01, 0x7C, 0x7F, 0x78, 0xC1, 0x71, 0x43 }, + { 0xE6, 0xC8, 0x12, 0x56, 0x37, 0x43, 0x8D, 0x09, 0x05, 0xB7, 0x49, 0xF4, 0x65, 0x60, 0xAC, 0x89, 0xFD, 0x47, 0x1C, 0xF8, 0x69, 0x2E, 0x28, 0xFA, 0xB9, 0x82, 0xF7, 0x3F, 0x01, 0x9B, 0x83, 0xA9 }, + { 0x19, 0xFC, 0x8C, 0xA6, 0x97, 0x9D, 0x60, 0xE6, 0xED, 0xD3, 0xB4, 0x54, 0x1E, 0x2F, 0x96, 0x7C, 0xED, 0x74, 0x0D, 0xF6, 0xEC, 0x1E, 0xAE, 0xBB, 0xFE, 0x81, 0x38, 0x32, 0xE9, 0x6B, 0x29, 0x74 }, + { 0xA6, 0xAD, 0x77, 0x7C, 0xE8, 0x81, 0xB5, 0x2B, 0xB5, 0xA4, 0x42, 0x1A, 0xB6, 0xCD, 0xD2, 0xDF, 0xBA, 0x13, 0xE9, 0x63, 0x65, 0x2D, 0x4D, 0x6D, 0x12, 0x2A, 0xEE, 0x46, 0x54, 0x8C, 0x14, 0xA7 }, + { 0xF5, 0xC4, 0xB2, 0xBA, 0x1A, 0x00, 0x78, 0x1B, 0x13, 0xAB, 0xA0, 0x42, 0x52, 0x42, 0xC6, 0x9C, 0xB1, 0x55, 0x2F, 0x3F, 0x71, 0xA9, 0xA3, 0xBB, 0x22, 0xB4, 0xA6, 0xB4, 0x27, 0x7B, 0x46, 0xDD }, + { 0xE3, 0x3C, 0x4C, 0x9B, 0xD0, 0xCC, 0x7E, 0x45, 0xC8, 0x0E, 0x65, 0xC7, 0x7F, 0xA5, 0x99, 0x7F, 0xEC, 0x70, 0x02, 0x73, 0x85, 0x41, 0x50, 0x9E, 0x68, 0xA9, 0x42, 0x38, 0x91, 0xE8, 0x22, 0xA3 }, + { 0xFB, 0xA1, 0x61, 0x69, 0xB2, 0xC3, 0xEE, 0x10, 0x5B, 0xE6, 0xE1, 0xE6, 0x50, 0xE5, 0xCB, 0xF4, 0x07, 0x46, 0xB6, 0x75, 0x3D, 0x03, 0x6A, 0xB5, 0x51, 0x79, 0x01, 0x4A, 0xD7, 0xEF, 0x66, 0x51 }, + { 0xF5, 0xC4, 0xBE, 0xC6, 0xD6, 0x2F, 0xC6, 0x08, 0xBF, 0x41, 0xCC, 0x11, 0x5F, 0x16, 0xD6, 0x1C, 0x7E, 0xFD, 0x3F, 0xF6, 0xC6, 0x56, 0x92, 0xBB, 0xE0, 0xAF, 0xFF, 0xB1, 0xFE, 0xDE, 0x74, 0x75 }, + { 0xA4, 0x86, 0x2E, 0x76, 0xDB, 0x84, 0x7F, 0x05, 0xBA, 0x17, 0xED, 0xE5, 0xDA, 0x4E, 0x7F, 0x91, 0xB5, 0x92, 0x5C, 0xF1, 0xAD, 0x4B, 0xA1, 0x27, 0x32, 0xC3, 0x99, 0x57, 0x42, 0xA5, 0xCD, 0x6E }, + { 0x65, 0xF4, 0xB8, 0x60, 0xCD, 0x15, 0xB3, 0x8E, 0xF8, 0x14, 0xA1, 0xA8, 0x04, 0x31, 0x4A, 0x55, 0xBE, 0x95, 0x3C, 0xAA, 0x65, 0xFD, 0x75, 0x8A, 0xD9, 0x89, 0xFF, 0x34, 
0xA4, 0x1C, 0x1E, 0xEA }, + { 0x19, 0xBA, 0x23, 0x4F, 0x0A, 0x4F, 0x38, 0x63, 0x7D, 0x18, 0x39, 0xF9, 0xD9, 0xF7, 0x6A, 0xD9, 0x1C, 0x85, 0x22, 0x30, 0x71, 0x43, 0xC9, 0x7D, 0x5F, 0x93, 0xF6, 0x92, 0x74, 0xCE, 0xC9, 0xA7 }, + { 0x1A, 0x67, 0x18, 0x6C, 0xA4, 0xA5, 0xCB, 0x8E, 0x65, 0xFC, 0xA0, 0xE2, 0xEC, 0xBC, 0x5D, 0xDC, 0x14, 0xAE, 0x38, 0x1B, 0xB8, 0xBF, 0xFE, 0xB9, 0xE0, 0xA1, 0x03, 0x44, 0x9E, 0x3E, 0xF0, 0x3C }, + { 0xAF, 0xBE, 0xA3, 0x17, 0xB5, 0xA2, 0xE8, 0x9C, 0x0B, 0xD9, 0x0C, 0xCF, 0x5D, 0x7F, 0xD0, 0xED, 0x57, 0xFE, 0x58, 0x5E, 0x4B, 0xE3, 0x27, 0x1B, 0x0A, 0x6B, 0xF0, 0xF5, 0x78, 0x6B, 0x0F, 0x26 }, + { 0xF1, 0xB0, 0x15, 0x58, 0xCE, 0x54, 0x12, 0x62, 0xF5, 0xEC, 0x34, 0x29, 0x9D, 0x6F, 0xB4, 0x09, 0x00, 0x09, 0xE3, 0x43, 0x4B, 0xE2, 0xF4, 0x91, 0x05, 0xCF, 0x46, 0xAF, 0x4D, 0x2D, 0x41, 0x24 }, + { 0x13, 0xA0, 0xA0, 0xC8, 0x63, 0x35, 0x63, 0x5E, 0xAA, 0x74, 0xCA, 0x2D, 0x5D, 0x48, 0x8C, 0x79, 0x7B, 0xBB, 0x4F, 0x47, 0xDC, 0x07, 0x10, 0x50, 0x15, 0xED, 0x6A, 0x1F, 0x33, 0x09, 0xEF, 0xCE }, + { 0x15, 0x80, 0xAF, 0xEE, 0xBE, 0xBB, 0x34, 0x6F, 0x94, 0xD5, 0x9F, 0xE6, 0x2D, 0xA0, 0xB7, 0x92, 0x37, 0xEA, 0xD7, 0xB1, 0x49, 0x1F, 0x56, 0x67, 0xA9, 0x0E, 0x45, 0xED, 0xF6, 0xCA, 0x8B, 0x03 }, + { 0x20, 0xBE, 0x1A, 0x87, 0x5B, 0x38, 0xC5, 0x73, 0xDD, 0x7F, 0xAA, 0xA0, 0xDE, 0x48, 0x9D, 0x65, 0x5C, 0x11, 0xEF, 0xB6, 0xA5, 0x52, 0x69, 0x8E, 0x07, 0xA2, 0xD3, 0x31, 0xB5, 0xF6, 0x55, 0xC3 }, + { 0xBE, 0x1F, 0xE3, 0xC4, 0xC0, 0x40, 0x18, 0xC5, 0x4C, 0x4A, 0x0F, 0x6B, 0x9A, 0x2E, 0xD3, 0xC5, 0x3A, 0xBE, 0x3A, 0x9F, 0x76, 0xB4, 0xD2, 0x6D, 0xE5, 0x6F, 0xC9, 0xAE, 0x95, 0x05, 0x9A, 0x99 }, + { 0xE3, 0xE3, 0xAC, 0xE5, 0x37, 0xEB, 0x3E, 0xDD, 0x84, 0x63, 0xD9, 0xAD, 0x35, 0x82, 0xE1, 0x3C, 0xF8, 0x65, 0x33, 0xFF, 0xDE, 0x43, 0xD6, 0x68, 0xDD, 0x2E, 0x93, 0xBB, 0xDB, 0xD7, 0x19, 0x5A }, + { 0x11, 0x0C, 0x50, 0xC0, 0xBF, 0x2C, 0x6E, 0x7A, 0xEB, 0x7E, 0x43, 0x5D, 0x92, 0xD1, 0x32, 0xAB, 0x66, 0x55, 0x16, 0x8E, 0x78, 0xA2, 0xDE, 0xCD, 0xEC, 0x33, 0x30, 0x77, 0x76, 0x84, 0xD9, 
0xC1 }, + { 0xE9, 0xBA, 0x8F, 0x50, 0x5C, 0x9C, 0x80, 0xC0, 0x86, 0x66, 0xA7, 0x01, 0xF3, 0x36, 0x7E, 0x6C, 0xC6, 0x65, 0xF3, 0x4B, 0x22, 0xE7, 0x3C, 0x3C, 0x04, 0x17, 0xEB, 0x1C, 0x22, 0x06, 0x08, 0x2F }, + { 0x26, 0xCD, 0x66, 0xFC, 0xA0, 0x23, 0x79, 0xC7, 0x6D, 0xF1, 0x23, 0x17, 0x05, 0x2B, 0xCA, 0xFD, 0x6C, 0xD8, 0xC3, 0xA7, 0xB8, 0x90, 0xD8, 0x05, 0xF3, 0x6C, 0x49, 0x98, 0x97, 0x82, 0x43, 0x3A }, + { 0x21, 0x3F, 0x35, 0x96, 0xD6, 0xE3, 0xA5, 0xD0, 0xE9, 0x93, 0x2C, 0xD2, 0x15, 0x91, 0x46, 0x01, 0x5E, 0x2A, 0xBC, 0x94, 0x9F, 0x47, 0x29, 0xEE, 0x26, 0x32, 0xFE, 0x1E, 0xDB, 0x78, 0xD3, 0x37 }, + { 0x10, 0x15, 0xD7, 0x01, 0x08, 0xE0, 0x3B, 0xE1, 0xC7, 0x02, 0xFE, 0x97, 0x25, 0x36, 0x07, 0xD1, 0x4A, 0xEE, 0x59, 0x1F, 0x24, 0x13, 0xEA, 0x67, 0x87, 0x42, 0x7B, 0x64, 0x59, 0xFF, 0x21, 0x9A }, + { 0x3C, 0xA9, 0x89, 0xDE, 0x10, 0xCF, 0xE6, 0x09, 0x90, 0x94, 0x72, 0xC8, 0xD3, 0x56, 0x10, 0x80, 0x5B, 0x2F, 0x97, 0x77, 0x34, 0xCF, 0x65, 0x2C, 0xC6, 0x4B, 0x3B, 0xFC, 0x88, 0x2D, 0x5D, 0x89 }, + { 0xB6, 0x15, 0x6F, 0x72, 0xD3, 0x80, 0xEE, 0x9E, 0xA6, 0xAC, 0xD1, 0x90, 0x46, 0x4F, 0x23, 0x07, 0xA5, 0xC1, 0x79, 0xEF, 0x01, 0xFD, 0x71, 0xF9, 0x9F, 0x2D, 0x0F, 0x7A, 0x57, 0x36, 0x0A, 0xEA }, + { 0xC0, 0x3B, 0xC6, 0x42, 0xB2, 0x09, 0x59, 0xCB, 0xE1, 0x33, 0xA0, 0x30, 0x3E, 0x0C, 0x1A, 0xBF, 0xF3, 0xE3, 0x1E, 0xC8, 0xE1, 0xA3, 0x28, 0xEC, 0x85, 0x65, 0xC3, 0x6D, 0xEC, 0xFF, 0x52, 0x65 }, + { 0x2C, 0x3E, 0x08, 0x17, 0x6F, 0x76, 0x0C, 0x62, 0x64, 0xC3, 0xA2, 0xCD, 0x66, 0xFE, 0xC6, 0xC3, 0xD7, 0x8D, 0xE4, 0x3F, 0xC1, 0x92, 0x45, 0x7B, 0x2A, 0x4A, 0x66, 0x0A, 0x1E, 0x0E, 0xB2, 0x2B }, + { 0xF7, 0x38, 0xC0, 0x2F, 0x3C, 0x1B, 0x19, 0x0C, 0x51, 0x2B, 0x1A, 0x32, 0xDE, 0xAB, 0xF3, 0x53, 0x72, 0x8E, 0x0E, 0x9A, 0xB0, 0x34, 0x49, 0x0E, 0x3C, 0x34, 0x09, 0x94, 0x6A, 0x97, 0xAE, 0xEC }, + { 0x8B, 0x18, 0x80, 0xDF, 0x30, 0x1C, 0xC9, 0x63, 0x41, 0x88, 0x11, 0x08, 0x89, 0x64, 0x83, 0x92, 0x87, 0xFF, 0x7F, 0xE3, 0x1C, 0x49, 0xEA, 0x6E, 0xBD, 0x9E, 0x48, 0xBD, 0xEE, 0xE4, 0x97, 0xC5 }, + { 0x1E, 
0x75, 0xCB, 0x21, 0xC6, 0x09, 0x89, 0x02, 0x03, 0x75, 0xF1, 0xA7, 0xA2, 0x42, 0x83, 0x9F, 0x0B, 0x0B, 0x68, 0x97, 0x3A, 0x4C, 0x2A, 0x05, 0xCF, 0x75, 0x55, 0xED, 0x5A, 0xAE, 0xC4, 0xC1 }, + { 0x62, 0xBF, 0x8A, 0x9C, 0x32, 0xA5, 0xBC, 0xCF, 0x29, 0x0B, 0x6C, 0x47, 0x4D, 0x75, 0xB2, 0xA2, 0xA4, 0x09, 0x3F, 0x1A, 0x9E, 0x27, 0x13, 0x94, 0x33, 0xA8, 0xF2, 0xB3, 0xBC, 0xE7, 0xB8, 0xD7 }, + { 0x16, 0x6C, 0x83, 0x50, 0xD3, 0x17, 0x3B, 0x5E, 0x70, 0x2B, 0x78, 0x3D, 0xFD, 0x33, 0xC6, 0x6E, 0xE0, 0x43, 0x27, 0x42, 0xE9, 0xB9, 0x2B, 0x99, 0x7F, 0xD2, 0x3C, 0x60, 0xDC, 0x67, 0x56, 0xCA }, + { 0x04, 0x4A, 0x14, 0xD8, 0x22, 0xA9, 0x0C, 0xAC, 0xF2, 0xF5, 0xA1, 0x01, 0x42, 0x8A, 0xDC, 0x8F, 0x41, 0x09, 0x38, 0x6C, 0xCB, 0x15, 0x8B, 0xF9, 0x05, 0xC8, 0x61, 0x8B, 0x8E, 0xE2, 0x4E, 0xC3 }, + { 0x38, 0x7D, 0x39, 0x7E, 0xA4, 0x3A, 0x99, 0x4B, 0xE8, 0x4D, 0x2D, 0x54, 0x4A, 0xFB, 0xE4, 0x81, 0xA2, 0x00, 0x0F, 0x55, 0x25, 0x26, 0x96, 0xBB, 0xA2, 0xC5, 0x0C, 0x8E, 0xBD, 0x10, 0x13, 0x47 }, + { 0x56, 0xF8, 0xCC, 0xF1, 0xF8, 0x64, 0x09, 0xB4, 0x6C, 0xE3, 0x61, 0x66, 0xAE, 0x91, 0x65, 0x13, 0x84, 0x41, 0x57, 0x75, 0x89, 0xDB, 0x08, 0xCB, 0xC5, 0xF6, 0x6C, 0xA2, 0x97, 0x43, 0xB9, 0xFD }, + { 0x97, 0x06, 0xC0, 0x92, 0xB0, 0x4D, 0x91, 0xF5, 0x3D, 0xFF, 0x91, 0xFA, 0x37, 0xB7, 0x49, 0x3D, 0x28, 0xB5, 0x76, 0xB5, 0xD7, 0x10, 0x46, 0x9D, 0xF7, 0x94, 0x01, 0x66, 0x22, 0x36, 0xFC, 0x03 }, + { 0x87, 0x79, 0x68, 0x68, 0x6C, 0x06, 0x8C, 0xE2, 0xF7, 0xE2, 0xAD, 0xCF, 0xF6, 0x8B, 0xF8, 0x74, 0x8E, 0xDF, 0x3C, 0xF8, 0x62, 0xCF, 0xB4, 0xD3, 0x94, 0x7A, 0x31, 0x06, 0x95, 0x80, 0x54, 0xE3 }, + { 0x88, 0x17, 0xE5, 0x71, 0x98, 0x79, 0xAC, 0xF7, 0x02, 0x47, 0x87, 0xEC, 0xCD, 0xB2, 0x71, 0x03, 0x55, 0x66, 0xCF, 0xA3, 0x33, 0xE0, 0x49, 0x40, 0x7C, 0x01, 0x78, 0xCC, 0xC5, 0x7A, 0x5B, 0x9F }, + { 0x89, 0x38, 0x24, 0x9E, 0x4B, 0x50, 0xCA, 0xDA, 0xCC, 0xDF, 0x5B, 0x18, 0x62, 0x13, 0x26, 0xCB, 0xB1, 0x52, 0x53, 0xE3, 0x3A, 0x20, 0xF5, 0x63, 0x6E, 0x99, 0x5D, 0x72, 0x47, 0x8D, 0xE4, 0x72 }, + { 0xF1, 0x64, 0xAB, 0xBA, 
0x49, 0x63, 0xA4, 0x4D, 0x10, 0x72, 0x57, 0xE3, 0x23, 0x2D, 0x90, 0xAC, 0xA5, 0xE6, 0x6A, 0x14, 0x08, 0x24, 0x8C, 0x51, 0x74, 0x1E, 0x99, 0x1D, 0xB5, 0x22, 0x77, 0x56 }, + { 0xD0, 0x55, 0x63, 0xE2, 0xB1, 0xCB, 0xA0, 0xC4, 0xA2, 0xA1, 0xE8, 0xBD, 0xE3, 0xA1, 0xA0, 0xD9, 0xF5, 0xB4, 0x0C, 0x85, 0xA0, 0x70, 0xD6, 0xF5, 0xFB, 0x21, 0x06, 0x6E, 0xAD, 0x5D, 0x06, 0x01 }, + { 0x03, 0xFB, 0xB1, 0x63, 0x84, 0xF0, 0xA3, 0x86, 0x6F, 0x4C, 0x31, 0x17, 0x87, 0x76, 0x66, 0xEF, 0xBF, 0x12, 0x45, 0x97, 0x56, 0x4B, 0x29, 0x3D, 0x4A, 0xAB, 0x0D, 0x26, 0x9F, 0xAB, 0xDD, 0xFA }, + { 0x5F, 0xA8, 0x48, 0x6A, 0xC0, 0xE5, 0x29, 0x64, 0xD1, 0x88, 0x1B, 0xBE, 0x33, 0x8E, 0xB5, 0x4B, 0xE2, 0xF7, 0x19, 0x54, 0x92, 0x24, 0x89, 0x20, 0x57, 0xB4, 0xDA, 0x04, 0xBA, 0x8B, 0x34, 0x75 }, + { 0xCD, 0xFA, 0xBC, 0xEE, 0x46, 0x91, 0x11, 0x11, 0x23, 0x6A, 0x31, 0x70, 0x8B, 0x25, 0x39, 0xD7, 0x1F, 0xC2, 0x11, 0xD9, 0xB0, 0x9C, 0x0D, 0x85, 0x30, 0xA1, 0x1E, 0x1D, 0xBF, 0x6E, 0xED, 0x01 }, + { 0x4F, 0x82, 0xDE, 0x03, 0xB9, 0x50, 0x47, 0x93, 0xB8, 0x2A, 0x07, 0xA0, 0xBD, 0xCD, 0xFF, 0x31, 0x4D, 0x75, 0x9E, 0x7B, 0x62, 0xD2, 0x6B, 0x78, 0x49, 0x46, 0xB0, 0xD3, 0x6F, 0x91, 0x6F, 0x52 }, + { 0x25, 0x9E, 0xC7, 0xF1, 0x73, 0xBC, 0xC7, 0x6A, 0x09, 0x94, 0xC9, 0x67, 0xB4, 0xF5, 0xF0, 0x24, 0xC5, 0x60, 0x57, 0xFB, 0x79, 0xC9, 0x65, 0xC4, 0xFA, 0xE4, 0x18, 0x75, 0xF0, 0x6A, 0x0E, 0x4C }, + { 0x19, 0x3C, 0xC8, 0xE7, 0xC3, 0xE0, 0x8B, 0xB3, 0x0F, 0x54, 0x37, 0xAA, 0x27, 0xAD, 0xE1, 0xF1, 0x42, 0x36, 0x9B, 0x24, 0x6A, 0x67, 0x5B, 0x23, 0x83, 0xE6, 0xDA, 0x9B, 0x49, 0xA9, 0x80, 0x9E }, + { 0x5C, 0x10, 0x89, 0x6F, 0x0E, 0x28, 0x56, 0xB2, 0xA2, 0xEE, 0xE0, 0xFE, 0x4A, 0x2C, 0x16, 0x33, 0x56, 0x5D, 0x18, 0xF0, 0xE9, 0x3E, 0x1F, 0xAB, 0x26, 0xC3, 0x73, 0xE8, 0xF8, 0x29, 0x65, 0x4D }, + { 0xF1, 0x60, 0x12, 0xD9, 0x3F, 0x28, 0x85, 0x1A, 0x1E, 0xB9, 0x89, 0xF5, 0xD0, 0xB4, 0x3F, 0x3F, 0x39, 0xCA, 0x73, 0xC9, 0xA6, 0x2D, 0x51, 0x81, 0xBF, 0xF2, 0x37, 0x53, 0x6B, 0xD3, 0x48, 0xC3 }, + { 0x29, 0x66, 0xB3, 0xCF, 0xAE, 0x1E, 0x44, 
0xEA, 0x99, 0x6D, 0xC5, 0xD6, 0x86, 0xCF, 0x25, 0xFA, 0x05, 0x3F, 0xB6, 0xF6, 0x72, 0x01, 0xB9, 0xE4, 0x6E, 0xAD, 0xE8, 0x5D, 0x0A, 0xD6, 0xB8, 0x06 }, + { 0xDD, 0xB8, 0x78, 0x24, 0x85, 0xE9, 0x00, 0xBC, 0x60, 0xBC, 0xF4, 0xC3, 0x3A, 0x6F, 0xD5, 0x85, 0x68, 0x0C, 0xC6, 0x83, 0xD5, 0x16, 0xEF, 0xA0, 0x3E, 0xB9, 0x98, 0x5F, 0xAD, 0x87, 0x15, 0xFB }, + { 0x4C, 0x4D, 0x6E, 0x71, 0xAE, 0xA0, 0x57, 0x86, 0x41, 0x31, 0x48, 0xFC, 0x7A, 0x78, 0x6B, 0x0E, 0xCA, 0xF5, 0x82, 0xCF, 0xF1, 0x20, 0x9F, 0x5A, 0x80, 0x9F, 0xBA, 0x85, 0x04, 0xCE, 0x66, 0x2C }, + { 0xFB, 0x4C, 0x5E, 0x86, 0xD7, 0xB2, 0x22, 0x9B, 0x99, 0xB8, 0xBA, 0x6D, 0x94, 0xC2, 0x47, 0xEF, 0x96, 0x4A, 0xA3, 0xA2, 0xBA, 0xE8, 0xED, 0xC7, 0x75, 0x69, 0xF2, 0x8D, 0xBB, 0xFF, 0x2D, 0x4E }, + { 0xE9, 0x4F, 0x52, 0x6D, 0xE9, 0x01, 0x96, 0x33, 0xEC, 0xD5, 0x4A, 0xC6, 0x12, 0x0F, 0x23, 0x95, 0x8D, 0x77, 0x18, 0xF1, 0xE7, 0x71, 0x7B, 0xF3, 0x29, 0x21, 0x1A, 0x4F, 0xAE, 0xED, 0x4E, 0x6D }, + { 0xCB, 0xD6, 0x66, 0x0A, 0x10, 0xDB, 0x3F, 0x23, 0xF7, 0xA0, 0x3D, 0x4B, 0x9D, 0x40, 0x44, 0xC7, 0x93, 0x2B, 0x28, 0x01, 0xAC, 0x89, 0xD6, 0x0B, 0xC9, 0xEB, 0x92, 0xD6, 0x5A, 0x46, 0xC2, 0xA0 }, + { 0x88, 0x18, 0xBB, 0xD3, 0xDB, 0x4D, 0xC1, 0x23, 0xB2, 0x5C, 0xBB, 0xA5, 0xF5, 0x4C, 0x2B, 0xC4, 0xB3, 0xFC, 0xF9, 0xBF, 0x7D, 0x7A, 0x77, 0x09, 0xF4, 0xAE, 0x58, 0x8B, 0x26, 0x7C, 0x4E, 0xCE }, + { 0xC6, 0x53, 0x82, 0x51, 0x3F, 0x07, 0x46, 0x0D, 0xA3, 0x98, 0x33, 0xCB, 0x66, 0x6C, 0x5E, 0xD8, 0x2E, 0x61, 0xB9, 0xE9, 0x98, 0xF4, 0xB0, 0xC4, 0x28, 0x7C, 0xEE, 0x56, 0xC3, 0xCC, 0x9B, 0xCD }, + { 0x89, 0x75, 0xB0, 0x57, 0x7F, 0xD3, 0x55, 0x66, 0xD7, 0x50, 0xB3, 0x62, 0xB0, 0x89, 0x7A, 0x26, 0xC3, 0x99, 0x13, 0x6D, 0xF0, 0x7B, 0xAB, 0xAB, 0xBD, 0xE6, 0x20, 0x3F, 0xF2, 0x95, 0x4E, 0xD4 }, + { 0x21, 0xFE, 0x0C, 0xEB, 0x00, 0x52, 0xBE, 0x7F, 0xB0, 0xF0, 0x04, 0x18, 0x7C, 0xAC, 0xD7, 0xDE, 0x67, 0xFA, 0x6E, 0xB0, 0x93, 0x8D, 0x92, 0x76, 0x77, 0xF2, 0x39, 0x8C, 0x13, 0x23, 0x17, 0xA8 }, + { 0x2E, 0xF7, 0x3F, 0x3C, 0x26, 0xF1, 0x2D, 0x93, 0x88, 0x9F, 
0x3C, 0x78, 0xB6, 0xA6, 0x6C, 0x1D, 0x52, 0xB6, 0x49, 0xDC, 0x9E, 0x85, 0x6E, 0x2C, 0x17, 0x2E, 0xA7, 0xC5, 0x8A, 0xC2, 0xB5, 0xE3 }, + { 0x38, 0x8A, 0x3C, 0xD5, 0x6D, 0x73, 0x86, 0x7A, 0xBB, 0x5F, 0x84, 0x01, 0x49, 0x2B, 0x6E, 0x26, 0x81, 0xEB, 0x69, 0x85, 0x1E, 0x76, 0x7F, 0xD8, 0x42, 0x10, 0xA5, 0x60, 0x76, 0xFB, 0x3D, 0xD3 }, + { 0xAF, 0x53, 0x3E, 0x02, 0x2F, 0xC9, 0x43, 0x9E, 0x4E, 0x3C, 0xB8, 0x38, 0xEC, 0xD1, 0x86, 0x92, 0x23, 0x2A, 0xDF, 0x6F, 0xE9, 0x83, 0x95, 0x26, 0xD3, 0xC3, 0xDD, 0x1B, 0x71, 0x91, 0x0B, 0x1A }, + { 0x75, 0x1C, 0x09, 0xD4, 0x1A, 0x93, 0x43, 0x88, 0x2A, 0x81, 0xCD, 0x13, 0xEE, 0x40, 0x81, 0x8D, 0x12, 0xEB, 0x44, 0xC6, 0xC7, 0xF4, 0x0D, 0xF1, 0x6E, 0x4A, 0xEA, 0x8F, 0xAB, 0x91, 0x97, 0x2A }, + { 0x5B, 0x73, 0xDD, 0xB6, 0x8D, 0x9D, 0x2B, 0x0A, 0xA2, 0x65, 0xA0, 0x79, 0x88, 0xD6, 0xB8, 0x8A, 0xE9, 0xAA, 0xC5, 0x82, 0xAF, 0x83, 0x03, 0x2F, 0x8A, 0x9B, 0x21, 0xA2, 0xE1, 0xB7, 0xBF, 0x18 }, + { 0x3D, 0xA2, 0x91, 0x26, 0xC7, 0xC5, 0xD7, 0xF4, 0x3E, 0x64, 0x24, 0x2A, 0x79, 0xFE, 0xAA, 0x4E, 0xF3, 0x45, 0x9C, 0xDE, 0xCC, 0xC8, 0x98, 0xED, 0x59, 0xA9, 0x7F, 0x6E, 0xC9, 0x3B, 0x9D, 0xAB }, + { 0x56, 0x6D, 0xC9, 0x20, 0x29, 0x3D, 0xA5, 0xCB, 0x4F, 0xE0, 0xAA, 0x8A, 0xBD, 0xA8, 0xBB, 0xF5, 0x6F, 0x55, 0x23, 0x13, 0xBF, 0xF1, 0x90, 0x46, 0x64, 0x1E, 0x36, 0x15, 0xC1, 0xE3, 0xED, 0x3F }, + { 0x41, 0x15, 0xBE, 0xA0, 0x2F, 0x73, 0xF9, 0x7F, 0x62, 0x9E, 0x5C, 0x55, 0x90, 0x72, 0x0C, 0x01, 0xE7, 0xE4, 0x49, 0xAE, 0x2A, 0x66, 0x97, 0xD4, 0xD2, 0x78, 0x33, 0x21, 0x30, 0x36, 0x92, 0xF9 }, + { 0x4C, 0xE0, 0x8F, 0x47, 0x62, 0x46, 0x8A, 0x76, 0x70, 0x01, 0x21, 0x64, 0x87, 0x8D, 0x68, 0x34, 0x0C, 0x52, 0xA3, 0x5E, 0x66, 0xC1, 0x88, 0x4D, 0x5C, 0x86, 0x48, 0x89, 0xAB, 0xC9, 0x66, 0x77 }, + { 0x81, 0xEA, 0x0B, 0x78, 0x04, 0x12, 0x4E, 0x0C, 0x22, 0xEA, 0x5F, 0xC7, 0x11, 0x04, 0xA2, 0xAF, 0xCB, 0x52, 0xA1, 0xFA, 0x81, 0x6F, 0x3E, 0xCB, 0x7D, 0xCB, 0x5D, 0x9D, 0xEA, 0x17, 0x86, 0xD0 }, + { 0xFE, 0x36, 0x27, 0x33, 0xB0, 0x5F, 0x6B, 0xED, 0xAF, 0x93, 0x79, 0xD7, 0xF7, 
0x93, 0x6E, 0xDE, 0x20, 0x9B, 0x1F, 0x83, 0x23, 0xC3, 0x92, 0x25, 0x49, 0xD9, 0xE7, 0x36, 0x81, 0xB5, 0xDB, 0x7B }, + { 0xEF, 0xF3, 0x7D, 0x30, 0xDF, 0xD2, 0x03, 0x59, 0xBE, 0x4E, 0x73, 0xFD, 0xF4, 0x0D, 0x27, 0x73, 0x4B, 0x3D, 0xF9, 0x0A, 0x97, 0xA5, 0x5E, 0xD7, 0x45, 0x29, 0x72, 0x94, 0xCA, 0x85, 0xD0, 0x9F }, + { 0x17, 0x2F, 0xFC, 0x67, 0x15, 0x3D, 0x12, 0xE0, 0xCA, 0x76, 0xA8, 0xB6, 0xCD, 0x5D, 0x47, 0x31, 0x88, 0x5B, 0x39, 0xCE, 0x0C, 0xAC, 0x93, 0xA8, 0x97, 0x2A, 0x18, 0x00, 0x6C, 0x8B, 0x8B, 0xAF }, + { 0xC4, 0x79, 0x57, 0xF1, 0xCC, 0x88, 0xE8, 0x3E, 0xF9, 0x44, 0x58, 0x39, 0x70, 0x9A, 0x48, 0x0A, 0x03, 0x6B, 0xED, 0x5F, 0x88, 0xAC, 0x0F, 0xCC, 0x8E, 0x1E, 0x70, 0x3F, 0xFA, 0xAC, 0x13, 0x2C }, + { 0x30, 0xF3, 0x54, 0x83, 0x70, 0xCF, 0xDC, 0xED, 0xA5, 0xC3, 0x7B, 0x56, 0x9B, 0x61, 0x75, 0xE7, 0x99, 0xEE, 0xF1, 0xA6, 0x2A, 0xAA, 0x94, 0x32, 0x45, 0xAE, 0x76, 0x69, 0xC2, 0x27, 0xA7, 0xB5 }, + { 0xC9, 0x5D, 0xCB, 0x3C, 0xF1, 0xF2, 0x7D, 0x0E, 0xEF, 0x2F, 0x25, 0xD2, 0x41, 0x38, 0x70, 0x90, 0x4A, 0x87, 0x7C, 0x4A, 0x56, 0xC2, 0xDE, 0x1E, 0x83, 0xE2, 0xBC, 0x2A, 0xE2, 0xE4, 0x68, 0x21 }, + { 0xD5, 0xD0, 0xB5, 0xD7, 0x05, 0x43, 0x4C, 0xD4, 0x6B, 0x18, 0x57, 0x49, 0xF6, 0x6B, 0xFB, 0x58, 0x36, 0xDC, 0xDF, 0x6E, 0xE5, 0x49, 0xA2, 0xB7, 0xA4, 0xAE, 0xE7, 0xF5, 0x80, 0x07, 0xCA, 0xAF }, + { 0xBB, 0xC1, 0x24, 0xA7, 0x12, 0xF1, 0x5D, 0x07, 0xC3, 0x00, 0xE0, 0x5B, 0x66, 0x83, 0x89, 0xA4, 0x39, 0xC9, 0x17, 0x77, 0xF7, 0x21, 0xF8, 0x32, 0x0C, 0x1C, 0x90, 0x78, 0x06, 0x6D, 0x2C, 0x7E }, + { 0xA4, 0x51, 0xB4, 0x8C, 0x35, 0xA6, 0xC7, 0x85, 0x4C, 0xFA, 0xAE, 0x60, 0x26, 0x2E, 0x76, 0x99, 0x08, 0x16, 0x38, 0x2A, 0xC0, 0x66, 0x7E, 0x5A, 0x5C, 0x9E, 0x1B, 0x46, 0xC4, 0x34, 0x2D, 0xDF }, + { 0xB0, 0xD1, 0x50, 0xFB, 0x55, 0xE7, 0x78, 0xD0, 0x11, 0x47, 0xF0, 0xB5, 0xD8, 0x9D, 0x99, 0xEC, 0xB2, 0x0F, 0xF0, 0x7E, 0x5E, 0x67, 0x60, 0xD6, 0xB6, 0x45, 0xEB, 0x5B, 0x65, 0x4C, 0x62, 0x2B }, + { 0x34, 0xF7, 0x37, 0xC0, 0xAB, 0x21, 0x99, 0x51, 0xEE, 0xE8, 0x9A, 0x9F, 0x8D, 0xAC, 0x29, 0x9C, 
0x9D, 0x4C, 0x38, 0xF3, 0x3F, 0xA4, 0x94, 0xC5, 0xC6, 0xEE, 0xFC, 0x92, 0xB6, 0xDB, 0x08, 0xBC }, + { 0x1A, 0x62, 0xCC, 0x3A, 0x00, 0x80, 0x0D, 0xCB, 0xD9, 0x98, 0x91, 0x08, 0x0C, 0x1E, 0x09, 0x84, 0x58, 0x19, 0x3A, 0x8C, 0xC9, 0xF9, 0x70, 0xEA, 0x99, 0xFB, 0xEF, 0xF0, 0x03, 0x18, 0xC2, 0x89 }, + { 0xCF, 0xCE, 0x55, 0xEB, 0xAF, 0xC8, 0x40, 0xD7, 0xAE, 0x48, 0x28, 0x1C, 0x7F, 0xD5, 0x7E, 0xC8, 0xB4, 0x82, 0xD4, 0xB7, 0x04, 0x43, 0x74, 0x95, 0x49, 0x5A, 0xC4, 0x14, 0xCF, 0x4A, 0x37, 0x4B }, + { 0x67, 0x46, 0xFA, 0xCF, 0x71, 0x14, 0x6D, 0x99, 0x9D, 0xAB, 0xD0, 0x5D, 0x09, 0x3A, 0xE5, 0x86, 0x64, 0x8D, 0x1E, 0xE2, 0x8E, 0x72, 0x61, 0x7B, 0x99, 0xD0, 0xF0, 0x08, 0x6E, 0x1E, 0x45, 0xBF }, + { 0x57, 0x1C, 0xED, 0x28, 0x3B, 0x3F, 0x23, 0xB4, 0xE7, 0x50, 0xBF, 0x12, 0xA2, 0xCA, 0xF1, 0x78, 0x18, 0x47, 0xBD, 0x89, 0x0E, 0x43, 0x60, 0x3C, 0xDC, 0x59, 0x76, 0x10, 0x2B, 0x7B, 0xB1, 0x1B }, + { 0xCF, 0xCB, 0x76, 0x5B, 0x04, 0x8E, 0x35, 0x02, 0x2C, 0x5D, 0x08, 0x9D, 0x26, 0xE8, 0x5A, 0x36, 0xB0, 0x05, 0xA2, 0xB8, 0x04, 0x93, 0xD0, 0x3A, 0x14, 0x4E, 0x09, 0xF4, 0x09, 0xB6, 0xAF, 0xD1 }, + { 0x40, 0x50, 0xC7, 0xA2, 0x77, 0x05, 0xBB, 0x27, 0xF4, 0x20, 0x89, 0xB2, 0x99, 0xF3, 0xCB, 0xE5, 0x05, 0x4E, 0xAD, 0x68, 0x72, 0x7E, 0x8E, 0xF9, 0x31, 0x8C, 0xE6, 0xF2, 0x5C, 0xD6, 0xF3, 0x1D }, + { 0x18, 0x40, 0x70, 0xBD, 0x5D, 0x26, 0x5F, 0xBD, 0xC1, 0x42, 0xCD, 0x1C, 0x5C, 0xD0, 0xD7, 0xE4, 0x14, 0xE7, 0x03, 0x69, 0xA2, 0x66, 0xD6, 0x27, 0xC8, 0xFB, 0xA8, 0x4F, 0xA5, 0xE8, 0x4C, 0x34 }, + { 0x9E, 0xDD, 0xA9, 0xA4, 0x44, 0x39, 0x02, 0xA9, 0x58, 0x8C, 0x0D, 0x0C, 0xCC, 0x62, 0xB9, 0x30, 0x21, 0x84, 0x79, 0xA6, 0x84, 0x1E, 0x6F, 0xE7, 0xD4, 0x30, 0x03, 0xF0, 0x4B, 0x1F, 0xD6, 0x43 }, + { 0xE4, 0x12, 0xFE, 0xEF, 0x79, 0x08, 0x32, 0x4A, 0x6D, 0xA1, 0x84, 0x16, 0x29, 0xF3, 0x5D, 0x3D, 0x35, 0x86, 0x42, 0x01, 0x93, 0x10, 0xEC, 0x57, 0xC6, 0x14, 0x83, 0x6B, 0x63, 0xD3, 0x07, 0x63 }, + { 0x1A, 0x2B, 0x8E, 0xDF, 0xF3, 0xF9, 0xAC, 0xC1, 0x55, 0x4F, 0xCB, 0xAE, 0x3C, 0xF1, 0xD6, 0x29, 0x8C, 0x64, 0x62, 
0xE2, 0x2E, 0x5E, 0xB0, 0x25, 0x96, 0x84, 0xF8, 0x35, 0x01, 0x2B, 0xD1, 0x3F }, + { 0x28, 0x8C, 0x4A, 0xD9, 0xB9, 0x40, 0x97, 0x62, 0xEA, 0x07, 0xC2, 0x4A, 0x41, 0xF0, 0x4F, 0x69, 0xA7, 0xD7, 0x4B, 0xEE, 0x2D, 0x95, 0x43, 0x53, 0x74, 0xBD, 0xE9, 0x46, 0xD7, 0x24, 0x1C, 0x7B }, + { 0x80, 0x56, 0x91, 0xBB, 0x28, 0x67, 0x48, 0xCF, 0xB5, 0x91, 0xD3, 0xAE, 0xBE, 0x7E, 0x6F, 0x4E, 0x4D, 0xC6, 0xE2, 0x80, 0x8C, 0x65, 0x14, 0x3C, 0xC0, 0x04, 0xE4, 0xEB, 0x6F, 0xD0, 0x9D, 0x43 }, + { 0xD4, 0xAC, 0x8D, 0x3A, 0x0A, 0xFC, 0x6C, 0xFA, 0x7B, 0x46, 0x0A, 0xE3, 0x00, 0x1B, 0xAE, 0xB3, 0x6D, 0xAD, 0xB3, 0x7D, 0xA0, 0x7D, 0x2E, 0x8A, 0xC9, 0x18, 0x22, 0xDF, 0x34, 0x8A, 0xED, 0x3D }, + { 0xC3, 0x76, 0x61, 0x70, 0x14, 0xD2, 0x01, 0x58, 0xBC, 0xED, 0x3D, 0x3B, 0xA5, 0x52, 0xB6, 0xEC, 0xCF, 0x84, 0xE6, 0x2A, 0xA3, 0xEB, 0x65, 0x0E, 0x90, 0x02, 0x9C, 0x84, 0xD1, 0x3E, 0xEA, 0x69 }, + { 0xC4, 0x1F, 0x09, 0xF4, 0x3C, 0xEC, 0xAE, 0x72, 0x93, 0xD6, 0x00, 0x7C, 0xA0, 0xA3, 0x57, 0x08, 0x7D, 0x5A, 0xE5, 0x9B, 0xE5, 0x00, 0xC1, 0xCD, 0x5B, 0x28, 0x9E, 0xE8, 0x10, 0xC7, 0xB0, 0x82 }, + { 0x03, 0xD1, 0xCE, 0xD1, 0xFB, 0xA5, 0xC3, 0x91, 0x55, 0xC4, 0x4B, 0x77, 0x65, 0xCB, 0x76, 0x0C, 0x78, 0x70, 0x8D, 0xCF, 0xC8, 0x0B, 0x0B, 0xD8, 0xAD, 0xE3, 0xA5, 0x6D, 0xA8, 0x83, 0x0B, 0x29 }, + { 0x09, 0xBD, 0xE6, 0xF1, 0x52, 0x21, 0x8D, 0xC9, 0x2C, 0x41, 0xD7, 0xF4, 0x53, 0x87, 0xE6, 0x3E, 0x58, 0x69, 0xD8, 0x07, 0xEC, 0x70, 0xB8, 0x21, 0x40, 0x5D, 0xBD, 0x88, 0x4B, 0x7F, 0xCF, 0x4B }, + { 0x71, 0xC9, 0x03, 0x6E, 0x18, 0x17, 0x9B, 0x90, 0xB3, 0x7D, 0x39, 0xE9, 0xF0, 0x5E, 0xB8, 0x9C, 0xC5, 0xFC, 0x34, 0x1F, 0xD7, 0xC4, 0x77, 0xD0, 0xD7, 0x49, 0x32, 0x85, 0xFA, 0xCA, 0x08, 0xA4 }, + { 0x59, 0x16, 0x83, 0x3E, 0xBB, 0x05, 0xCD, 0x91, 0x9C, 0xA7, 0xFE, 0x83, 0xB6, 0x92, 0xD3, 0x20, 0x5B, 0xEF, 0x72, 0x39, 0x2B, 0x2C, 0xF6, 0xBB, 0x0A, 0x6D, 0x43, 0xF9, 0x94, 0xF9, 0x5F, 0x11 }, + { 0xF6, 0x3A, 0xAB, 0x3E, 0xC6, 0x41, 0xB3, 0xB0, 0x24, 0x96, 0x4C, 0x2B, 0x43, 0x7C, 0x04, 0xF6, 0x04, 0x3C, 0x4C, 0x7E, 0x02, 0x79, 
0x23, 0x99, 0x95, 0x40, 0x19, 0x58, 0xF8, 0x6B, 0xBE, 0x54 }, + { 0xF1, 0x72, 0xB1, 0x80, 0xBF, 0xB0, 0x97, 0x40, 0x49, 0x31, 0x20, 0xB6, 0x32, 0x6C, 0xBD, 0xC5, 0x61, 0xE4, 0x77, 0xDE, 0xF9, 0xBB, 0xCF, 0xD2, 0x8C, 0xC8, 0xC1, 0xC5, 0xE3, 0x37, 0x9A, 0x31 }, + { 0xCB, 0x9B, 0x89, 0xCC, 0x18, 0x38, 0x1D, 0xD9, 0x14, 0x1A, 0xDE, 0x58, 0x86, 0x54, 0xD4, 0xE6, 0xA2, 0x31, 0xD5, 0xBF, 0x49, 0xD4, 0xD5, 0x9A, 0xC2, 0x7D, 0x86, 0x9C, 0xBE, 0x10, 0x0C, 0xF3 }, + { 0x7B, 0xD8, 0x81, 0x50, 0x46, 0xFD, 0xD8, 0x10, 0xA9, 0x23, 0xE1, 0x98, 0x4A, 0xAE, 0xBD, 0xCD, 0xF8, 0x4D, 0x87, 0xC8, 0x99, 0x2D, 0x68, 0xB5, 0xEE, 0xB4, 0x60, 0xF9, 0x3E, 0xB3, 0xC8, 0xD7 }, + { 0x60, 0x7B, 0xE6, 0x68, 0x62, 0xFD, 0x08, 0xEE, 0x5B, 0x19, 0xFA, 0xCA, 0xC0, 0x9D, 0xFD, 0xBC, 0xD4, 0x0C, 0x31, 0x21, 0x01, 0xD6, 0x6E, 0x6E, 0xBD, 0x2B, 0x84, 0x1F, 0x1B, 0x9A, 0x93, 0x25 }, + { 0x9F, 0xE0, 0x3B, 0xBE, 0x69, 0xAB, 0x18, 0x34, 0xF5, 0x21, 0x9B, 0x0D, 0xA8, 0x8A, 0x08, 0xB3, 0x0A, 0x66, 0xC5, 0x91, 0x3F, 0x01, 0x51, 0x96, 0x3C, 0x36, 0x05, 0x60, 0xDB, 0x03, 0x87, 0xB3 }, + { 0x90, 0xA8, 0x35, 0x85, 0x71, 0x7B, 0x75, 0xF0, 0xE9, 0xB7, 0x25, 0xE0, 0x55, 0xEE, 0xEE, 0xB9, 0xE7, 0xA0, 0x28, 0xEA, 0x7E, 0x6C, 0xBC, 0x07, 0xB2, 0x09, 0x17, 0xEC, 0x03, 0x63, 0xE3, 0x8C }, + { 0x33, 0x6E, 0xA0, 0x53, 0x0F, 0x4A, 0x74, 0x69, 0x12, 0x6E, 0x02, 0x18, 0x58, 0x7E, 0xBB, 0xDE, 0x33, 0x58, 0xA0, 0xB3, 0x1C, 0x29, 0xD2, 0x00, 0xF7, 0xDC, 0x7E, 0xB1, 0x5C, 0x6A, 0xAD, 0xD8 }, + { 0xA7, 0x9E, 0x76, 0xDC, 0x0A, 0xBC, 0xA4, 0x39, 0x6F, 0x07, 0x47, 0xCD, 0x7B, 0x74, 0x8D, 0xF9, 0x13, 0x00, 0x76, 0x26, 0xB1, 0xD6, 0x59, 0xDA, 0x0C, 0x1F, 0x78, 0xB9, 0x30, 0x3D, 0x01, 0xA3 }, + { 0x44, 0xE7, 0x8A, 0x77, 0x37, 0x56, 0xE0, 0x95, 0x15, 0x19, 0x50, 0x4D, 0x70, 0x38, 0xD2, 0x8D, 0x02, 0x13, 0xA3, 0x7E, 0x0C, 0xE3, 0x75, 0x37, 0x17, 0x57, 0xBC, 0x99, 0x63, 0x11, 0xE3, 0xB8 }, + { 0x77, 0xAC, 0x01, 0x2A, 0x3F, 0x75, 0x4D, 0xCF, 0xEA, 0xB5, 0xEB, 0x99, 0x6B, 0xE9, 0xCD, 0x2D, 0x1F, 0x96, 0x11, 0x1B, 0x6E, 0x49, 0xF3, 0x99, 0x4D, 
0xF1, 0x81, 0xF2, 0x85, 0x69, 0xD8, 0x25 }, + { 0xCE, 0x5A, 0x10, 0xDB, 0x6F, 0xCC, 0xDA, 0xF1, 0x40, 0xAA, 0xA4, 0xDE, 0xD6, 0x25, 0x0A, 0x9C, 0x06, 0xE9, 0x22, 0x2B, 0xC9, 0xF9, 0xF3, 0x65, 0x8A, 0x4A, 0xFF, 0x93, 0x5F, 0x2B, 0x9F, 0x3A }, + { 0xEC, 0xC2, 0x03, 0xA7, 0xFE, 0x2B, 0xE4, 0xAB, 0xD5, 0x5B, 0xB5, 0x3E, 0x6E, 0x67, 0x35, 0x72, 0xE0, 0x07, 0x8D, 0xA8, 0xCD, 0x37, 0x5E, 0xF4, 0x30, 0xCC, 0x97, 0xF9, 0xF8, 0x00, 0x83, 0xAF }, + { 0x14, 0xA5, 0x18, 0x6D, 0xE9, 0xD7, 0xA1, 0x8B, 0x04, 0x12, 0xB8, 0x56, 0x3E, 0x51, 0xCC, 0x54, 0x33, 0x84, 0x0B, 0x4A, 0x12, 0x9A, 0x8F, 0xF9, 0x63, 0xB3, 0x3A, 0x3C, 0x4A, 0xFE, 0x8E, 0xBB }, + { 0x13, 0xF8, 0xEF, 0x95, 0xCB, 0x86, 0xE6, 0xA6, 0x38, 0x93, 0x1C, 0x8E, 0x10, 0x76, 0x73, 0xEB, 0x76, 0xBA, 0x10, 0xD7, 0xC2, 0xCD, 0x70, 0xB9, 0xD9, 0x92, 0x0B, 0xBE, 0xED, 0x92, 0x94, 0x09 }, + { 0x0B, 0x33, 0x8F, 0x4E, 0xE1, 0x2F, 0x2D, 0xFC, 0xB7, 0x87, 0x13, 0x37, 0x79, 0x41, 0xE0, 0xB0, 0x63, 0x21, 0x52, 0x58, 0x1D, 0x13, 0x32, 0x51, 0x6E, 0x4A, 0x2C, 0xAB, 0x19, 0x42, 0xCC, 0xA4 }, + { 0xEA, 0xAB, 0x0E, 0xC3, 0x7B, 0x3B, 0x8A, 0xB7, 0x96, 0xE9, 0xF5, 0x72, 0x38, 0xDE, 0x14, 0xA2, 0x64, 0xA0, 0x76, 0xF3, 0x88, 0x7D, 0x86, 0xE2, 0x9B, 0xB5, 0x90, 0x6D, 0xB5, 0xA0, 0x0E, 0x02 }, + { 0x23, 0xCB, 0x68, 0xB8, 0xC0, 0xE6, 0xDC, 0x26, 0xDC, 0x27, 0x76, 0x6D, 0xDC, 0x0A, 0x13, 0xA9, 0x94, 0x38, 0xFD, 0x55, 0x61, 0x7A, 0xA4, 0x09, 0x5D, 0x8F, 0x96, 0x97, 0x20, 0xC8, 0x72, 0xDF }, + { 0x09, 0x1D, 0x8E, 0xE3, 0x0D, 0x6F, 0x29, 0x68, 0xD4, 0x6B, 0x68, 0x7D, 0xD6, 0x52, 0x92, 0x66, 0x57, 0x42, 0xDE, 0x0B, 0xB8, 0x3D, 0xCC, 0x00, 0x04, 0xC7, 0x2C, 0xE1, 0x00, 0x07, 0xA5, 0x49 }, + { 0x7F, 0x50, 0x7A, 0xBC, 0x6D, 0x19, 0xBA, 0x00, 0xC0, 0x65, 0xA8, 0x76, 0xEC, 0x56, 0x57, 0x86, 0x88, 0x82, 0xD1, 0x8A, 0x22, 0x1B, 0xC4, 0x6C, 0x7A, 0x69, 0x12, 0x54, 0x1F, 0x5B, 0xC7, 0xBA }, + { 0xA0, 0x60, 0x7C, 0x24, 0xE1, 0x4E, 0x8C, 0x22, 0x3D, 0xB0, 0xD7, 0x0B, 0x4D, 0x30, 0xEE, 0x88, 0x01, 0x4D, 0x60, 0x3F, 0x43, 0x7E, 0x9E, 0x02, 0xAA, 0x7D, 0xAF, 0xA3, 
0xCD, 0xFB, 0xAD, 0x94 }, + { 0xDD, 0xBF, 0xEA, 0x75, 0xCC, 0x46, 0x78, 0x82, 0xEB, 0x34, 0x83, 0xCE, 0x5E, 0x2E, 0x75, 0x6A, 0x4F, 0x47, 0x01, 0xB7, 0x6B, 0x44, 0x55, 0x19, 0xE8, 0x9F, 0x22, 0xD6, 0x0F, 0xA8, 0x6E, 0x06 }, + { 0x0C, 0x31, 0x1F, 0x38, 0xC3, 0x5A, 0x4F, 0xB9, 0x0D, 0x65, 0x1C, 0x28, 0x9D, 0x48, 0x68, 0x56, 0xCD, 0x14, 0x13, 0xDF, 0x9B, 0x06, 0x77, 0xF5, 0x3E, 0xCE, 0x2C, 0xD9, 0xE4, 0x77, 0xC6, 0x0A }, + { 0x46, 0xA7, 0x3A, 0x8D, 0xD3, 0xE7, 0x0F, 0x59, 0xD3, 0x94, 0x2C, 0x01, 0xDF, 0x59, 0x9D, 0xEF, 0x78, 0x3C, 0x9D, 0xA8, 0x2F, 0xD8, 0x32, 0x22, 0xCD, 0x66, 0x2B, 0x53, 0xDC, 0xE7, 0xDB, 0xDF }, + { 0xAD, 0x03, 0x8F, 0xF9, 0xB1, 0x4D, 0xE8, 0x4A, 0x80, 0x1E, 0x4E, 0x62, 0x1C, 0xE5, 0xDF, 0x02, 0x9D, 0xD9, 0x35, 0x20, 0xD0, 0xC2, 0xFA, 0x38, 0xBF, 0xF1, 0x76, 0xA8, 0xB1, 0xD1, 0x69, 0x8C }, + { 0xAB, 0x70, 0xC5, 0xDF, 0xBD, 0x1E, 0xA8, 0x17, 0xFE, 0xD0, 0xCD, 0x06, 0x72, 0x93, 0xAB, 0xF3, 0x19, 0xE5, 0xD7, 0x90, 0x1C, 0x21, 0x41, 0xD5, 0xD9, 0x9B, 0x23, 0xF0, 0x3A, 0x38, 0xE7, 0x48 }, + { 0x1F, 0xFF, 0xDA, 0x67, 0x93, 0x2B, 0x73, 0xC8, 0xEC, 0xAF, 0x00, 0x9A, 0x34, 0x91, 0xA0, 0x26, 0x95, 0x3B, 0xAB, 0xFE, 0x1F, 0x66, 0x3B, 0x06, 0x97, 0xC3, 0xC4, 0xAE, 0x8B, 0x2E, 0x7D, 0xCB }, + { 0xB0, 0xD2, 0xCC, 0x19, 0x47, 0x2D, 0xD5, 0x7F, 0x2B, 0x17, 0xEF, 0xC0, 0x3C, 0x8D, 0x58, 0xC2, 0x28, 0x3D, 0xBB, 0x19, 0xDA, 0x57, 0x2F, 0x77, 0x55, 0x85, 0x5A, 0xA9, 0x79, 0x43, 0x17, 0xA0 }, + { 0xA0, 0xD1, 0x9A, 0x6E, 0xE3, 0x39, 0x79, 0xC3, 0x25, 0x51, 0x0E, 0x27, 0x66, 0x22, 0xDF, 0x41, 0xF7, 0x15, 0x83, 0xD0, 0x75, 0x01, 0xB8, 0x70, 0x71, 0x12, 0x9A, 0x0A, 0xD9, 0x47, 0x32, 0xA5 }, + { 0x72, 0x46, 0x42, 0xA7, 0x03, 0x2D, 0x10, 0x62, 0xB8, 0x9E, 0x52, 0xBE, 0xA3, 0x4B, 0x75, 0xDF, 0x7D, 0x8F, 0xE7, 0x72, 0xD9, 0xFE, 0x3C, 0x93, 0xDD, 0xF3, 0xC4, 0x54, 0x5A, 0xB5, 0xA9, 0x9B }, + { 0xAD, 0xE5, 0xEA, 0xA7, 0xE6, 0x1F, 0x67, 0x2D, 0x58, 0x7E, 0xA0, 0x3D, 0xAE, 0x7D, 0x7B, 0x55, 0x22, 0x9C, 0x01, 0xD0, 0x6B, 0xC0, 0xA5, 0x70, 0x14, 0x36, 0xCB, 0xD1, 0x83, 0x66, 0xA6, 
0x26 }, + { 0x01, 0x3B, 0x31, 0xEB, 0xD2, 0x28, 0xFC, 0xDD, 0xA5, 0x1F, 0xAB, 0xB0, 0x3B, 0xB0, 0x2D, 0x60, 0xAC, 0x20, 0xCA, 0x21, 0x5A, 0xAF, 0xA8, 0x3B, 0xDD, 0x85, 0x5E, 0x37, 0x55, 0xA3, 0x5F, 0x0B }, + { 0x33, 0x2E, 0xD4, 0x0B, 0xB1, 0x0D, 0xDE, 0x3C, 0x95, 0x4A, 0x75, 0xD7, 0xB8, 0x99, 0x9D, 0x4B, 0x26, 0xA1, 0xC0, 0x63, 0xC1, 0xDC, 0x6E, 0x32, 0xC1, 0xD9, 0x1B, 0xAB, 0x7B, 0xBB, 0x7D, 0x16 }, + { 0xC7, 0xA1, 0x97, 0xB3, 0xA0, 0x5B, 0x56, 0x6B, 0xCC, 0x9F, 0xAC, 0xD2, 0x0E, 0x44, 0x1D, 0x6F, 0x6C, 0x28, 0x60, 0xAC, 0x96, 0x51, 0xCD, 0x51, 0xD6, 0xB9, 0xD2, 0xCD, 0xEE, 0xEA, 0x03, 0x90 }, + { 0xBD, 0x9C, 0xF6, 0x4E, 0xA8, 0x95, 0x3C, 0x03, 0x71, 0x08, 0xE6, 0xF6, 0x54, 0x91, 0x4F, 0x39, 0x58, 0xB6, 0x8E, 0x29, 0xC1, 0x67, 0x00, 0xDC, 0x18, 0x4D, 0x94, 0xA2, 0x17, 0x08, 0xFF, 0x60 }, + { 0x88, 0x35, 0xB0, 0xAC, 0x02, 0x11, 0x51, 0xDF, 0x71, 0x64, 0x74, 0xCE, 0x27, 0xCE, 0x4D, 0x3C, 0x15, 0xF0, 0xB2, 0xDA, 0xB4, 0x80, 0x03, 0xCF, 0x3F, 0x3E, 0xFD, 0x09, 0x45, 0x10, 0x6B, 0x9A }, + { 0x3B, 0xFE, 0xFA, 0x33, 0x01, 0xAA, 0x55, 0xC0, 0x80, 0x19, 0x0C, 0xFF, 0xDA, 0x8E, 0xAE, 0x51, 0xD9, 0xAF, 0x48, 0x8B, 0x4C, 0x1F, 0x24, 0xC3, 0xD9, 0xA7, 0x52, 0x42, 0xFD, 0x8E, 0xA0, 0x1D }, + { 0x08, 0x28, 0x4D, 0x14, 0x99, 0x3C, 0xD4, 0x7D, 0x53, 0xEB, 0xAE, 0xCF, 0x0D, 0xF0, 0x47, 0x8C, 0xC1, 0x82, 0xC8, 0x9C, 0x00, 0xE1, 0x85, 0x9C, 0x84, 0x85, 0x16, 0x86, 0xDD, 0xF2, 0xC1, 0xB7 }, + { 0x1E, 0xD7, 0xEF, 0x9F, 0x04, 0xC2, 0xAC, 0x8D, 0xB6, 0xA8, 0x64, 0xDB, 0x13, 0x10, 0x87, 0xF2, 0x70, 0x65, 0x09, 0x8E, 0x69, 0xC3, 0xFE, 0x78, 0x71, 0x8D, 0x9B, 0x94, 0x7F, 0x4A, 0x39, 0xD0 }, + { 0xC1, 0x61, 0xF2, 0xDC, 0xD5, 0x7E, 0x9C, 0x14, 0x39, 0xB3, 0x1A, 0x9D, 0xD4, 0x3D, 0x8F, 0x3D, 0x7D, 0xD8, 0xF0, 0xEB, 0x7C, 0xFA, 0xC6, 0xFB, 0x25, 0xA0, 0xF2, 0x8E, 0x30, 0x6F, 0x06, 0x61 }, + { 0xC0, 0x19, 0x69, 0xAD, 0x34, 0xC5, 0x2C, 0xAF, 0x3D, 0xC4, 0xD8, 0x0D, 0x19, 0x73, 0x5C, 0x29, 0x73, 0x1A, 0xC6, 0xE7, 0xA9, 0x20, 0x85, 0xAB, 0x92, 0x50, 0xC4, 0x8D, 0xEA, 0x48, 0xA3, 0xFC }, + { 0x17, 
0x20, 0xB3, 0x65, 0x56, 0x19, 0xD2, 0xA5, 0x2B, 0x35, 0x21, 0xAE, 0x0E, 0x49, 0xE3, 0x45, 0xCB, 0x33, 0x89, 0xEB, 0xD6, 0x20, 0x8A, 0xCA, 0xF9, 0xF1, 0x3F, 0xDA, 0xCC, 0xA8, 0xBE, 0x49 }, + { 0x75, 0x62, 0x88, 0x36, 0x1C, 0x83, 0xE2, 0x4C, 0x61, 0x7C, 0xF9, 0x5C, 0x90, 0x5B, 0x22, 0xD0, 0x17, 0xCD, 0xC8, 0x6F, 0x0B, 0xF1, 0xD6, 0x58, 0xF4, 0x75, 0x6C, 0x73, 0x79, 0x87, 0x3B, 0x7F }, + { 0xE7, 0xD0, 0xED, 0xA3, 0x45, 0x26, 0x93, 0xB7, 0x52, 0xAB, 0xCD, 0xA1, 0xB5, 0x5E, 0x27, 0x6F, 0x82, 0x69, 0x8F, 0x5F, 0x16, 0x05, 0x40, 0x3E, 0xFF, 0x83, 0x0B, 0xEA, 0x00, 0x71, 0xA3, 0x94 }, + { 0x2C, 0x82, 0xEC, 0xAA, 0x6B, 0x84, 0x80, 0x3E, 0x04, 0x4A, 0xF6, 0x31, 0x18, 0xAF, 0xE5, 0x44, 0x68, 0x7C, 0xB6, 0xE6, 0xC7, 0xDF, 0x49, 0xED, 0x76, 0x2D, 0xFD, 0x7C, 0x86, 0x93, 0xA1, 0xBC }, + { 0x61, 0x36, 0xCB, 0xF4, 0xB4, 0x41, 0x05, 0x6F, 0xA1, 0xE2, 0x72, 0x24, 0x98, 0x12, 0x5D, 0x6D, 0xED, 0x45, 0xE1, 0x7B, 0x52, 0x14, 0x39, 0x59, 0xC7, 0xF4, 0xD4, 0xE3, 0x95, 0x21, 0x8A, 0xC2 }, + { 0x72, 0x1D, 0x32, 0x45, 0xAA, 0xFE, 0xF2, 0x7F, 0x6A, 0x62, 0x4F, 0x47, 0x95, 0x4B, 0x6C, 0x25, 0x50, 0x79, 0x52, 0x6F, 0xFA, 0x25, 0xE9, 0xFF, 0x77, 0xE5, 0xDC, 0xFF, 0x47, 0x3B, 0x15, 0x97 }, + { 0x9D, 0xD2, 0xFB, 0xD8, 0xCE, 0xF1, 0x6C, 0x35, 0x3C, 0x0A, 0xC2, 0x11, 0x91, 0xD5, 0x09, 0xEB, 0x28, 0xDD, 0x9E, 0x3E, 0x0D, 0x8C, 0xEA, 0x5D, 0x26, 0xCA, 0x83, 0x93, 0x93, 0x85, 0x1C, 0x3A }, + { 0xB2, 0x39, 0x4C, 0xEA, 0xCD, 0xEB, 0xF2, 0x1B, 0xF9, 0xDF, 0x2C, 0xED, 0x98, 0xE5, 0x8F, 0x1C, 0x3A, 0x4B, 0xBB, 0xFF, 0x66, 0x0D, 0xD9, 0x00, 0xF6, 0x22, 0x02, 0xD6, 0x78, 0x5C, 0xC4, 0x6E }, + { 0x57, 0x08, 0x9F, 0x22, 0x27, 0x49, 0xAD, 0x78, 0x71, 0x76, 0x5F, 0x06, 0x2B, 0x11, 0x4F, 0x43, 0xBA, 0x20, 0xEC, 0x56, 0x42, 0x2A, 0x8B, 0x1E, 0x3F, 0x87, 0x19, 0x2C, 0x0E, 0xA7, 0x18, 0xC6 }, + { 0xE4, 0x9A, 0x94, 0x59, 0x96, 0x1C, 0xD3, 0x3C, 0xDF, 0x4A, 0xAE, 0x1B, 0x10, 0x78, 0xA5, 0xDE, 0xA7, 0xC0, 0x40, 0xE0, 0xFE, 0xA3, 0x40, 0xC9, 0x3A, 0x72, 0x48, 0x72, 0xFC, 0x4A, 0xF8, 0x06 }, + { 0xED, 0xE6, 0x7F, 0x72, 
0x0E, 0xFF, 0xD2, 0xCA, 0x9C, 0x88, 0x99, 0x41, 0x52, 0xD0, 0x20, 0x1D, 0xEE, 0x6B, 0x0A, 0x2D, 0x2C, 0x07, 0x7A, 0xCA, 0x6D, 0xAE, 0x29, 0xF7, 0x3F, 0x8B, 0x63, 0x09 }, + { 0xE0, 0xF4, 0x34, 0xBF, 0x22, 0xE3, 0x08, 0x80, 0x39, 0xC2, 0x1F, 0x71, 0x9F, 0xFC, 0x67, 0xF0, 0xF2, 0xCB, 0x5E, 0x98, 0xA7, 0xA0, 0x19, 0x4C, 0x76, 0xE9, 0x6B, 0xF4, 0xE8, 0xE1, 0x7E, 0x61 }, + { 0x27, 0x7C, 0x04, 0xE2, 0x85, 0x34, 0x84, 0xA4, 0xEB, 0xA9, 0x10, 0xAD, 0x33, 0x6D, 0x01, 0xB4, 0x77, 0xB6, 0x7C, 0xC2, 0x00, 0xC5, 0x9F, 0x3C, 0x8D, 0x77, 0xEE, 0xF8, 0x49, 0x4F, 0x29, 0xCD }, + { 0x15, 0x6D, 0x57, 0x47, 0xD0, 0xC9, 0x9C, 0x7F, 0x27, 0x09, 0x7D, 0x7B, 0x7E, 0x00, 0x2B, 0x2E, 0x18, 0x5C, 0xB7, 0x2D, 0x8D, 0xD7, 0xEB, 0x42, 0x4A, 0x03, 0x21, 0x52, 0x81, 0x61, 0x21, 0x9F }, + { 0x20, 0xDD, 0xD1, 0xED, 0x9B, 0x1C, 0xA8, 0x03, 0x94, 0x6D, 0x64, 0xA8, 0x3A, 0xE4, 0x65, 0x9D, 0xA6, 0x7F, 0xBA, 0x7A, 0x1A, 0x3E, 0xDD, 0xB1, 0xE1, 0x03, 0xC0, 0xF5, 0xE0, 0x3E, 0x3A, 0x2C }, + { 0xF0, 0xAF, 0x60, 0x4D, 0x3D, 0xAB, 0xBF, 0x9A, 0x0F, 0x2A, 0x7D, 0x3D, 0xDA, 0x6B, 0xD3, 0x8B, 0xBA, 0x72, 0xC6, 0xD0, 0x9B, 0xE4, 0x94, 0xFC, 0xEF, 0x71, 0x3F, 0xF1, 0x01, 0x89, 0xB6, 0xE6 }, + { 0x98, 0x02, 0xBB, 0x87, 0xDE, 0xF4, 0xCC, 0x10, 0xC4, 0xA5, 0xFD, 0x49, 0xAA, 0x58, 0xDF, 0xE2, 0xF3, 0xFD, 0xDB, 0x46, 0xB4, 0x70, 0x88, 0x14, 0xEA, 0xD8, 0x1D, 0x23, 0xBA, 0x95, 0x13, 0x9B }, + { 0x4F, 0x8C, 0xE1, 0xE5, 0x1D, 0x2F, 0xE7, 0xF2, 0x40, 0x43, 0xA9, 0x04, 0xD8, 0x98, 0xEB, 0xFC, 0x91, 0x97, 0x54, 0x18, 0x75, 0x34, 0x13, 0xAA, 0x09, 0x9B, 0x79, 0x5E, 0xCB, 0x35, 0xCE, 0xDB }, + { 0xBD, 0xDC, 0x65, 0x14, 0xD7, 0xEE, 0x6A, 0xCE, 0x0A, 0x4A, 0xC1, 0xD0, 0xE0, 0x68, 0x11, 0x22, 0x88, 0xCB, 0xCF, 0x56, 0x04, 0x54, 0x64, 0x27, 0x05, 0x63, 0x01, 0x77, 0xCB, 0xA6, 0x08, 0xBD }, + { 0xD6, 0x35, 0x99, 0x4F, 0x62, 0x91, 0x51, 0x7B, 0x02, 0x81, 0xFF, 0xDD, 0x49, 0x6A, 0xFA, 0x86, 0x27, 0x12, 0xE5, 0xB3, 0xC4, 0xE5, 0x2E, 0x4C, 0xD5, 0xFD, 0xAE, 0x8C, 0x0E, 0x72, 0xFB, 0x08 }, + { 0x87, 0x8D, 0x9C, 0xA6, 0x00, 0xCF, 0x87, 
0xE7, 0x69, 0xCC, 0x30, 0x5C, 0x1B, 0x35, 0x25, 0x51, 0x86, 0x61, 0x5A, 0x73, 0xA0, 0xDA, 0x61, 0x3B, 0x5F, 0x1C, 0x98, 0xDB, 0xF8, 0x12, 0x83, 0xEA }, + { 0xA6, 0x4E, 0xBE, 0x5D, 0xC1, 0x85, 0xDE, 0x9F, 0xDD, 0xE7, 0x60, 0x7B, 0x69, 0x98, 0x70, 0x2E, 0xB2, 0x34, 0x56, 0x18, 0x49, 0x57, 0x30, 0x7D, 0x2F, 0xA7, 0x2E, 0x87, 0xA4, 0x77, 0x02, 0xD6 }, + { 0xCE, 0x50, 0xEA, 0xB7, 0xB5, 0xEB, 0x52, 0xBD, 0xC9, 0xAD, 0x8E, 0x5A, 0x48, 0x0A, 0xB7, 0x80, 0xCA, 0x93, 0x20, 0xE4, 0x43, 0x60, 0xB1, 0xFE, 0x37, 0xE0, 0x3F, 0x2F, 0x7A, 0xD7, 0xDE, 0x01 }, + { 0xEE, 0xDD, 0xB7, 0xC0, 0xDB, 0x6E, 0x30, 0xAB, 0xE6, 0x6D, 0x79, 0xE3, 0x27, 0x51, 0x1E, 0x61, 0xFC, 0xEB, 0xBC, 0x29, 0xF1, 0x59, 0xB4, 0x0A, 0x86, 0xB0, 0x46, 0xEC, 0xF0, 0x51, 0x38, 0x23 }, + { 0x78, 0x7F, 0xC9, 0x34, 0x40, 0xC1, 0xEC, 0x96, 0xB5, 0xAD, 0x01, 0xC1, 0x6C, 0xF7, 0x79, 0x16, 0xA1, 0x40, 0x5F, 0x94, 0x26, 0x35, 0x6E, 0xC9, 0x21, 0xD8, 0xDF, 0xF3, 0xEA, 0x63, 0xB7, 0xE0 }, + { 0x7F, 0x0D, 0x5E, 0xAB, 0x47, 0xEE, 0xFD, 0xA6, 0x96, 0xC0, 0xBF, 0x0F, 0xBF, 0x86, 0xAB, 0x21, 0x6F, 0xCE, 0x46, 0x1E, 0x93, 0x03, 0xAB, 0xA6, 0xAC, 0x37, 0x41, 0x20, 0xE8, 0x90, 0xE8, 0xDF }, + { 0xB6, 0x80, 0x04, 0xB4, 0x2F, 0x14, 0xAD, 0x02, 0x9F, 0x4C, 0x2E, 0x03, 0xB1, 0xD5, 0xEB, 0x76, 0xD5, 0x71, 0x60, 0xE2, 0x64, 0x76, 0xD2, 0x11, 0x31, 0xBE, 0xF2, 0x0A, 0xDA, 0x7D, 0x27, 0xF4 }, + { 0xB0, 0xC4, 0xEB, 0x18, 0xAE, 0x25, 0x0B, 0x51, 0xA4, 0x13, 0x82, 0xEA, 0xD9, 0x2D, 0x0D, 0xC7, 0x45, 0x5F, 0x93, 0x79, 0xFC, 0x98, 0x84, 0x42, 0x8E, 0x47, 0x70, 0x60, 0x8D, 0xB0, 0xFA, 0xEC }, + { 0xF9, 0x2B, 0x7A, 0x87, 0x0C, 0x05, 0x9F, 0x4D, 0x46, 0x46, 0x4C, 0x82, 0x4E, 0xC9, 0x63, 0x55, 0x14, 0x0B, 0xDC, 0xE6, 0x81, 0x32, 0x2C, 0xC3, 0xA9, 0x92, 0xFF, 0x10, 0x3E, 0x3F, 0xEA, 0x52 }, + { 0x53, 0x64, 0x31, 0x26, 0x14, 0x81, 0x33, 0x98, 0xCC, 0x52, 0x5D, 0x4C, 0x4E, 0x14, 0x6E, 0xDE, 0xB3, 0x71, 0x26, 0x5F, 0xBA, 0x19, 0x13, 0x3A, 0x2C, 0x3D, 0x21, 0x59, 0x29, 0x8A, 0x17, 0x42 }, + { 0xF6, 0x62, 0x0E, 0x68, 0xD3, 0x7F, 0xB2, 0xAF, 0x50, 0x00, 
0xFC, 0x28, 0xE2, 0x3B, 0x83, 0x22, 0x97, 0xEC, 0xD8, 0xBC, 0xE9, 0x9E, 0x8B, 0xE4, 0xD0, 0x4E, 0x85, 0x30, 0x9E, 0x3D, 0x33, 0x74 }, + { 0x53, 0x16, 0xA2, 0x79, 0x69, 0xD7, 0xFE, 0x04, 0xFF, 0x27, 0xB2, 0x83, 0x96, 0x1B, 0xFF, 0xC3, 0xBF, 0x5D, 0xFB, 0x32, 0xFB, 0x6A, 0x89, 0xD1, 0x01, 0xC6, 0xC3, 0xB1, 0x93, 0x7C, 0x28, 0x71 }, + { 0x81, 0xD1, 0x66, 0x4F, 0xDF, 0x3C, 0xB3, 0x3C, 0x24, 0xEE, 0xBA, 0xC0, 0xBD, 0x64, 0x24, 0x4B, 0x77, 0xC4, 0xAB, 0xEA, 0x90, 0xBB, 0xE8, 0xB5, 0xEE, 0x0B, 0x2A, 0xAF, 0xCF, 0x2D, 0x6A, 0x53 }, + { 0x34, 0x57, 0x82, 0xF2, 0x95, 0xB0, 0x88, 0x03, 0x52, 0xE9, 0x24, 0xA0, 0x46, 0x7B, 0x5F, 0xBC, 0x3E, 0x8F, 0x3B, 0xFB, 0xC3, 0xC7, 0xE4, 0x8B, 0x67, 0x09, 0x1F, 0xB5, 0xE8, 0x0A, 0x94, 0x42 }, + { 0x79, 0x41, 0x11, 0xEA, 0x6C, 0xD6, 0x5E, 0x31, 0x1F, 0x74, 0xEE, 0x41, 0xD4, 0x76, 0xCB, 0x63, 0x2C, 0xE1, 0xE4, 0xB0, 0x51, 0xDC, 0x1D, 0x9E, 0x9D, 0x06, 0x1A, 0x19, 0xE1, 0xD0, 0xBB, 0x49 }, + { 0x2A, 0x85, 0xDA, 0xF6, 0x13, 0x88, 0x16, 0xB9, 0x9B, 0xF8, 0xD0, 0x8B, 0xA2, 0x11, 0x4B, 0x7A, 0xB0, 0x79, 0x75, 0xA7, 0x84, 0x20, 0xC1, 0xA3, 0xB0, 0x6A, 0x77, 0x7C, 0x22, 0xDD, 0x8B, 0xCB }, + { 0x89, 0xB0, 0xD5, 0xF2, 0x89, 0xEC, 0x16, 0x40, 0x1A, 0x06, 0x9A, 0x96, 0x0D, 0x0B, 0x09, 0x3E, 0x62, 0x5D, 0xA3, 0xCF, 0x41, 0xEE, 0x29, 0xB5, 0x9B, 0x93, 0x0C, 0x58, 0x20, 0x14, 0x54, 0x55 }, + { 0xD0, 0xFD, 0xCB, 0x54, 0x39, 0x43, 0xFC, 0x27, 0xD2, 0x08, 0x64, 0xF5, 0x21, 0x81, 0x47, 0x1B, 0x94, 0x2C, 0xC7, 0x7C, 0xA6, 0x75, 0xBC, 0xB3, 0x0D, 0xF3, 0x1D, 0x35, 0x8E, 0xF7, 0xB1, 0xEB }, + { 0xB1, 0x7E, 0xA8, 0xD7, 0x70, 0x63, 0xC7, 0x09, 0xD4, 0xDC, 0x6B, 0x87, 0x94, 0x13, 0xC3, 0x43, 0xE3, 0x79, 0x0E, 0x9E, 0x62, 0xCA, 0x85, 0xB7, 0x90, 0x0B, 0x08, 0x6F, 0x6B, 0x75, 0xC6, 0x72 }, + { 0xE7, 0x1A, 0x3E, 0x2C, 0x27, 0x4D, 0xB8, 0x42, 0xD9, 0x21, 0x14, 0xF2, 0x17, 0xE2, 0xC0, 0xEA, 0xC8, 0xB4, 0x50, 0x93, 0xFD, 0xFD, 0x9D, 0xF4, 0xCA, 0x71, 0x62, 0x39, 0x48, 0x62, 0xD5, 0x01 }, + { 0xC0, 0x47, 0x67, 0x59, 0xAB, 0x7A, 0xA3, 0x33, 0x23, 0x4F, 0x6B, 0x44, 0xF5, 
0xFD, 0x85, 0x83, 0x90, 0xEC, 0x23, 0x69, 0x4C, 0x62, 0x2C, 0xB9, 0x86, 0xE7, 0x69, 0xC7, 0x8E, 0xDD, 0x73, 0x3E }, + { 0x9A, 0xB8, 0xEA, 0xBB, 0x14, 0x16, 0x43, 0x4D, 0x85, 0x39, 0x13, 0x41, 0xD5, 0x69, 0x93, 0xC5, 0x54, 0x58, 0x16, 0x7D, 0x44, 0x18, 0xB1, 0x9A, 0x0F, 0x2A, 0xD8, 0xB7, 0x9A, 0x83, 0xA7, 0x5B }, + { 0x79, 0x92, 0xD0, 0xBB, 0xB1, 0x5E, 0x23, 0x82, 0x6F, 0x44, 0x3E, 0x00, 0x50, 0x5D, 0x68, 0xD3, 0xED, 0x73, 0x72, 0x99, 0x5A, 0x5C, 0x3E, 0x49, 0x86, 0x54, 0x10, 0x2F, 0xBC, 0xD0, 0x96, 0x4E }, + { 0xC0, 0x21, 0xB3, 0x00, 0x85, 0x15, 0x14, 0x35, 0xDF, 0x33, 0xB0, 0x07, 0xCC, 0xEC, 0xC6, 0x9D, 0xF1, 0x26, 0x9F, 0x39, 0xBA, 0x25, 0x09, 0x2B, 0xED, 0x59, 0xD9, 0x32, 0xAC, 0x0F, 0xDC, 0x28 }, + { 0x91, 0xA2, 0x5E, 0xC0, 0xEC, 0x0D, 0x9A, 0x56, 0x7F, 0x89, 0xC4, 0xBF, 0xE1, 0xA6, 0x5A, 0x0E, 0x43, 0x2D, 0x07, 0x06, 0x4B, 0x41, 0x90, 0xE2, 0x7D, 0xFB, 0x81, 0x90, 0x1F, 0xD3, 0x13, 0x9B }, + { 0x59, 0x50, 0xD3, 0x9A, 0x23, 0xE1, 0x54, 0x5F, 0x30, 0x12, 0x70, 0xAA, 0x1A, 0x12, 0xF2, 0xE6, 0xC4, 0x53, 0x77, 0x6E, 0x4D, 0x63, 0x55, 0xDE, 0x42, 0x5C, 0xC1, 0x53, 0xF9, 0x81, 0x88, 0x67 }, + { 0xD7, 0x9F, 0x14, 0x72, 0x0C, 0x61, 0x0A, 0xF1, 0x79, 0xA3, 0x76, 0x5D, 0x4B, 0x7C, 0x09, 0x68, 0xF9, 0x77, 0x96, 0x2D, 0xBF, 0x65, 0x5B, 0x52, 0x12, 0x72, 0xB6, 0xF1, 0xE1, 0x94, 0x48, 0x8E }, + { 0xE9, 0x53, 0x1B, 0xFC, 0x8B, 0x02, 0x99, 0x5A, 0xEA, 0xA7, 0x5B, 0xA2, 0x70, 0x31, 0xFA, 0xDB, 0xCB, 0xF4, 0xA0, 0xDA, 0xB8, 0x96, 0x1D, 0x92, 0x96, 0xCD, 0x7E, 0x84, 0xD2, 0x5D, 0x60, 0x06 }, + { 0x34, 0xE9, 0xC2, 0x6A, 0x01, 0xD7, 0xF1, 0x61, 0x81, 0xB4, 0x54, 0xA9, 0xD1, 0x62, 0x3C, 0x23, 0x3C, 0xB9, 0x9D, 0x31, 0xC6, 0x94, 0x65, 0x6E, 0x94, 0x13, 0xAC, 0xA3, 0xE9, 0x18, 0x69, 0x2F }, + { 0xD9, 0xD7, 0x42, 0x2F, 0x43, 0x7B, 0xD4, 0x39, 0xDD, 0xD4, 0xD8, 0x83, 0xDA, 0xE2, 0xA0, 0x83, 0x50, 0x17, 0x34, 0x14, 0xBE, 0x78, 0x15, 0x51, 0x33, 0xFF, 0xF1, 0x96, 0x4C, 0x3D, 0x79, 0x72 }, + { 0x4A, 0xEE, 0x0C, 0x7A, 0xAF, 0x07, 0x54, 0x14, 0xFF, 0x17, 0x93, 0xEA, 0xD7, 0xEA, 0xCA, 0x60, 
0x17, 0x75, 0xC6, 0x15, 0xDB, 0xD6, 0x0B, 0x64, 0x0B, 0x0A, 0x9F, 0x0C, 0xE5, 0x05, 0xD4, 0x35 }, + { 0x6B, 0xFD, 0xD1, 0x54, 0x59, 0xC8, 0x3B, 0x99, 0xF0, 0x96, 0xBF, 0xB4, 0x9E, 0xE8, 0x7B, 0x06, 0x3D, 0x69, 0xC1, 0x97, 0x4C, 0x69, 0x28, 0xAC, 0xFC, 0xFB, 0x40, 0x99, 0xF8, 0xC4, 0xEF, 0x67 }, + { 0x9F, 0xD1, 0xC4, 0x08, 0xFD, 0x75, 0xC3, 0x36, 0x19, 0x3A, 0x2A, 0x14, 0xD9, 0x4F, 0x6A, 0xF5, 0xAD, 0xF0, 0x50, 0xB8, 0x03, 0x87, 0xB4, 0xB0, 0x10, 0xFB, 0x29, 0xF4, 0xCC, 0x72, 0x70, 0x7C }, + { 0x13, 0xC8, 0x84, 0x80, 0xA5, 0xD0, 0x0D, 0x6C, 0x8C, 0x7A, 0xD2, 0x11, 0x0D, 0x76, 0xA8, 0x2D, 0x9B, 0x70, 0xF4, 0xFA, 0x66, 0x96, 0xD4, 0xE5, 0xDD, 0x42, 0xA0, 0x66, 0xDC, 0xAF, 0x99, 0x20 }, + { 0x82, 0x0E, 0x72, 0x5E, 0xE2, 0x5F, 0xE8, 0xFD, 0x3A, 0x8D, 0x5A, 0xBE, 0x4C, 0x46, 0xC3, 0xBA, 0x88, 0x9D, 0xE6, 0xFA, 0x91, 0x91, 0xAA, 0x22, 0xBA, 0x67, 0xD5, 0x70, 0x54, 0x21, 0x54, 0x2B }, + { 0x32, 0xD9, 0x3A, 0x0E, 0xB0, 0x2F, 0x42, 0xFB, 0xBC, 0xAF, 0x2B, 0xAD, 0x00, 0x85, 0xB2, 0x82, 0xE4, 0x60, 0x46, 0xA4, 0xDF, 0x7A, 0xD1, 0x06, 0x57, 0xC9, 0xD6, 0x47, 0x63, 0x75, 0xB9, 0x3E }, + { 0xAD, 0xC5, 0x18, 0x79, 0x05, 0xB1, 0x66, 0x9C, 0xD8, 0xEC, 0x9C, 0x72, 0x1E, 0x19, 0x53, 0x78, 0x6B, 0x9D, 0x89, 0xA9, 0xBA, 0xE3, 0x07, 0x80, 0xF1, 0xE1, 0xEA, 0xB2, 0x4A, 0x00, 0x52, 0x3C }, + { 0xE9, 0x07, 0x56, 0xFF, 0x7F, 0x9A, 0xD8, 0x10, 0xB2, 0x39, 0xA1, 0x0C, 0xED, 0x2C, 0xF9, 0xB2, 0x28, 0x43, 0x54, 0xC1, 0xF8, 0xC7, 0xE0, 0xAC, 0xCC, 0x24, 0x61, 0xDC, 0x79, 0x6D, 0x6E, 0x89 }, + { 0x12, 0x51, 0xF7, 0x6E, 0x56, 0x97, 0x84, 0x81, 0x87, 0x53, 0x59, 0x80, 0x1D, 0xB5, 0x89, 0xA0, 0xB2, 0x2F, 0x86, 0xD8, 0xD6, 0x34, 0xDC, 0x04, 0x50, 0x6F, 0x32, 0x2E, 0xD7, 0x8F, 0x17, 0xE8 }, + { 0x3A, 0xFA, 0x89, 0x9F, 0xD9, 0x80, 0xE7, 0x3E, 0xCB, 0x7F, 0x4D, 0x8B, 0x8F, 0x29, 0x1D, 0xC9, 0xAF, 0x79, 0x6B, 0xC6, 0x5D, 0x27, 0xF9, 0x74, 0xC6, 0xF1, 0x93, 0xC9, 0x19, 0x1A, 0x09, 0xFD }, + { 0xAA, 0x30, 0x5B, 0xE2, 0x6E, 0x5D, 0xED, 0xDC, 0x3C, 0x10, 0x10, 0xCB, 0xC2, 0x13, 0xF9, 0x5F, 0x05, 0x1C, 0x78, 
0x5C, 0x5B, 0x43, 0x1E, 0x6A, 0x7C, 0xD0, 0x48, 0xF1, 0x61, 0x78, 0x75, 0x28 }, + { 0x8E, 0xA1, 0x88, 0x4F, 0xF3, 0x2E, 0x9D, 0x10, 0xF0, 0x39, 0xB4, 0x07, 0xD0, 0xD4, 0x4E, 0x7E, 0x67, 0x0A, 0xBD, 0x88, 0x4A, 0xEE, 0xE0, 0xFB, 0x75, 0x7A, 0xE9, 0x4E, 0xAA, 0x97, 0x37, 0x3D }, + { 0xD4, 0x82, 0xB2, 0x15, 0x5D, 0x4D, 0xEC, 0x6B, 0x47, 0x36, 0xA1, 0xF1, 0x61, 0x7B, 0x53, 0xAA, 0xA3, 0x73, 0x10, 0x27, 0x7D, 0x3F, 0xEF, 0x0C, 0x37, 0xAD, 0x41, 0x76, 0x8F, 0xC2, 0x35, 0xB4 }, + { 0x4D, 0x41, 0x39, 0x71, 0x38, 0x7E, 0x7A, 0x88, 0x98, 0xA8, 0xDC, 0x2A, 0x27, 0x50, 0x07, 0x78, 0x53, 0x9E, 0xA2, 0x14, 0xA2, 0xDF, 0xE9, 0xB3, 0xD7, 0xE8, 0xEB, 0xDC, 0xE5, 0xCF, 0x3D, 0xB3 }, + { 0x69, 0x6E, 0x5D, 0x46, 0xE6, 0xC5, 0x7E, 0x87, 0x96, 0xE4, 0x73, 0x5D, 0x08, 0x91, 0x6E, 0x0B, 0x79, 0x29, 0xB3, 0xCF, 0x29, 0x8C, 0x29, 0x6D, 0x22, 0xE9, 0xD3, 0x01, 0x96, 0x53, 0x37, 0x1C }, + { 0x1F, 0x56, 0x47, 0xC1, 0xD3, 0xB0, 0x88, 0x22, 0x88, 0x85, 0x86, 0x5C, 0x89, 0x40, 0x90, 0x8B, 0xF4, 0x0D, 0x1A, 0x82, 0x72, 0x82, 0x19, 0x73, 0xB1, 0x60, 0x00, 0x8E, 0x7A, 0x3C, 0xE2, 0xEB }, + { 0xB6, 0xE7, 0x6C, 0x33, 0x0F, 0x02, 0x1A, 0x5B, 0xDA, 0x65, 0x87, 0x50, 0x10, 0xB0, 0xED, 0xF0, 0x91, 0x26, 0xC0, 0xF5, 0x10, 0xEA, 0x84, 0x90, 0x48, 0x19, 0x20, 0x03, 0xAE, 0xF4, 0xC6, 0x1C }, + { 0x3C, 0xD9, 0x52, 0xA0, 0xBE, 0xAD, 0xA4, 0x1A, 0xBB, 0x42, 0x4C, 0xE4, 0x7F, 0x94, 0xB4, 0x2B, 0xE6, 0x4E, 0x1F, 0xFB, 0x0F, 0xD0, 0x78, 0x22, 0x76, 0x80, 0x79, 0x46, 0xD0, 0xD0, 0xBC, 0x55 }, + { 0x98, 0xD9, 0x26, 0x77, 0x43, 0x9B, 0x41, 0xB7, 0xBB, 0x51, 0x33, 0x12, 0xAF, 0xB9, 0x2B, 0xCC, 0x8E, 0xE9, 0x68, 0xB2, 0xE3, 0xB2, 0x38, 0xCE, 0xCB, 0x9B, 0x0F, 0x34, 0xC9, 0xBB, 0x63, 0xD0 }, + { 0xEC, 0xBC, 0xA2, 0xCF, 0x08, 0xAE, 0x57, 0xD5, 0x17, 0xAD, 0x16, 0x15, 0x8A, 0x32, 0xBF, 0xA7, 0xDC, 0x03, 0x82, 0xEA, 0xED, 0xA1, 0x28, 0xE9, 0x18, 0x86, 0x73, 0x4C, 0x24, 0xA0, 0xB2, 0x9D }, + { 0x94, 0x2C, 0xC7, 0xC0, 0xB5, 0x2E, 0x2B, 0x16, 0xA4, 0xB8, 0x9F, 0xA4, 0xFC, 0x7E, 0x0B, 0xF6, 0x09, 0xE2, 0x9A, 0x08, 0xC1, 0xA8, 
0x54, 0x34, 0x52, 0xB7, 0x7C, 0x7B, 0xFD, 0x11, 0xBB, 0x28 }, + { 0x8A, 0x06, 0x5D, 0x8B, 0x61, 0xA0, 0xDF, 0xFB, 0x17, 0x0D, 0x56, 0x27, 0x73, 0x5A, 0x76, 0xB0, 0xE9, 0x50, 0x60, 0x37, 0x80, 0x8C, 0xBA, 0x16, 0xC3, 0x45, 0x00, 0x7C, 0x9F, 0x79, 0xCF, 0x8F }, + { 0x1B, 0x9F, 0xA1, 0x97, 0x14, 0x65, 0x9C, 0x78, 0xFF, 0x41, 0x38, 0x71, 0x84, 0x92, 0x15, 0x36, 0x10, 0x29, 0xAC, 0x80, 0x2B, 0x1C, 0xBC, 0xD5, 0x4E, 0x40, 0x8B, 0xD8, 0x72, 0x87, 0xF8, 0x1F }, + { 0x8D, 0xAB, 0x07, 0x1B, 0xCD, 0x6C, 0x72, 0x92, 0xA9, 0xEF, 0x72, 0x7B, 0x4A, 0xE0, 0xD8, 0x67, 0x13, 0x30, 0x1D, 0xA8, 0x61, 0x8D, 0x9A, 0x48, 0xAD, 0xCE, 0x55, 0xF3, 0x03, 0xA8, 0x69, 0xA1 }, + { 0x82, 0x53, 0xE3, 0xE7, 0xC7, 0xB6, 0x84, 0xB9, 0xCB, 0x2B, 0xEB, 0x01, 0x4C, 0xE3, 0x30, 0xFF, 0x3D, 0x99, 0xD1, 0x7A, 0xBB, 0xDB, 0xAB, 0xE4, 0xF4, 0xD6, 0x74, 0xDE, 0xD5, 0x3F, 0xFC, 0x6B }, + { 0xF1, 0x95, 0xF3, 0x21, 0xE9, 0xE3, 0xD6, 0xBD, 0x7D, 0x07, 0x45, 0x04, 0xDD, 0x2A, 0xB0, 0xE6, 0x24, 0x1F, 0x92, 0xE7, 0x84, 0xB1, 0xAA, 0x27, 0x1F, 0xF6, 0x48, 0xB1, 0xCA, 0xB6, 0xD7, 0xF6 }, + { 0x27, 0xE4, 0xCC, 0x72, 0x09, 0x0F, 0x24, 0x12, 0x66, 0x47, 0x6A, 0x7C, 0x09, 0x49, 0x5F, 0x2D, 0xB1, 0x53, 0xD5, 0xBC, 0xBD, 0x76, 0x19, 0x03, 0xEF, 0x79, 0x27, 0x5E, 0xC5, 0x6B, 0x2E, 0xD8 }, + { 0x89, 0x9C, 0x24, 0x05, 0x78, 0x8E, 0x25, 0xB9, 0x9A, 0x18, 0x46, 0x35, 0x5E, 0x64, 0x6D, 0x77, 0xCF, 0x40, 0x00, 0x83, 0x41, 0x5F, 0x7D, 0xC5, 0xAF, 0xE6, 0x9D, 0x6E, 0x17, 0xC0, 0x00, 0x23 }, + { 0xA5, 0x9B, 0x78, 0xC4, 0x90, 0x57, 0x44, 0x07, 0x6B, 0xFE, 0xE8, 0x94, 0xDE, 0x70, 0x7D, 0x4F, 0x12, 0x0B, 0x5C, 0x68, 0x93, 0xEA, 0x04, 0x00, 0x29, 0x7D, 0x0B, 0xB8, 0x34, 0x72, 0x76, 0x32 }, + { 0x59, 0xDC, 0x78, 0xB1, 0x05, 0x64, 0x97, 0x07, 0xA2, 0xBB, 0x44, 0x19, 0xC4, 0x8F, 0x00, 0x54, 0x00, 0xD3, 0x97, 0x3D, 0xE3, 0x73, 0x66, 0x10, 0x23, 0x04, 0x35, 0xB1, 0x04, 0x24, 0xB2, 0x4F }, + { 0xC0, 0x14, 0x9D, 0x1D, 0x7E, 0x7A, 0x63, 0x53, 0xA6, 0xD9, 0x06, 0xEF, 0xE7, 0x28, 0xF2, 0xF3, 0x29, 0xFE, 0x14, 0xA4, 0x14, 0x9A, 0x3E, 0xA7, 0x76, 
0x09, 0xBC, 0x42, 0xB9, 0x75, 0xDD, 0xFA }, + { 0xA3, 0x2F, 0x24, 0x14, 0x74, 0xA6, 0xC1, 0x69, 0x32, 0xE9, 0x24, 0x3B, 0xE0, 0xCF, 0x09, 0xBC, 0xDC, 0x7E, 0x0C, 0xA0, 0xE7, 0xA6, 0xA1, 0xB9, 0xB1, 0xA0, 0xF0, 0x1E, 0x41, 0x50, 0x23, 0x77 }, + { 0xB2, 0x39, 0xB2, 0xE4, 0xF8, 0x18, 0x41, 0x36, 0x1C, 0x13, 0x39, 0xF6, 0x8E, 0x2C, 0x35, 0x9F, 0x92, 0x9A, 0xF9, 0xAD, 0x9F, 0x34, 0xE0, 0x1A, 0xAB, 0x46, 0x31, 0xAD, 0x6D, 0x55, 0x00, 0xB0 }, + { 0x85, 0xFB, 0x41, 0x9C, 0x70, 0x02, 0xA3, 0xE0, 0xB4, 0xB6, 0xEA, 0x09, 0x3B, 0x4C, 0x1A, 0xC6, 0x93, 0x66, 0x45, 0xB6, 0x5D, 0xAC, 0x5A, 0xC1, 0x5A, 0x85, 0x28, 0xB7, 0xB9, 0x4C, 0x17, 0x54 }, + { 0x96, 0x19, 0x72, 0x06, 0x25, 0xF1, 0x90, 0xB9, 0x3A, 0x3F, 0xAD, 0x18, 0x6A, 0xB3, 0x14, 0x18, 0x96, 0x33, 0xC0, 0xD3, 0xA0, 0x1E, 0x6F, 0x9B, 0xC8, 0xC4, 0xA8, 0xF8, 0x2F, 0x38, 0x3D, 0xBF }, + { 0x7D, 0x62, 0x0D, 0x90, 0xFE, 0x69, 0xFA, 0x46, 0x9A, 0x65, 0x38, 0x38, 0x89, 0x70, 0xA1, 0xAA, 0x09, 0xBB, 0x48, 0xA2, 0xD5, 0x9B, 0x34, 0x7B, 0x97, 0xE8, 0xCE, 0x71, 0xF4, 0x8C, 0x7F, 0x46 }, + { 0x29, 0x43, 0x83, 0x56, 0x85, 0x96, 0xFB, 0x37, 0xC7, 0x5B, 0xBA, 0xCD, 0x97, 0x9C, 0x5F, 0xF6, 0xF2, 0x0A, 0x55, 0x6B, 0xF8, 0x87, 0x9C, 0xC7, 0x29, 0x24, 0x85, 0x5D, 0xF9, 0xB8, 0x24, 0x0E }, + { 0x16, 0xB1, 0x8A, 0xB3, 0x14, 0x35, 0x9C, 0x2B, 0x83, 0x3C, 0x1C, 0x69, 0x86, 0xD4, 0x8C, 0x55, 0xA9, 0xFC, 0x97, 0xCD, 0xE9, 0xA3, 0xC1, 0xF1, 0x0A, 0x31, 0x77, 0x14, 0x0F, 0x73, 0xF7, 0x38 }, + { 0x8C, 0xBB, 0xDD, 0x14, 0xBC, 0x33, 0xF0, 0x4C, 0xF4, 0x58, 0x13, 0xE4, 0xA1, 0x53, 0xA2, 0x73, 0xD3, 0x6A, 0xDA, 0xD5, 0xCE, 0x71, 0xF4, 0x99, 0xEE, 0xB8, 0x7F, 0xB8, 0xAC, 0x63, 0xB7, 0x29 }, + { 0x69, 0xC9, 0xA4, 0x98, 0xDB, 0x17, 0x4E, 0xCA, 0xEF, 0xCC, 0x5A, 0x3A, 0xC9, 0xFD, 0xED, 0xF0, 0xF8, 0x13, 0xA5, 0xBE, 0xC7, 0x27, 0xF1, 0xE7, 0x75, 0xBA, 0xBD, 0xEC, 0x77, 0x18, 0x81, 0x6E }, + { 0xB4, 0x62, 0xC3, 0xBE, 0x40, 0x44, 0x8F, 0x1D, 0x4F, 0x80, 0x62, 0x62, 0x54, 0xE5, 0x35, 0xB0, 0x8B, 0xC9, 0xCD, 0xCF, 0xF5, 0x99, 0xA7, 0x68, 0x57, 0x8D, 0x4B, 0x28, 
0x81, 0xA8, 0xE3, 0xF0 }, + { 0x55, 0x3E, 0x9D, 0x9C, 0x5F, 0x36, 0x0A, 0xC0, 0xB7, 0x4A, 0x7D, 0x44, 0xE5, 0xA3, 0x91, 0xDA, 0xD4, 0xCE, 0xD0, 0x3E, 0x0C, 0x24, 0x18, 0x3B, 0x7E, 0x8E, 0xCA, 0xBD, 0xF1, 0x71, 0x5A, 0x64 }, + { 0x7A, 0x7C, 0x55, 0xA5, 0x6F, 0xA9, 0xAE, 0x51, 0xE6, 0x55, 0xE0, 0x19, 0x75, 0xD8, 0xA6, 0xFF, 0x4A, 0xE9, 0xE4, 0xB4, 0x86, 0xFC, 0xBE, 0x4E, 0xAC, 0x04, 0x45, 0x88, 0xF2, 0x45, 0xEB, 0xEA }, + { 0x2A, 0xFD, 0xF3, 0xC8, 0x2A, 0xBC, 0x48, 0x67, 0xF5, 0xDE, 0x11, 0x12, 0x86, 0xC2, 0xB3, 0xBE, 0x7D, 0x6E, 0x48, 0x65, 0x7B, 0xA9, 0x23, 0xCF, 0xBF, 0x10, 0x1A, 0x6D, 0xFC, 0xF9, 0xDB, 0x9A }, + { 0x41, 0x03, 0x7D, 0x2E, 0xDC, 0xDC, 0xE0, 0xC4, 0x9B, 0x7F, 0xB4, 0xA6, 0xAA, 0x09, 0x99, 0xCA, 0x66, 0x97, 0x6C, 0x74, 0x83, 0xAF, 0xE6, 0x31, 0xD4, 0xED, 0xA2, 0x83, 0x14, 0x4F, 0x6D, 0xFC }, + { 0xC4, 0x46, 0x6F, 0x84, 0x97, 0xCA, 0x2E, 0xEB, 0x45, 0x83, 0xA0, 0xB0, 0x8E, 0x9D, 0x9A, 0xC7, 0x43, 0x95, 0x70, 0x9F, 0xDA, 0x10, 0x9D, 0x24, 0xF2, 0xE4, 0x46, 0x21, 0x96, 0x77, 0x9C, 0x5D }, + { 0x75, 0xF6, 0x09, 0x33, 0x8A, 0xA6, 0x7D, 0x96, 0x9A, 0x2A, 0xE2, 0xA2, 0x36, 0x2B, 0x2D, 0xA9, 0xD7, 0x7C, 0x69, 0x5D, 0xFD, 0x1D, 0xF7, 0x22, 0x4A, 0x69, 0x01, 0xDB, 0x93, 0x2C, 0x33, 0x64 }, + { 0x68, 0x60, 0x6C, 0xEB, 0x98, 0x9D, 0x54, 0x88, 0xFC, 0x7C, 0xF6, 0x49, 0xF3, 0xD7, 0xC2, 0x72, 0xEF, 0x05, 0x5D, 0xA1, 0xA9, 0x3F, 0xAE, 0xCD, 0x55, 0xFE, 0x06, 0xF6, 0x96, 0x70, 0x98, 0xCA }, + { 0x44, 0x34, 0x6B, 0xDE, 0xB7, 0xE0, 0x52, 0xF6, 0x25, 0x50, 0x48, 0xF0, 0xD9, 0xB4, 0x2C, 0x42, 0x5B, 0xAB, 0x9C, 0x3D, 0xD2, 0x41, 0x68, 0x21, 0x2C, 0x3E, 0xCF, 0x1E, 0xBF, 0x34, 0xE6, 0xAE }, + { 0x8E, 0x9C, 0xF6, 0xE1, 0xF3, 0x66, 0x47, 0x1F, 0x2A, 0xC7, 0xD2, 0xEE, 0x9B, 0x5E, 0x62, 0x66, 0xFD, 0xA7, 0x1F, 0x8F, 0x2E, 0x41, 0x09, 0xF2, 0x23, 0x7E, 0xD5, 0xF8, 0x81, 0x3F, 0xC7, 0x18 }, + { 0x84, 0xBB, 0xEB, 0x84, 0x06, 0xD2, 0x50, 0x95, 0x1F, 0x8C, 0x1B, 0x3E, 0x86, 0xA7, 0xC0, 0x10, 0x08, 0x29, 0x21, 0x83, 0x3D, 0xFD, 0x95, 0x55, 0xA2, 0xF9, 0x09, 0xB1, 0x08, 0x6E, 0xB4, 
0xB8 }, + { 0xEE, 0x66, 0x6F, 0x3E, 0xEF, 0x0F, 0x7E, 0x2A, 0x9C, 0x22, 0x29, 0x58, 0xC9, 0x7E, 0xAF, 0x35, 0xF5, 0x1C, 0xED, 0x39, 0x3D, 0x71, 0x44, 0x85, 0xAB, 0x09, 0xA0, 0x69, 0x34, 0x0F, 0xDF, 0x88 }, + { 0xC1, 0x53, 0xD3, 0x4A, 0x65, 0xC4, 0x7B, 0x4A, 0x62, 0xC5, 0xCA, 0xCF, 0x24, 0x01, 0x09, 0x75, 0xD0, 0x35, 0x6B, 0x2F, 0x32, 0xC8, 0xF5, 0xDA, 0x53, 0x0D, 0x33, 0x88, 0x16, 0xAD, 0x5D, 0xE6 }, + { 0x9F, 0xC5, 0x45, 0x01, 0x09, 0xE1, 0xB7, 0x79, 0xF6, 0xC7, 0xAE, 0x79, 0xD5, 0x6C, 0x27, 0x63, 0x5C, 0x8D, 0xD4, 0x26, 0xC5, 0xA9, 0xD5, 0x4E, 0x25, 0x78, 0xDB, 0x98, 0x9B, 0x8C, 0x3B, 0x4E }, + { 0xD1, 0x2B, 0xF3, 0x73, 0x2E, 0xF4, 0xAF, 0x5C, 0x22, 0xFA, 0x90, 0x35, 0x6A, 0xF8, 0xFC, 0x50, 0xFC, 0xB4, 0x0F, 0x8F, 0x2E, 0xA5, 0xC8, 0x59, 0x47, 0x37, 0xA3, 0xB3, 0xD5, 0xAB, 0xDB, 0xD7 }, + { 0x11, 0x03, 0x0B, 0x92, 0x89, 0xBB, 0xA5, 0xAF, 0x65, 0x26, 0x06, 0x72, 0xAB, 0x6F, 0xEE, 0x88, 0xB8, 0x74, 0x20, 0xAC, 0xEF, 0x4A, 0x17, 0x89, 0xA2, 0x07, 0x3B, 0x7E, 0xC2, 0xF2, 0xA0, 0x9E }, + { 0x69, 0xCB, 0x19, 0x2B, 0x84, 0x44, 0x00, 0x5C, 0x8C, 0x0C, 0xEB, 0x12, 0xC8, 0x46, 0x86, 0x07, 0x68, 0x18, 0x8C, 0xDA, 0x0A, 0xEC, 0x27, 0xA9, 0xC8, 0xA5, 0x5C, 0xDE, 0xE2, 0x12, 0x36, 0x32 }, + { 0xDB, 0x44, 0x4C, 0x15, 0x59, 0x7B, 0x5F, 0x1A, 0x03, 0xD1, 0xF9, 0xED, 0xD1, 0x6E, 0x4A, 0x9F, 0x43, 0xA6, 0x67, 0xCC, 0x27, 0x51, 0x75, 0xDF, 0xA2, 0xB7, 0x04, 0xE3, 0xBB, 0x1A, 0x9B, 0x83 }, + { 0x3F, 0xB7, 0x35, 0x06, 0x1A, 0xBC, 0x51, 0x9D, 0xFE, 0x97, 0x9E, 0x54, 0xC1, 0xEE, 0x5B, 0xFA, 0xD0, 0xA9, 0xD8, 0x58, 0xB3, 0x31, 0x5B, 0xAD, 0x34, 0xBD, 0xE9, 0x99, 0xEF, 0xD7, 0x24, 0xDD } +}; + +void blake2s_selftest(void) +{ + uint8_t key[BLAKE2S_KEYBYTES]; + uint8_t buf[ARRAY_SIZE(blake2s_testvecs)]; + uint8_t hash[BLAKE2S_OUTBYTES]; + size_t i; + bool success = true; + + for (i = 0; i < BLAKE2S_KEYBYTES; ++i) + key[i] = (uint8_t)i; + + for (i = 0; i < ARRAY_SIZE(blake2s_testvecs); ++i) + buf[i] = (uint8_t)i; + + for (i = 0; i < ARRAY_SIZE(blake2s_keyed_testvecs); ++i) { + blake2s(hash, 
buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES); + if (memcmp(hash, blake2s_keyed_testvecs[i], BLAKE2S_OUTBYTES)) { + pr_info("blake2s keyed self-test %zu: FAIL\n", i + 1); + success = false; + } + } + + for (i = 0; i < ARRAY_SIZE(blake2s_testvecs); ++i) { + blake2s(hash, buf, NULL, BLAKE2S_OUTBYTES, i, 0); + if (memcmp(hash, blake2s_testvecs[i], BLAKE2S_OUTBYTES)) { + pr_info("blake2s unkeyed self-test %zu: FAIL\n", i + i); + success = false; + } + } + + if (success) + pr_info("blake2s self-tests: pass\n"); +} +#endif diff --git a/src/crypto/blake2s.h b/src/crypto/blake2s.h new file mode 100644 index 0000000..1b42141 --- /dev/null +++ b/src/crypto/blake2s.h @@ -0,0 +1,36 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#ifndef BLAKE2S_H +#define BLAKE2S_H + +#include + +enum blake2s_lengths { + BLAKE2S_BLOCKBYTES = 64, + BLAKE2S_OUTBYTES = 32, + BLAKE2S_KEYBYTES = 32 +}; + +struct blake2s_state { + uint32_t h[8]; + uint32_t t[2]; + uint32_t f[2]; + uint8_t buf[2 * BLAKE2S_BLOCKBYTES]; + size_t buflen; + uint8_t last_node; +}; + +void blake2s(uint8_t *out, const uint8_t *in, const uint8_t *key, const uint8_t outlen, const uint64_t inlen, const uint8_t keylen); + +void blake2s_init(struct blake2s_state *state, const uint8_t outlen); +void blake2s_init_key(struct blake2s_state *state, const uint8_t outlen, const void *key, const uint8_t keylen); +void blake2s_update(struct blake2s_state *state, const uint8_t *in, uint64_t inlen); +void blake2s_final(struct blake2s_state *state, uint8_t *out, uint8_t outlen); + +void blake2s_hmac(uint8_t *out, const uint8_t *in, const uint8_t *key, const uint8_t outlen, const uint64_t inlen, const uint64_t keylen); + +#ifdef DEBUG +void blake2s_selftest(void); +#endif + +#endif diff --git a/src/crypto/chacha20-avx2-x86_64.S b/src/crypto/chacha20-avx2-x86_64.S new file mode 100644 index 0000000..2bbbc98 --- /dev/null +++ b/src/crypto/chacha20-avx2-x86_64.S @@ -0,0 +1,443 @@ +/* + * ChaCha20 256-bit cipher 
algorithm, RFC7539, x64 AVX2 functions
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include
+
+.data
+.align 32
+
+ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003
+	.octa 0x0e0d0c0f0a09080b0605040702010003
+ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302
+	.octa 0x0d0c0f0e09080b0a0504070601000302
+CTRINC: .octa 0x00000003000000020000000100000000
+	.octa 0x00000007000000060000000500000004
+
+.text
+
+ENTRY(chacha20_asm_8block_xor_avx2)
+	# %rdi: Input state matrix, s
+	# %rsi: 8 data blocks output, o
+	# %rdx: 8 data blocks input, i
+
+	# This function encrypts eight consecutive ChaCha20 blocks by loading
+	# the state matrix in AVX registers eight times. As we need some
+	# scratch registers, we save the first four registers on the stack. The
+	# algorithm performs each operation on the corresponding word of each
+	# state matrix, hence requires no word shuffling. For final XORing step
+	# we transpose the matrix by interleaving 32-, 64- and then 128-bit
+	# words, which allows us to do XOR in AVX registers. 8/16-bit word
+	# rotation is done with the slightly better performing byte shuffling,
+	# 7/12-bit word rotation uses traditional shift+OR.
+
+	vzeroupper
+	# 4 * 32 byte stack, 32-byte aligned; original %rsp is kept in %r8
+	mov %rsp, %r8
+	and $~31, %rsp
+	sub $0x80, %rsp
+
+	# x0..15[0-7] = s[0..15]
+	vpbroadcastd 0x00(%rdi),%ymm0
+	vpbroadcastd 0x04(%rdi),%ymm1
+	vpbroadcastd 0x08(%rdi),%ymm2
+	vpbroadcastd 0x0c(%rdi),%ymm3
+	vpbroadcastd 0x10(%rdi),%ymm4
+	vpbroadcastd 0x14(%rdi),%ymm5
+	vpbroadcastd 0x18(%rdi),%ymm6
+	vpbroadcastd 0x1c(%rdi),%ymm7
+	vpbroadcastd 0x20(%rdi),%ymm8
+	vpbroadcastd 0x24(%rdi),%ymm9
+	vpbroadcastd 0x28(%rdi),%ymm10
+	vpbroadcastd 0x2c(%rdi),%ymm11
+	vpbroadcastd 0x30(%rdi),%ymm12
+	vpbroadcastd 0x34(%rdi),%ymm13
+	vpbroadcastd 0x38(%rdi),%ymm14
+	vpbroadcastd 0x3c(%rdi),%ymm15
+	# x0..3 on stack
+	vmovdqa %ymm0,0x00(%rsp)
+	vmovdqa %ymm1,0x20(%rsp)
+	vmovdqa %ymm2,0x40(%rsp)
+	vmovdqa %ymm3,0x60(%rsp)
+
+	vmovdqa CTRINC(%rip),%ymm1
+	vmovdqa ROT8(%rip),%ymm2
+	vmovdqa ROT16(%rip),%ymm3
+
+	# x12 += counter values 0-7
+	vpaddd %ymm1,%ymm12,%ymm12
+
+	mov $10,%ecx
+
+.Ldoubleround8:
+	# x0 += x4, x12 = rotl32(x12 ^ x0, 16)
+	vpaddd 0x00(%rsp),%ymm4,%ymm0
+	vmovdqa %ymm0,0x00(%rsp)
+	vpxor %ymm0,%ymm12,%ymm12
+	vpshufb %ymm3,%ymm12,%ymm12
+	# x1 += x5, x13 = rotl32(x13 ^ x1, 16)
+	vpaddd 0x20(%rsp),%ymm5,%ymm0
+	vmovdqa %ymm0,0x20(%rsp)
+	vpxor %ymm0,%ymm13,%ymm13
+	vpshufb %ymm3,%ymm13,%ymm13
+	# x2 += x6, x14 = rotl32(x14 ^ x2, 16)
+	vpaddd 0x40(%rsp),%ymm6,%ymm0
+	vmovdqa %ymm0,0x40(%rsp)
+	vpxor %ymm0,%ymm14,%ymm14
+	vpshufb %ymm3,%ymm14,%ymm14
+	# x3 += x7, x15 = rotl32(x15 ^ x3, 16)
+	vpaddd 0x60(%rsp),%ymm7,%ymm0
+	vmovdqa %ymm0,0x60(%rsp)
+	vpxor %ymm0,%ymm15,%ymm15
+	vpshufb %ymm3,%ymm15,%ymm15
+
+	# x8 += x12, x4 = rotl32(x4 ^ x8, 12)
+	vpaddd %ymm12,%ymm8,%ymm8
+	vpxor %ymm8,%ymm4,%ymm4
+	vpslld $12,%ymm4,%ymm0
+	vpsrld $20,%ymm4,%ymm4
+	vpor %ymm0,%ymm4,%ymm4
+	# x9 += x13, x5 = rotl32(x5 ^ x9, 12)
+	vpaddd %ymm13,%ymm9,%ymm9
+	vpxor %ymm9,%ymm5,%ymm5
+	vpslld $12,%ymm5,%ymm0
+	vpsrld $20,%ymm5,%ymm5
+	vpor %ymm0,%ymm5,%ymm5
+	# x10 += x14, x6 = rotl32(x6 ^ x10, 12)
+	vpaddd %ymm14,%ymm10,%ymm10
+	vpxor %ymm10,%ymm6,%ymm6
+	vpslld $12,%ymm6,%ymm0
+	vpsrld $20,%ymm6,%ymm6
+	vpor %ymm0,%ymm6,%ymm6
+	# x11 += x15, x7 = rotl32(x7 ^ x11, 12)
+	vpaddd %ymm15,%ymm11,%ymm11
+	vpxor %ymm11,%ymm7,%ymm7
+	vpslld $12,%ymm7,%ymm0
+	vpsrld $20,%ymm7,%ymm7
+	vpor %ymm0,%ymm7,%ymm7
+
+	# x0 += x4, x12 = rotl32(x12 ^ x0, 8)
+	vpaddd 0x00(%rsp),%ymm4,%ymm0
+	vmovdqa %ymm0,0x00(%rsp)
+	vpxor %ymm0,%ymm12,%ymm12
+	vpshufb %ymm2,%ymm12,%ymm12
+	# x1 += x5, x13 = rotl32(x13 ^ x1, 8)
+	vpaddd 0x20(%rsp),%ymm5,%ymm0
+	vmovdqa %ymm0,0x20(%rsp)
+	vpxor %ymm0,%ymm13,%ymm13
+	vpshufb %ymm2,%ymm13,%ymm13
+	# x2 += x6, x14 = rotl32(x14 ^ x2, 8)
+	vpaddd 0x40(%rsp),%ymm6,%ymm0
+	vmovdqa %ymm0,0x40(%rsp)
+	vpxor %ymm0,%ymm14,%ymm14
+	vpshufb %ymm2,%ymm14,%ymm14
+	# x3 += x7, x15 = rotl32(x15 ^ x3, 8)
+	vpaddd 0x60(%rsp),%ymm7,%ymm0
+	vmovdqa %ymm0,0x60(%rsp)
+	vpxor %ymm0,%ymm15,%ymm15
+	vpshufb %ymm2,%ymm15,%ymm15
+
+	# x8 += x12, x4 = rotl32(x4 ^ x8, 7)
+	vpaddd %ymm12,%ymm8,%ymm8
+	vpxor %ymm8,%ymm4,%ymm4
+	vpslld $7,%ymm4,%ymm0
+	vpsrld $25,%ymm4,%ymm4
+	vpor %ymm0,%ymm4,%ymm4
+	# x9 += x13, x5 = rotl32(x5 ^ x9, 7)
+	vpaddd %ymm13,%ymm9,%ymm9
+	vpxor %ymm9,%ymm5,%ymm5
+	vpslld $7,%ymm5,%ymm0
+	vpsrld $25,%ymm5,%ymm5
+	vpor %ymm0,%ymm5,%ymm5
+	# x10 += x14, x6 = rotl32(x6 ^ x10, 7)
+	vpaddd %ymm14,%ymm10,%ymm10
+	vpxor %ymm10,%ymm6,%ymm6
+	vpslld $7,%ymm6,%ymm0
+	vpsrld $25,%ymm6,%ymm6
+	vpor %ymm0,%ymm6,%ymm6
+	# x11 += x15, x7 = rotl32(x7 ^ x11, 7)
+	vpaddd %ymm15,%ymm11,%ymm11
+	vpxor %ymm11,%ymm7,%ymm7
+	vpslld $7,%ymm7,%ymm0
+	vpsrld $25,%ymm7,%ymm7
+	vpor %ymm0,%ymm7,%ymm7
+
+	# x0 += x5, x15 = rotl32(x15 ^ x0, 16)
+	vpaddd 0x00(%rsp),%ymm5,%ymm0
+	vmovdqa %ymm0,0x00(%rsp)
+	vpxor %ymm0,%ymm15,%ymm15
+	vpshufb %ymm3,%ymm15,%ymm15
+	# x1 += x6, x12 = rotl32(x12 ^ x1, 16)
+	vpaddd 0x20(%rsp),%ymm6,%ymm0
+	vmovdqa %ymm0,0x20(%rsp)
+	vpxor %ymm0,%ymm12,%ymm12
+	vpshufb %ymm3,%ymm12,%ymm12
+	# x2 += x7, x13 = rotl32(x13 ^ x2, 16)
+	vpaddd 0x40(%rsp),%ymm7,%ymm0
+	vmovdqa %ymm0,0x40(%rsp)
+	vpxor %ymm0,%ymm13,%ymm13
+	vpshufb %ymm3,%ymm13,%ymm13
+	# x3 += x4, x14 = rotl32(x14 ^ x3, 16)
+	vpaddd 0x60(%rsp),%ymm4,%ymm0
+	vmovdqa %ymm0,0x60(%rsp)
+	vpxor %ymm0,%ymm14,%ymm14
+	vpshufb %ymm3,%ymm14,%ymm14
+
+	# x10 += x15, x5 = rotl32(x5 ^ x10, 12)
+	vpaddd %ymm15,%ymm10,%ymm10
+	vpxor %ymm10,%ymm5,%ymm5
+	vpslld $12,%ymm5,%ymm0
+	vpsrld $20,%ymm5,%ymm5
+	vpor %ymm0,%ymm5,%ymm5
+	# x11 += x12, x6 = rotl32(x6 ^ x11, 12)
+	vpaddd %ymm12,%ymm11,%ymm11
+	vpxor %ymm11,%ymm6,%ymm6
+	vpslld $12,%ymm6,%ymm0
+	vpsrld $20,%ymm6,%ymm6
+	vpor %ymm0,%ymm6,%ymm6
+	# x8 += x13, x7 = rotl32(x7 ^ x8, 12)
+	vpaddd %ymm13,%ymm8,%ymm8
+	vpxor %ymm8,%ymm7,%ymm7
+	vpslld $12,%ymm7,%ymm0
+	vpsrld $20,%ymm7,%ymm7
+	vpor %ymm0,%ymm7,%ymm7
+	# x9 += x14, x4 = rotl32(x4 ^ x9, 12)
+	vpaddd %ymm14,%ymm9,%ymm9
+	vpxor %ymm9,%ymm4,%ymm4
+	vpslld $12,%ymm4,%ymm0
+	vpsrld $20,%ymm4,%ymm4
+	vpor %ymm0,%ymm4,%ymm4
+
+	# x0 += x5, x15 = rotl32(x15 ^ x0, 8)
+	vpaddd 0x00(%rsp),%ymm5,%ymm0
+	vmovdqa %ymm0,0x00(%rsp)
+	vpxor %ymm0,%ymm15,%ymm15
+	vpshufb %ymm2,%ymm15,%ymm15
+	# x1 += x6, x12 = rotl32(x12 ^ x1, 8)
+	vpaddd 0x20(%rsp),%ymm6,%ymm0
+	vmovdqa %ymm0,0x20(%rsp)
+	vpxor %ymm0,%ymm12,%ymm12
+	vpshufb %ymm2,%ymm12,%ymm12
+	# x2 += x7, x13 = rotl32(x13 ^ x2, 8)
+	vpaddd 0x40(%rsp),%ymm7,%ymm0
+	vmovdqa %ymm0,0x40(%rsp)
+	vpxor %ymm0,%ymm13,%ymm13
+	vpshufb %ymm2,%ymm13,%ymm13
+	# x3 += x4, x14 = rotl32(x14 ^ x3, 8)
+	vpaddd 0x60(%rsp),%ymm4,%ymm0
+	vmovdqa %ymm0,0x60(%rsp)
+	vpxor %ymm0,%ymm14,%ymm14
+	vpshufb %ymm2,%ymm14,%ymm14
+
+	# x10 += x15, x5 = rotl32(x5 ^ x10, 7)
+	vpaddd %ymm15,%ymm10,%ymm10
+	vpxor %ymm10,%ymm5,%ymm5
+	vpslld $7,%ymm5,%ymm0
+	vpsrld $25,%ymm5,%ymm5
+	vpor %ymm0,%ymm5,%ymm5
+	# x11 += x12, x6 = rotl32(x6 ^ x11, 7)
+	vpaddd %ymm12,%ymm11,%ymm11
+	vpxor %ymm11,%ymm6,%ymm6
+	vpslld $7,%ymm6,%ymm0
+	vpsrld $25,%ymm6,%ymm6
+	vpor %ymm0,%ymm6,%ymm6
+	# x8 += x13, x7 = rotl32(x7 ^ x8, 7)
+	vpaddd %ymm13,%ymm8,%ymm8
+	vpxor %ymm8,%ymm7,%ymm7
+	vpslld $7,%ymm7,%ymm0
+	vpsrld $25,%ymm7,%ymm7
+	vpor %ymm0,%ymm7,%ymm7
+	# x9 += x14, x4 = rotl32(x4 ^ x9, 7)
+	vpaddd %ymm14,%ymm9,%ymm9
+	vpxor %ymm9,%ymm4,%ymm4
+	vpslld $7,%ymm4,%ymm0
+	vpsrld $25,%ymm4,%ymm4
+	vpor %ymm0,%ymm4,%ymm4
+
+	dec %ecx
+	jnz .Ldoubleround8
+
+	# x0..15[0-7] += s[0..15]
+	vpbroadcastd 0x00(%rdi),%ymm0
+	vpaddd 0x00(%rsp),%ymm0,%ymm0
+	vmovdqa %ymm0,0x00(%rsp)
+	vpbroadcastd 0x04(%rdi),%ymm0
+	vpaddd 0x20(%rsp),%ymm0,%ymm0
+	vmovdqa %ymm0,0x20(%rsp)
+	vpbroadcastd 0x08(%rdi),%ymm0
+	vpaddd 0x40(%rsp),%ymm0,%ymm0
+	vmovdqa %ymm0,0x40(%rsp)
+	vpbroadcastd 0x0c(%rdi),%ymm0
+	vpaddd 0x60(%rsp),%ymm0,%ymm0
+	vmovdqa %ymm0,0x60(%rsp)
+	vpbroadcastd 0x10(%rdi),%ymm0
+	vpaddd %ymm0,%ymm4,%ymm4
+	vpbroadcastd 0x14(%rdi),%ymm0
+	vpaddd %ymm0,%ymm5,%ymm5
+	vpbroadcastd 0x18(%rdi),%ymm0
+	vpaddd %ymm0,%ymm6,%ymm6
+	vpbroadcastd 0x1c(%rdi),%ymm0
+	vpaddd %ymm0,%ymm7,%ymm7
+	vpbroadcastd 0x20(%rdi),%ymm0
+	vpaddd %ymm0,%ymm8,%ymm8
+	vpbroadcastd 0x24(%rdi),%ymm0
+	vpaddd %ymm0,%ymm9,%ymm9
+	vpbroadcastd 0x28(%rdi),%ymm0
+	vpaddd %ymm0,%ymm10,%ymm10
+	vpbroadcastd 0x2c(%rdi),%ymm0
+	vpaddd %ymm0,%ymm11,%ymm11
+	vpbroadcastd 0x30(%rdi),%ymm0
+	vpaddd %ymm0,%ymm12,%ymm12
+	vpbroadcastd 0x34(%rdi),%ymm0
+	vpaddd %ymm0,%ymm13,%ymm13
+	vpbroadcastd 0x38(%rdi),%ymm0
+	vpaddd %ymm0,%ymm14,%ymm14
+	vpbroadcastd 0x3c(%rdi),%ymm0
+	vpaddd %ymm0,%ymm15,%ymm15
+
+	# x12 += counter values 0-7
+	vpaddd %ymm1,%ymm12,%ymm12
+
+	# interleave 32-bit words in state n, n+1
+	vmovdqa 0x00(%rsp),%ymm0
+	vmovdqa 0x20(%rsp),%ymm1
+	vpunpckldq %ymm1,%ymm0,%ymm2
+	vpunpckhdq %ymm1,%ymm0,%ymm1
+	vmovdqa %ymm2,0x00(%rsp)
+	vmovdqa %ymm1,0x20(%rsp)
+	vmovdqa 0x40(%rsp),%ymm0
+	vmovdqa 0x60(%rsp),%ymm1
+	vpunpckldq %ymm1,%ymm0,%ymm2
+	vpunpckhdq %ymm1,%ymm0,%ymm1
+	vmovdqa %ymm2,0x40(%rsp)
+	vmovdqa %ymm1,0x60(%rsp)
+	vmovdqa %ymm4,%ymm0
+	vpunpckldq %ymm5,%ymm0,%ymm4
+	vpunpckhdq %ymm5,%ymm0,%ymm5
+	vmovdqa %ymm6,%ymm0
+	vpunpckldq %ymm7,%ymm0,%ymm6
+	vpunpckhdq %ymm7,%ymm0,%ymm7
+	vmovdqa %ymm8,%ymm0
+	vpunpckldq %ymm9,%ymm0,%ymm8
+	vpunpckhdq %ymm9,%ymm0,%ymm9
+	vmovdqa %ymm10,%ymm0
+	vpunpckldq %ymm11,%ymm0,%ymm10
+	vpunpckhdq %ymm11,%ymm0,%ymm11
+	vmovdqa %ymm12,%ymm0
+	vpunpckldq %ymm13,%ymm0,%ymm12
+	vpunpckhdq %ymm13,%ymm0,%ymm13
+	vmovdqa %ymm14,%ymm0
+	vpunpckldq %ymm15,%ymm0,%ymm14
+	vpunpckhdq %ymm15,%ymm0,%ymm15
+
+	# interleave 64-bit words in state n, n+2
+	vmovdqa 0x00(%rsp),%ymm0
+	vmovdqa 0x40(%rsp),%ymm2
+	vpunpcklqdq %ymm2,%ymm0,%ymm1
+	vpunpckhqdq %ymm2,%ymm0,%ymm2
+	vmovdqa %ymm1,0x00(%rsp)
+	vmovdqa %ymm2,0x40(%rsp)
+	vmovdqa 0x20(%rsp),%ymm0
+	vmovdqa 0x60(%rsp),%ymm2
+	vpunpcklqdq %ymm2,%ymm0,%ymm1
+	vpunpckhqdq %ymm2,%ymm0,%ymm2
+	vmovdqa %ymm1,0x20(%rsp)
+	vmovdqa %ymm2,0x60(%rsp)
+	vmovdqa %ymm4,%ymm0
+	vpunpcklqdq %ymm6,%ymm0,%ymm4
+	vpunpckhqdq %ymm6,%ymm0,%ymm6
+	vmovdqa %ymm5,%ymm0
+	vpunpcklqdq %ymm7,%ymm0,%ymm5
+	vpunpckhqdq %ymm7,%ymm0,%ymm7
+	vmovdqa %ymm8,%ymm0
+	vpunpcklqdq %ymm10,%ymm0,%ymm8
+	vpunpckhqdq %ymm10,%ymm0,%ymm10
+	vmovdqa %ymm9,%ymm0
+	vpunpcklqdq %ymm11,%ymm0,%ymm9
+	vpunpckhqdq %ymm11,%ymm0,%ymm11
+	vmovdqa %ymm12,%ymm0
+	vpunpcklqdq %ymm14,%ymm0,%ymm12
+	vpunpckhqdq %ymm14,%ymm0,%ymm14
+	vmovdqa %ymm13,%ymm0
+	vpunpcklqdq %ymm15,%ymm0,%ymm13
+	vpunpckhqdq %ymm15,%ymm0,%ymm15
+
+	# interleave 128-bit words in state n, n+4
+	vmovdqa 0x00(%rsp),%ymm0
+	vperm2i128 $0x20,%ymm4,%ymm0,%ymm1
+	vperm2i128 $0x31,%ymm4,%ymm0,%ymm4
+	vmovdqa %ymm1,0x00(%rsp)
+	vmovdqa 0x20(%rsp),%ymm0
+	vperm2i128 $0x20,%ymm5,%ymm0,%ymm1
+	vperm2i128 $0x31,%ymm5,%ymm0,%ymm5
+	vmovdqa %ymm1,0x20(%rsp)
+	vmovdqa 0x40(%rsp),%ymm0
+	vperm2i128 $0x20,%ymm6,%ymm0,%ymm1
+	vperm2i128 $0x31,%ymm6,%ymm0,%ymm6
+	vmovdqa %ymm1,0x40(%rsp)
+	vmovdqa 0x60(%rsp),%ymm0
+	vperm2i128 $0x20,%ymm7,%ymm0,%ymm1
+	vperm2i128 $0x31,%ymm7,%ymm0,%ymm7
+	vmovdqa %ymm1,0x60(%rsp)
+	vperm2i128 $0x20,%ymm12,%ymm8,%ymm0
+	vperm2i128 $0x31,%ymm12,%ymm8,%ymm12
+	vmovdqa %ymm0,%ymm8
+	vperm2i128 $0x20,%ymm13,%ymm9,%ymm0
+	vperm2i128 $0x31,%ymm13,%ymm9,%ymm13
+	vmovdqa %ymm0,%ymm9
+	vperm2i128 $0x20,%ymm14,%ymm10,%ymm0
+	vperm2i128 $0x31,%ymm14,%ymm10,%ymm14
+	vmovdqa %ymm0,%ymm10
+	vperm2i128 $0x20,%ymm15,%ymm11,%ymm0
+	vperm2i128 $0x31,%ymm15,%ymm11,%ymm15
+	vmovdqa %ymm0,%ymm11
+
+	# xor with corresponding input, write to output
+	vmovdqa 0x00(%rsp),%ymm0
+	vpxor 0x0000(%rdx),%ymm0,%ymm0
+	vmovdqu %ymm0,0x0000(%rsi)
+	vmovdqa 0x20(%rsp),%ymm0
+	vpxor 0x0080(%rdx),%ymm0,%ymm0
+	vmovdqu %ymm0,0x0080(%rsi)
+	vmovdqa 0x40(%rsp),%ymm0
+	vpxor 0x0040(%rdx),%ymm0,%ymm0
+	vmovdqu %ymm0,0x0040(%rsi)
+	vmovdqa 0x60(%rsp),%ymm0
+	vpxor 0x00c0(%rdx),%ymm0,%ymm0
+	vmovdqu %ymm0,0x00c0(%rsi)
+	vpxor 0x0100(%rdx),%ymm4,%ymm4
+	vmovdqu %ymm4,0x0100(%rsi)
+	vpxor 0x0180(%rdx),%ymm5,%ymm5
+	vmovdqu %ymm5,0x00180(%rsi)
+	vpxor 0x0140(%rdx),%ymm6,%ymm6
+	vmovdqu %ymm6,0x0140(%rsi)
+	vpxor 0x01c0(%rdx),%ymm7,%ymm7
+	vmovdqu %ymm7,0x01c0(%rsi)
+	vpxor 0x0020(%rdx),%ymm8,%ymm8
+	vmovdqu %ymm8,0x0020(%rsi)
+	vpxor 0x00a0(%rdx),%ymm9,%ymm9
+	vmovdqu %ymm9,0x00a0(%rsi)
+	vpxor 0x0060(%rdx),%ymm10,%ymm10
+	vmovdqu %ymm10,0x0060(%rsi)
+	vpxor 0x00e0(%rdx),%ymm11,%ymm11
+	vmovdqu %ymm11,0x00e0(%rsi)
+	vpxor 0x0120(%rdx),%ymm12,%ymm12
+	vmovdqu %ymm12,0x0120(%rsi)
+	vpxor 0x01a0(%rdx),%ymm13,%ymm13
+	vmovdqu %ymm13,0x01a0(%rsi)
+	vpxor 0x0160(%rdx),%ymm14,%ymm14
+	vmovdqu %ymm14,0x0160(%rsi)
+	vpxor 0x01e0(%rdx),%ymm15,%ymm15
+	vmovdqu %ymm15,0x01e0(%rsi)
+
+	vzeroupper
+	mov %r8,%rsp
+	ret
+ENDPROC(chacha20_asm_8block_xor_avx2)
diff --git a/src/crypto/chacha20-ssse3-x86_64.S b/src/crypto/chacha20-ssse3-x86_64.S
new file mode 100644
index 0000000..d7600b3
--- /dev/null
+++ b/src/crypto/chacha20-ssse3-x86_64.S
@@ -0,0 +1,627 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software
Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include
+
+.data
+.align 16
+
+ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003
+ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302
+CTRINC: .octa 0x00000003000000020000000100000000
+
+.text
+
+ENTRY(chacha20_asm_block_xor_ssse3)
+	# %rdi: Input state matrix, s
+	# %rsi: 1 data block output, o
+	# %rdx: 1 data block input, i
+
+	# This function encrypts one ChaCha20 block by loading the state matrix
+	# in four SSE registers. It performs matrix operation on four words in
+	# parallel, but requires shuffling to rearrange the words after each
+	# round. 8/16-bit word rotation is done with the slightly better
+	# performing SSSE3 byte shuffling, 7/12-bit word rotation uses
+	# traditional shift+OR.
+
+	# x0..3 = s0..3 (movdqa loads: the state at %rdi must be 16-byte aligned)
+	movdqa 0x00(%rdi),%xmm0
+	movdqa 0x10(%rdi),%xmm1
+	movdqa 0x20(%rdi),%xmm2
+	movdqa 0x30(%rdi),%xmm3
+	movdqa %xmm0,%xmm8
+	movdqa %xmm1,%xmm9
+	movdqa %xmm2,%xmm10
+	movdqa %xmm3,%xmm11
+
+	movdqa ROT8(%rip),%xmm4
+	movdqa ROT16(%rip),%xmm5
+
+	mov $10,%ecx
+
+.Ldoubleround:
+
+	# x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+	paddd %xmm1,%xmm0
+	pxor %xmm0,%xmm3
+	pshufb %xmm5,%xmm3
+
+	# x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+	paddd %xmm3,%xmm2
+	pxor %xmm2,%xmm1
+	movdqa %xmm1,%xmm6
+	pslld $12,%xmm6
+	psrld $20,%xmm1
+	por %xmm6,%xmm1
+
+	# x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+	paddd %xmm1,%xmm0
+	pxor %xmm0,%xmm3
+	pshufb %xmm4,%xmm3
+
+	# x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+	paddd %xmm3,%xmm2
+	pxor %xmm2,%xmm1
+	movdqa %xmm1,%xmm7
+	pslld $7,%xmm7
+	psrld $25,%xmm1
+	por %xmm7,%xmm1
+
+	# x1 = shuffle32(x1, MASK(0, 3, 2, 1))
+	pshufd $0x39,%xmm1,%xmm1
+	# x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+	pshufd $0x4e,%xmm2,%xmm2
+	# x3 = shuffle32(x3, MASK(2, 1, 0, 3))
+	pshufd $0x93,%xmm3,%xmm3
+
+	# x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+	paddd %xmm1,%xmm0
+	pxor %xmm0,%xmm3
+	pshufb %xmm5,%xmm3
+
+	# x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+	paddd %xmm3,%xmm2
+	pxor %xmm2,%xmm1
+	movdqa %xmm1,%xmm6
+	pslld $12,%xmm6
+	psrld $20,%xmm1
+	por %xmm6,%xmm1
+
+	# x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+	paddd %xmm1,%xmm0
+	pxor %xmm0,%xmm3
+	pshufb %xmm4,%xmm3
+
+	# x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+	paddd %xmm3,%xmm2
+	pxor %xmm2,%xmm1
+	movdqa %xmm1,%xmm7
+	pslld $7,%xmm7
+	psrld $25,%xmm1
+	por %xmm7,%xmm1
+
+	# x1 = shuffle32(x1, MASK(2, 1, 0, 3))
+	pshufd $0x93,%xmm1,%xmm1
+	# x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+	pshufd $0x4e,%xmm2,%xmm2
+	# x3 = shuffle32(x3, MASK(0, 3, 2, 1))
+	pshufd $0x39,%xmm3,%xmm3
+
+	dec %ecx
+	jnz .Ldoubleround
+
+	# o0 = i0 ^ (x0 + s0)
+	movdqu 0x00(%rdx),%xmm4
+	paddd %xmm8,%xmm0
+	pxor %xmm4,%xmm0
+	movdqu %xmm0,0x00(%rsi)
+	# o1 = i1 ^ (x1 + s1)
+	movdqu 0x10(%rdx),%xmm5
+	paddd %xmm9,%xmm1
+	pxor %xmm5,%xmm1
+	movdqu %xmm1,0x10(%rsi)
+	# o2 = i2 ^ (x2 + s2)
+	movdqu 0x20(%rdx),%xmm6
+	paddd %xmm10,%xmm2
+	pxor %xmm6,%xmm2
+	movdqu %xmm2,0x20(%rsi)
+	# o3 = i3 ^ (x3 + s3)
+	movdqu 0x30(%rdx),%xmm7
+	paddd %xmm11,%xmm3
+	pxor %xmm7,%xmm3
+	movdqu %xmm3,0x30(%rsi)
+
+	ret
+ENDPROC(chacha20_asm_block_xor_ssse3)
+
+ENTRY(chacha20_asm_4block_xor_ssse3)
+	# %rdi: Input state matrix, s
+	# %rsi: 4 data blocks output, o
+	# %rdx: 4 data blocks input, i
+
+	# This function encrypts four consecutive ChaCha20 blocks by loading
+	# the state matrix in SSE registers four times. As we need some scratch
+	# registers, we save the first four registers on the stack. The
+	# algorithm performs each operation on the corresponding word of each
+	# state matrix, hence requires no word shuffling. For final XORing step
+	# we transpose the matrix by interleaving 32- and then 64-bit words,
+	# which allows us to do XOR in SSE registers. 8/16-bit word rotation is
+	# done with the slightly better performing SSSE3 byte shuffling,
+	# 7/12-bit word rotation uses traditional shift+OR.
+ + mov %rsp,%r11 + sub $0x80,%rsp + and $~63,%rsp + + # x0..15[0-3] = s0..3[0..3] + movq 0x00(%rdi),%xmm1 + pshufd $0x00,%xmm1,%xmm0 + pshufd $0x55,%xmm1,%xmm1 + movq 0x08(%rdi),%xmm3 + pshufd $0x00,%xmm3,%xmm2 + pshufd $0x55,%xmm3,%xmm3 + movq 0x10(%rdi),%xmm5 + pshufd $0x00,%xmm5,%xmm4 + pshufd $0x55,%xmm5,%xmm5 + movq 0x18(%rdi),%xmm7 + pshufd $0x00,%xmm7,%xmm6 + pshufd $0x55,%xmm7,%xmm7 + movq 0x20(%rdi),%xmm9 + pshufd $0x00,%xmm9,%xmm8 + pshufd $0x55,%xmm9,%xmm9 + movq 0x28(%rdi),%xmm11 + pshufd $0x00,%xmm11,%xmm10 + pshufd $0x55,%xmm11,%xmm11 + movq 0x30(%rdi),%xmm13 + pshufd $0x00,%xmm13,%xmm12 + pshufd $0x55,%xmm13,%xmm13 + movq 0x38(%rdi),%xmm15 + pshufd $0x00,%xmm15,%xmm14 + pshufd $0x55,%xmm15,%xmm15 + # x0..3 on stack + movdqa %xmm0,0x00(%rsp) + movdqa %xmm1,0x10(%rsp) + movdqa %xmm2,0x20(%rsp) + movdqa %xmm3,0x30(%rsp) + + movdqa CTRINC(%rip),%xmm1 + movdqa ROT8(%rip),%xmm2 + movdqa ROT16(%rip),%xmm3 + + # x12 += counter values 0-3 + paddd %xmm1,%xmm12 + + mov $10,%ecx + +.Ldoubleround4: + # x0 += x4, x12 = rotl32(x12 ^ x0, 16) + movdqa 0x00(%rsp),%xmm0 + paddd %xmm4,%xmm0 + movdqa %xmm0,0x00(%rsp) + pxor %xmm0,%xmm12 + pshufb %xmm3,%xmm12 + # x1 += x5, x13 = rotl32(x13 ^ x1, 16) + movdqa 0x10(%rsp),%xmm0 + paddd %xmm5,%xmm0 + movdqa %xmm0,0x10(%rsp) + pxor %xmm0,%xmm13 + pshufb %xmm3,%xmm13 + # x2 += x6, x14 = rotl32(x14 ^ x2, 16) + movdqa 0x20(%rsp),%xmm0 + paddd %xmm6,%xmm0 + movdqa %xmm0,0x20(%rsp) + pxor %xmm0,%xmm14 + pshufb %xmm3,%xmm14 + # x3 += x7, x15 = rotl32(x15 ^ x3, 16) + movdqa 0x30(%rsp),%xmm0 + paddd %xmm7,%xmm0 + movdqa %xmm0,0x30(%rsp) + pxor %xmm0,%xmm15 + pshufb %xmm3,%xmm15 + + # x8 += x12, x4 = rotl32(x4 ^ x8, 12) + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm0 + pslld $12,%xmm0 + psrld $20,%xmm4 + por %xmm0,%xmm4 + # x9 += x13, x5 = rotl32(x5 ^ x9, 12) + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm0 + pslld $12,%xmm0 + psrld $20,%xmm5 + por %xmm0,%xmm5 + # x10 += x14, x6 = rotl32(x6 ^ x10, 12) + paddd 
%xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm0 + pslld $12,%xmm0 + psrld $20,%xmm6 + por %xmm0,%xmm6 + # x11 += x15, x7 = rotl32(x7 ^ x11, 12) + paddd %xmm15,%xmm11 + pxor %xmm11,%xmm7 + movdqa %xmm7,%xmm0 + pslld $12,%xmm0 + psrld $20,%xmm7 + por %xmm0,%xmm7 + + # x0 += x4, x12 = rotl32(x12 ^ x0, 8) + movdqa 0x00(%rsp),%xmm0 + paddd %xmm4,%xmm0 + movdqa %xmm0,0x00(%rsp) + pxor %xmm0,%xmm12 + pshufb %xmm2,%xmm12 + # x1 += x5, x13 = rotl32(x13 ^ x1, 8) + movdqa 0x10(%rsp),%xmm0 + paddd %xmm5,%xmm0 + movdqa %xmm0,0x10(%rsp) + pxor %xmm0,%xmm13 + pshufb %xmm2,%xmm13 + # x2 += x6, x14 = rotl32(x14 ^ x2, 8) + movdqa 0x20(%rsp),%xmm0 + paddd %xmm6,%xmm0 + movdqa %xmm0,0x20(%rsp) + pxor %xmm0,%xmm14 + pshufb %xmm2,%xmm14 + # x3 += x7, x15 = rotl32(x15 ^ x3, 8) + movdqa 0x30(%rsp),%xmm0 + paddd %xmm7,%xmm0 + movdqa %xmm0,0x30(%rsp) + pxor %xmm0,%xmm15 + pshufb %xmm2,%xmm15 + + # x8 += x12, x4 = rotl32(x4 ^ x8, 7) + paddd %xmm12,%xmm8 + pxor %xmm8,%xmm4 + movdqa %xmm4,%xmm0 + pslld $7,%xmm0 + psrld $25,%xmm4 + por %xmm0,%xmm4 + # x9 += x13, x5 = rotl32(x5 ^ x9, 7) + paddd %xmm13,%xmm9 + pxor %xmm9,%xmm5 + movdqa %xmm5,%xmm0 + pslld $7,%xmm0 + psrld $25,%xmm5 + por %xmm0,%xmm5 + # x10 += x14, x6 = rotl32(x6 ^ x10, 7) + paddd %xmm14,%xmm10 + pxor %xmm10,%xmm6 + movdqa %xmm6,%xmm0 + pslld $7,%xmm0 + psrld $25,%xmm6 + por %xmm0,%xmm6 + # x11 += x15, x7 = rotl32(x7 ^ x11, 7) + paddd %xmm15,%xmm11 + pxor %xmm11,%xmm7 + movdqa %xmm7,%xmm0 + pslld $7,%xmm0 + psrld $25,%xmm7 + por %xmm0,%xmm7 + + # x0 += x5, x15 = rotl32(x15 ^ x0, 16) + movdqa 0x00(%rsp),%xmm0 + paddd %xmm5,%xmm0 + movdqa %xmm0,0x00(%rsp) + pxor %xmm0,%xmm15 + pshufb %xmm3,%xmm15 + # x1 += x6, x12 = rotl32(x12 ^ x1, 16) + movdqa 0x10(%rsp),%xmm0 + paddd %xmm6,%xmm0 + movdqa %xmm0,0x10(%rsp) + pxor %xmm0,%xmm12 + pshufb %xmm3,%xmm12 + # x2 += x7, x13 = rotl32(x13 ^ x2, 16) + movdqa 0x20(%rsp),%xmm0 + paddd %xmm7,%xmm0 + movdqa %xmm0,0x20(%rsp) + pxor %xmm0,%xmm13 + pshufb %xmm3,%xmm13 + # x3 += x4, x14 = 
rotl32(x14 ^ x3, 16) + movdqa 0x30(%rsp),%xmm0 + paddd %xmm4,%xmm0 + movdqa %xmm0,0x30(%rsp) + pxor %xmm0,%xmm14 + pshufb %xmm3,%xmm14 + + # x10 += x15, x5 = rotl32(x5 ^ x10, 12) + paddd %xmm15,%xmm10 + pxor %xmm10,%xmm5 + movdqa %xmm5,%xmm0 + pslld $12,%xmm0 + psrld $20,%xmm5 + por %xmm0,%xmm5 + # x11 += x12, x6 = rotl32(x6 ^ x11, 12) + paddd %xmm12,%xmm11 + pxor %xmm11,%xmm6 + movdqa %xmm6,%xmm0 + pslld $12,%xmm0 + psrld $20,%xmm6 + por %xmm0,%xmm6 + # x8 += x13, x7 = rotl32(x7 ^ x8, 12) + paddd %xmm13,%xmm8 + pxor %xmm8,%xmm7 + movdqa %xmm7,%xmm0 + pslld $12,%xmm0 + psrld $20,%xmm7 + por %xmm0,%xmm7 + # x9 += x14, x4 = rotl32(x4 ^ x9, 12) + paddd %xmm14,%xmm9 + pxor %xmm9,%xmm4 + movdqa %xmm4,%xmm0 + pslld $12,%xmm0 + psrld $20,%xmm4 + por %xmm0,%xmm4 + + # x0 += x5, x15 = rotl32(x15 ^ x0, 8) + movdqa 0x00(%rsp),%xmm0 + paddd %xmm5,%xmm0 + movdqa %xmm0,0x00(%rsp) + pxor %xmm0,%xmm15 + pshufb %xmm2,%xmm15 + # x1 += x6, x12 = rotl32(x12 ^ x1, 8) + movdqa 0x10(%rsp),%xmm0 + paddd %xmm6,%xmm0 + movdqa %xmm0,0x10(%rsp) + pxor %xmm0,%xmm12 + pshufb %xmm2,%xmm12 + # x2 += x7, x13 = rotl32(x13 ^ x2, 8) + movdqa 0x20(%rsp),%xmm0 + paddd %xmm7,%xmm0 + movdqa %xmm0,0x20(%rsp) + pxor %xmm0,%xmm13 + pshufb %xmm2,%xmm13 + # x3 += x4, x14 = rotl32(x14 ^ x3, 8) + movdqa 0x30(%rsp),%xmm0 + paddd %xmm4,%xmm0 + movdqa %xmm0,0x30(%rsp) + pxor %xmm0,%xmm14 + pshufb %xmm2,%xmm14 + + # x10 += x15, x5 = rotl32(x5 ^ x10, 7) + paddd %xmm15,%xmm10 + pxor %xmm10,%xmm5 + movdqa %xmm5,%xmm0 + pslld $7,%xmm0 + psrld $25,%xmm5 + por %xmm0,%xmm5 + # x11 += x12, x6 = rotl32(x6 ^ x11, 7) + paddd %xmm12,%xmm11 + pxor %xmm11,%xmm6 + movdqa %xmm6,%xmm0 + pslld $7,%xmm0 + psrld $25,%xmm6 + por %xmm0,%xmm6 + # x8 += x13, x7 = rotl32(x7 ^ x8, 7) + paddd %xmm13,%xmm8 + pxor %xmm8,%xmm7 + movdqa %xmm7,%xmm0 + pslld $7,%xmm0 + psrld $25,%xmm7 + por %xmm0,%xmm7 + # x9 += x14, x4 = rotl32(x4 ^ x9, 7) + paddd %xmm14,%xmm9 + pxor %xmm9,%xmm4 + movdqa %xmm4,%xmm0 + pslld $7,%xmm0 + psrld $25,%xmm4 + por 
%xmm0,%xmm4 + + dec %ecx + jnz .Ldoubleround4 + + # x0[0-3] += s0[0] + # x1[0-3] += s0[1] + movq 0x00(%rdi),%xmm3 + pshufd $0x00,%xmm3,%xmm2 + pshufd $0x55,%xmm3,%xmm3 + paddd 0x00(%rsp),%xmm2 + movdqa %xmm2,0x00(%rsp) + paddd 0x10(%rsp),%xmm3 + movdqa %xmm3,0x10(%rsp) + # x2[0-3] += s0[2] + # x3[0-3] += s0[3] + movq 0x08(%rdi),%xmm3 + pshufd $0x00,%xmm3,%xmm2 + pshufd $0x55,%xmm3,%xmm3 + paddd 0x20(%rsp),%xmm2 + movdqa %xmm2,0x20(%rsp) + paddd 0x30(%rsp),%xmm3 + movdqa %xmm3,0x30(%rsp) + + # x4[0-3] += s1[0] + # x5[0-3] += s1[1] + movq 0x10(%rdi),%xmm3 + pshufd $0x00,%xmm3,%xmm2 + pshufd $0x55,%xmm3,%xmm3 + paddd %xmm2,%xmm4 + paddd %xmm3,%xmm5 + # x6[0-3] += s1[2] + # x7[0-3] += s1[3] + movq 0x18(%rdi),%xmm3 + pshufd $0x00,%xmm3,%xmm2 + pshufd $0x55,%xmm3,%xmm3 + paddd %xmm2,%xmm6 + paddd %xmm3,%xmm7 + + # x8[0-3] += s2[0] + # x9[0-3] += s2[1] + movq 0x20(%rdi),%xmm3 + pshufd $0x00,%xmm3,%xmm2 + pshufd $0x55,%xmm3,%xmm3 + paddd %xmm2,%xmm8 + paddd %xmm3,%xmm9 + # x10[0-3] += s2[2] + # x11[0-3] += s2[3] + movq 0x28(%rdi),%xmm3 + pshufd $0x00,%xmm3,%xmm2 + pshufd $0x55,%xmm3,%xmm3 + paddd %xmm2,%xmm10 + paddd %xmm3,%xmm11 + + # x12[0-3] += s3[0] + # x13[0-3] += s3[1] + movq 0x30(%rdi),%xmm3 + pshufd $0x00,%xmm3,%xmm2 + pshufd $0x55,%xmm3,%xmm3 + paddd %xmm2,%xmm12 + paddd %xmm3,%xmm13 + # x14[0-3] += s3[2] + # x15[0-3] += s3[3] + movq 0x38(%rdi),%xmm3 + pshufd $0x00,%xmm3,%xmm2 + pshufd $0x55,%xmm3,%xmm3 + paddd %xmm2,%xmm14 + paddd %xmm3,%xmm15 + + # x12 += counter values 0-3 + paddd %xmm1,%xmm12 + + # interleave 32-bit words in state n, n+1 + movdqa 0x00(%rsp),%xmm0 + movdqa 0x10(%rsp),%xmm1 + movdqa %xmm0,%xmm2 + punpckldq %xmm1,%xmm2 + punpckhdq %xmm1,%xmm0 + movdqa %xmm2,0x00(%rsp) + movdqa %xmm0,0x10(%rsp) + movdqa 0x20(%rsp),%xmm0 + movdqa 0x30(%rsp),%xmm1 + movdqa %xmm0,%xmm2 + punpckldq %xmm1,%xmm2 + punpckhdq %xmm1,%xmm0 + movdqa %xmm2,0x20(%rsp) + movdqa %xmm0,0x30(%rsp) + movdqa %xmm4,%xmm0 + punpckldq %xmm5,%xmm4 + punpckhdq %xmm5,%xmm0 + movdqa 
%xmm0,%xmm5 + movdqa %xmm6,%xmm0 + punpckldq %xmm7,%xmm6 + punpckhdq %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + movdqa %xmm8,%xmm0 + punpckldq %xmm9,%xmm8 + punpckhdq %xmm9,%xmm0 + movdqa %xmm0,%xmm9 + movdqa %xmm10,%xmm0 + punpckldq %xmm11,%xmm10 + punpckhdq %xmm11,%xmm0 + movdqa %xmm0,%xmm11 + movdqa %xmm12,%xmm0 + punpckldq %xmm13,%xmm12 + punpckhdq %xmm13,%xmm0 + movdqa %xmm0,%xmm13 + movdqa %xmm14,%xmm0 + punpckldq %xmm15,%xmm14 + punpckhdq %xmm15,%xmm0 + movdqa %xmm0,%xmm15 + + # interleave 64-bit words in state n, n+2 + movdqa 0x00(%rsp),%xmm0 + movdqa 0x20(%rsp),%xmm1 + movdqa %xmm0,%xmm2 + punpcklqdq %xmm1,%xmm2 + punpckhqdq %xmm1,%xmm0 + movdqa %xmm2,0x00(%rsp) + movdqa %xmm0,0x20(%rsp) + movdqa 0x10(%rsp),%xmm0 + movdqa 0x30(%rsp),%xmm1 + movdqa %xmm0,%xmm2 + punpcklqdq %xmm1,%xmm2 + punpckhqdq %xmm1,%xmm0 + movdqa %xmm2,0x10(%rsp) + movdqa %xmm0,0x30(%rsp) + movdqa %xmm4,%xmm0 + punpcklqdq %xmm6,%xmm4 + punpckhqdq %xmm6,%xmm0 + movdqa %xmm0,%xmm6 + movdqa %xmm5,%xmm0 + punpcklqdq %xmm7,%xmm5 + punpckhqdq %xmm7,%xmm0 + movdqa %xmm0,%xmm7 + movdqa %xmm8,%xmm0 + punpcklqdq %xmm10,%xmm8 + punpckhqdq %xmm10,%xmm0 + movdqa %xmm0,%xmm10 + movdqa %xmm9,%xmm0 + punpcklqdq %xmm11,%xmm9 + punpckhqdq %xmm11,%xmm0 + movdqa %xmm0,%xmm11 + movdqa %xmm12,%xmm0 + punpcklqdq %xmm14,%xmm12 + punpckhqdq %xmm14,%xmm0 + movdqa %xmm0,%xmm14 + movdqa %xmm13,%xmm0 + punpcklqdq %xmm15,%xmm13 + punpckhqdq %xmm15,%xmm0 + movdqa %xmm0,%xmm15 + + # xor with corresponding input, write to output + movdqa 0x00(%rsp),%xmm0 + movdqu 0x00(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0x00(%rsi) + movdqa 0x10(%rsp),%xmm0 + movdqu 0x80(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0x80(%rsi) + movdqa 0x20(%rsp),%xmm0 + movdqu 0x40(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0x40(%rsi) + movdqa 0x30(%rsp),%xmm0 + movdqu 0xc0(%rdx),%xmm1 + pxor %xmm1,%xmm0 + movdqu %xmm0,0xc0(%rsi) + movdqu 0x10(%rdx),%xmm1 + pxor %xmm1,%xmm4 + movdqu %xmm4,0x10(%rsi) + movdqu 0x90(%rdx),%xmm1 + pxor %xmm1,%xmm5 + 
movdqu %xmm5,0x90(%rsi) + movdqu 0x50(%rdx),%xmm1 + pxor %xmm1,%xmm6 + movdqu %xmm6,0x50(%rsi) + movdqu 0xd0(%rdx),%xmm1 + pxor %xmm1,%xmm7 + movdqu %xmm7,0xd0(%rsi) + movdqu 0x20(%rdx),%xmm1 + pxor %xmm1,%xmm8 + movdqu %xmm8,0x20(%rsi) + movdqu 0xa0(%rdx),%xmm1 + pxor %xmm1,%xmm9 + movdqu %xmm9,0xa0(%rsi) + movdqu 0x60(%rdx),%xmm1 + pxor %xmm1,%xmm10 + movdqu %xmm10,0x60(%rsi) + movdqu 0xe0(%rdx),%xmm1 + pxor %xmm1,%xmm11 + movdqu %xmm11,0xe0(%rsi) + movdqu 0x30(%rdx),%xmm1 + pxor %xmm1,%xmm12 + movdqu %xmm12,0x30(%rsi) + movdqu 0xb0(%rdx),%xmm1 + pxor %xmm1,%xmm13 + movdqu %xmm13,0xb0(%rsi) + movdqu 0x70(%rdx),%xmm1 + pxor %xmm1,%xmm14 + movdqu %xmm14,0x70(%rsi) + movdqu 0xf0(%rdx),%xmm1 + pxor %xmm1,%xmm15 + movdqu %xmm15,0xf0(%rsi) + + mov %r11,%rsp + ret +ENDPROC(chacha20_asm_4block_xor_ssse3) diff --git a/src/crypto/chacha20poly1305.c b/src/crypto/chacha20poly1305.c new file mode 100644 index 0000000..c05fe1c --- /dev/null +++ b/src/crypto/chacha20poly1305.c @@ -0,0 +1,798 @@ +/* + * Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. + * Copyright 2015 Martin Willi. 
+ */ + +#include "../wireguard.h" +#include "chacha20poly1305.h" +#include +#include +#include +#include +#include + +#ifdef CONFIG_X86_64 +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) +#include +#include +#else +#include +#endif +#ifdef CONFIG_AS_SSSE3 +asmlinkage void chacha20_asm_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src); +asmlinkage void chacha20_asm_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src); +#endif +#ifdef CONFIG_AS_AVX2 +asmlinkage void chacha20_asm_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src); +#endif +asmlinkage void poly1305_asm_block_sse2(u32 *h, const u8 *src, const u32 *r, unsigned int blocks); +asmlinkage void poly1305_asm_2block_sse2(u32 *h, const u8 *src, const u32 *r, unsigned int blocks, const u32 *u); +#ifdef CONFIG_AS_AVX2 +asmlinkage void poly1305_asm_4block_avx2(u32 *h, const u8 *src, const u32 *r, unsigned int blocks, const u32 *u); +#endif +static bool chacha20poly1305_use_avx2 = false; +static bool chacha20poly1305_use_ssse3 = false; +static bool chacha20poly1305_use_sse2 = false; +void chacha20poly1305_init(void) +{ + chacha20poly1305_use_sse2 = cpu_has_xmm2; + chacha20poly1305_use_ssse3 = boot_cpu_has(X86_FEATURE_SSSE3); + chacha20poly1305_use_avx2 = cpu_has_avx && cpu_has_avx2 && +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); +#else + cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL); +#endif +} +#else +void chacha20poly1305_init(void) { } +#endif + +#define CHACHA20_IV_SIZE 16 +#define CHACHA20_KEY_SIZE 32 +#define CHACHA20_BLOCK_SIZE 64 +#define POLY1305_BLOCK_SIZE 16 +#define POLY1305_KEY_SIZE 32 +#define POLY1305_MAC_SIZE 16 + +static inline u32 le32_to_cpuvp(const void *p) +{ + return le32_to_cpup(p); +} + +static inline u32 rotl32(u32 v, u8 n) +{ + return (v << n) | (v >> (sizeof(v) * 8 - n)); +} + +static inline u64 mlt(u64 a, u64 b) +{ + return a * b; +} + +static inline u32 sr(u64 v, u_char n) +{ + return v 
>> n; +} + +static inline u32 and(u32 v, u32 mask) +{ + return v & mask; +} + + +struct chacha20_ctx { + u32 state[16]; +} __aligned(32); + +static void chacha20_generic_block(struct chacha20_ctx *ctx, void *stream) +{ + u32 x[CHACHA20_BLOCK_SIZE / sizeof(u32)]; + __le32 *out = stream; + int i; + + for (i = 0; i < ARRAY_SIZE(x); i++) + x[i] = ctx->state[i]; + + for (i = 0; i < 20; i += 2) { + x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 16); + x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 16); + x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 16); + x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 16); + + x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 12); + x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 12); + x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 12); + x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 12); + + x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 8); + x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 8); + x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 8); + x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 8); + + x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 7); + x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 7); + x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 7); + x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 7); + + x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 16); + x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 16); + x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 16); + x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 16); + + x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 12); + x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 12); + x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 12); + x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 12); + + x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 8); + x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 8); + x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 8); + x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 8); + + x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 7); + x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 7); + x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 7); + x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 7); + } + + 
for (i = 0; i < ARRAY_SIZE(x); i++) + out[i] = cpu_to_le32(x[i] + ctx->state[i]); + + ctx->state[12]++; +} + +static void chacha20_keysetup(struct chacha20_ctx *ctx, const u8 key[32], const u8 nonce[8]) +{ + static const char constant[16] = "expand 32-byte k"; + ctx->state[0] = le32_to_cpuvp(constant + 0); + ctx->state[1] = le32_to_cpuvp(constant + 4); + ctx->state[2] = le32_to_cpuvp(constant + 8); + ctx->state[3] = le32_to_cpuvp(constant + 12); + ctx->state[4] = le32_to_cpuvp(key + 0); + ctx->state[5] = le32_to_cpuvp(key + 4); + ctx->state[6] = le32_to_cpuvp(key + 8); + ctx->state[7] = le32_to_cpuvp(key + 12); + ctx->state[8] = le32_to_cpuvp(key + 16); + ctx->state[9] = le32_to_cpuvp(key + 20); + ctx->state[10] = le32_to_cpuvp(key + 24); + ctx->state[11] = le32_to_cpuvp(key + 28); + ctx->state[12] = 0; + ctx->state[13] = 0; + ctx->state[14] = le32_to_cpuvp(nonce + 0); + ctx->state[15] = le32_to_cpuvp(nonce + 4); +} + +static void chacha20_crypt(struct chacha20_ctx *ctx, u8 *dst, const u8 *src, unsigned int bytes, bool have_simd) +{ + u8 buf[CHACHA20_BLOCK_SIZE]; + + if (!have_simd +#ifdef CONFIG_X86_64 + || !chacha20poly1305_use_ssse3 +#endif + ) + goto no_simd; + +#ifdef CONFIG_X86_64 +#ifdef CONFIG_AS_AVX2 + if (chacha20poly1305_use_avx2) { + while (bytes >= CHACHA20_BLOCK_SIZE * 8) { + chacha20_asm_8block_xor_avx2(ctx->state, dst, src); + bytes -= CHACHA20_BLOCK_SIZE * 8; + src += CHACHA20_BLOCK_SIZE * 8; + dst += CHACHA20_BLOCK_SIZE * 8; + ctx->state[12] += 8; + } + } +#endif +#ifdef CONFIG_AS_SSSE3 + while (bytes >= CHACHA20_BLOCK_SIZE * 4) { + chacha20_asm_4block_xor_ssse3(ctx->state, dst, src); + bytes -= CHACHA20_BLOCK_SIZE * 4; + src += CHACHA20_BLOCK_SIZE * 4; + dst += CHACHA20_BLOCK_SIZE * 4; + ctx->state[12] += 4; + } + while (bytes >= CHACHA20_BLOCK_SIZE) { + chacha20_asm_block_xor_ssse3(ctx->state, dst, src); + bytes -= CHACHA20_BLOCK_SIZE; + src += CHACHA20_BLOCK_SIZE; + dst += CHACHA20_BLOCK_SIZE; + ctx->state[12]++; + } + if (bytes) { + 
memcpy(buf, src, bytes); + chacha20_asm_block_xor_ssse3(ctx->state, buf, buf); + memcpy(dst, buf, bytes); + } + return; +#endif +#endif + +no_simd: + if (dst != src) + memcpy(dst, src, bytes); + + while (bytes >= CHACHA20_BLOCK_SIZE) { + chacha20_generic_block(ctx, buf); + crypto_xor(dst, buf, CHACHA20_BLOCK_SIZE); + bytes -= CHACHA20_BLOCK_SIZE; + dst += CHACHA20_BLOCK_SIZE; + } + if (bytes) { + chacha20_generic_block(ctx, buf); + crypto_xor(dst, buf, bytes); + } +} + +struct poly1305_ctx { + /* key */ + u32 r[5]; + /* finalize key */ + u32 s[4]; + /* accumulator */ + u32 h[5]; + /* partial buffer */ + u8 buf[POLY1305_BLOCK_SIZE]; + /* bytes used in partial buffer */ + unsigned int buflen; + /* derived key u set? */ + bool uset; + /* derived keys r^3, r^4 set? */ + bool wset; + /* derived Poly1305 key r^2 */ + u32 u[5]; + /* derived Poly1305 key r^3 */ + u32 r3[5]; + /* derived Poly1305 key r^4 */ + u32 r4[5]; +}; + +static void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE]) +{ + memset(ctx, 0, sizeof(struct poly1305_ctx)); + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ + ctx->r[0] = (le32_to_cpuvp(key + 0) >> 0) & 0x3ffffff; + ctx->r[1] = (le32_to_cpuvp(key + 3) >> 2) & 0x3ffff03; + ctx->r[2] = (le32_to_cpuvp(key + 6) >> 4) & 0x3ffc0ff; + ctx->r[3] = (le32_to_cpuvp(key + 9) >> 6) & 0x3f03fff; + ctx->r[4] = (le32_to_cpuvp(key + 12) >> 8) & 0x00fffff; + ctx->s[0] = le32_to_cpuvp(key + 16); + ctx->s[1] = le32_to_cpuvp(key + 20); + ctx->s[2] = le32_to_cpuvp(key + 24); + ctx->s[3] = le32_to_cpuvp(key + 28); +} + +static unsigned int poly1305_generic_blocks(struct poly1305_ctx *ctx, const u8 *src, unsigned int srclen, u32 hibit) +{ + u32 r0, r1, r2, r3, r4; + u32 s1, s2, s3, s4; + u32 h0, h1, h2, h3, h4; + u64 d0, d1, d2, d3, d4; + + r0 = ctx->r[0]; + r1 = ctx->r[1]; + r2 = ctx->r[2]; + r3 = ctx->r[3]; + r4 = ctx->r[4]; + + s1 = r1 * 5; + s2 = r2 * 5; + s3 = r3 * 5; + s4 = r4 * 5; + + h0 = ctx->h[0]; + h1 = ctx->h[1]; + h2 = ctx->h[2]; + h3 = 
ctx->h[3];
+	h4 = ctx->h[4];
+	/* One iteration per whole 16-byte block; hibit is OR-ed into the top limb of the loaded block. */
+	while (likely(srclen >= POLY1305_BLOCK_SIZE)) {
+		/* h += m[i] */
+		h0 += (le32_to_cpuvp(src + 0) >> 0) & 0x3ffffff;
+		h1 += (le32_to_cpuvp(src + 3) >> 2) & 0x3ffffff;
+		h2 += (le32_to_cpuvp(src + 6) >> 4) & 0x3ffffff;
+		h3 += (le32_to_cpuvp(src + 9) >> 6) & 0x3ffffff;
+		h4 += (le32_to_cpuvp(src + 12) >> 8) | hibit;
+
+		/* h *= r */
+		d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) + mlt(h3, s2) + mlt(h4, s1);
+		d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) + mlt(h3, s3) + mlt(h4, s2);
+		d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) + mlt(h3, s4) + mlt(h4, s3);
+		d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) + mlt(h3, r0) + mlt(h4, s4);
+		d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) + mlt(h3, r1) + mlt(h4, r0);
+
+		/* (partial) h %= p */
+		d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff);
+		d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff);
+		d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff);
+		d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff);
+		h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
+		h1 += h0 >> 26; h0 = h0 & 0x3ffffff;
+
+		src += POLY1305_BLOCK_SIZE;
+		srclen -= POLY1305_BLOCK_SIZE;
+	}
+	/* Store the accumulator back; the return value is the count of trailing bytes left unprocessed. */
+	ctx->h[0] = h0;
+	ctx->h[1] = h1;
+	ctx->h[2] = h2;
+	ctx->h[3] = h3;
+	ctx->h[4] = h4;
+
+	return srclen;
+}
+/* x86-64 only. poly1305_simd_mult() updates a in place by running the one-block SSE2 routine on an all-zero block with b as key. */
+#ifdef CONFIG_X86_64
+static void poly1305_simd_mult(u32 *a, const u32 *b)
+{
+	u8 m[POLY1305_BLOCK_SIZE];
+
+	memset(m, 0, sizeof(m));
+	/* The poly1305 block function adds a hi-bit to the accumulator which
+	 * we don't need for key multiplication; compensate for it.
+	 */
+	a[4] -= 1 << 24;
+	poly1305_asm_block_sse2(a, m, b, 1);
+}
+/* Absorb whole blocks of src with the widest enabled routine (4-block AVX2, 2-block SSE2, then 1-block SSE2); returns the bytes left over. */
+static unsigned int poly1305_simd_blocks(struct poly1305_ctx *ctx, const u8 *src, unsigned int srclen)
+{
+	unsigned int blocks;
+	/* The derived keys u, r3 and r4 are computed lazily, the first time a multi-block routine is about to be used. */
+#ifdef CONFIG_AS_AVX2
+	if (chacha20poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
+		if (unlikely(!ctx->wset)) {
+			if (!ctx->uset) {
+				memcpy(ctx->u, ctx->r, sizeof(ctx->u));
+				poly1305_simd_mult(ctx->u, ctx->r);
+				ctx->uset = true;
+			}
+			memcpy(ctx->r3, ctx->u, sizeof(ctx->u));
+			poly1305_simd_mult(ctx->r3, ctx->r);
+			memcpy(ctx->r4, ctx->r3, sizeof(ctx->u));
+			poly1305_simd_mult(ctx->r4, ctx->r);
+			ctx->wset = true;
+		}
+		blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
+		poly1305_asm_4block_avx2(ctx->h, src, ctx->r, blocks, ctx->u);
+		src += POLY1305_BLOCK_SIZE * 4 * blocks;
+		srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
+	}
+#endif
+	if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
+		if (unlikely(!ctx->uset)) {
+			memcpy(ctx->u, ctx->r, sizeof(ctx->u));
+			poly1305_simd_mult(ctx->u, ctx->r);
+			ctx->uset = true;
+		}
+		blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
+		poly1305_asm_2block_sse2(ctx->h, src, ctx->r, blocks, ctx->u);
+		src += POLY1305_BLOCK_SIZE * 2 * blocks;
+		srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
+	}
+	if (srclen >= POLY1305_BLOCK_SIZE) {
+		poly1305_asm_block_sse2(ctx->h, src, ctx->r, 1);
+		srclen -= POLY1305_BLOCK_SIZE;
+	}
+	return srclen;
+}
+#endif
+/* Feed srclen bytes into the MAC: top up and flush the partial-block buffer first, hash whole blocks directly, and buffer any tail. */
+static void poly1305_update(struct poly1305_ctx *ctx, const u8 *src, unsigned int srclen, bool have_simd)
+{
+	unsigned int bytes;
+	/* Bytes left over from an earlier call are completed to a full block before any new block is hashed. */
+	if (unlikely(ctx->buflen)) {
+		bytes = min(srclen, POLY1305_BLOCK_SIZE - ctx->buflen);
+		memcpy(ctx->buf + ctx->buflen, src, bytes);
+		src += bytes;
+		srclen -= bytes;
+		ctx->buflen += bytes;
+
+		if (ctx->buflen == POLY1305_BLOCK_SIZE) {
+#ifdef CONFIG_X86_64
+			/* The dangling "else" below pairs with the generic call that follows the #endif. */
+			if (have_simd && chacha20poly1305_use_sse2)
+				poly1305_simd_blocks(ctx, ctx->buf, POLY1305_BLOCK_SIZE);
+			else
+#endif
+				poly1305_generic_blocks(ctx, ctx->buf, POLY1305_BLOCK_SIZE, 1 << 24);
+
ctx->buflen = 0; + } + } + + if (likely(srclen >= POLY1305_BLOCK_SIZE)) { +#ifdef CONFIG_X86_64 + + if (have_simd && chacha20poly1305_use_sse2) + bytes = poly1305_simd_blocks(ctx, src, srclen); + else +#endif + bytes = poly1305_generic_blocks(ctx, src, srclen, 1 << 24); + src += srclen - bytes; + srclen = bytes; + } + + if (unlikely(srclen)) { + ctx->buflen = srclen; + memcpy(ctx->buf, src, srclen); + } +} + +static void poly1305_finish(struct poly1305_ctx *ctx, u8 *dst) +{ + __le32 *mac = (__le32 *)dst; + u32 h0, h1, h2, h3, h4; + u32 g0, g1, g2, g3, g4; + u32 mask; + u64 f = 0; + + if (unlikely(ctx->buflen)) { + ctx->buf[ctx->buflen++] = 1; + memset(ctx->buf + ctx->buflen, 0, POLY1305_BLOCK_SIZE - ctx->buflen); + poly1305_generic_blocks(ctx, ctx->buf, POLY1305_BLOCK_SIZE, 0); + } + + /* fully carry h */ + h0 = ctx->h[0]; + h1 = ctx->h[1]; + h2 = ctx->h[2]; + h3 = ctx->h[3]; + h4 = ctx->h[4]; + + h2 += (h1 >> 26); h1 = h1 & 0x3ffffff; + h3 += (h2 >> 26); h2 = h2 & 0x3ffffff; + h4 += (h3 >> 26); h3 = h3 & 0x3ffffff; + h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff; + h1 += (h0 >> 26); h0 = h0 & 0x3ffffff; + + /* compute h + -p */ + g0 = h0 + 5; + g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff; + g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff; + g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff; + g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff; + + /* select h if h < p, or h + -p if h >= p */ + mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1; + g0 &= mask; + g1 &= mask; + g2 &= mask; + g3 &= mask; + g4 &= mask; + mask = ~mask; + h0 = (h0 & mask) | g0; + h1 = (h1 & mask) | g1; + h2 = (h2 & mask) | g2; + h3 = (h3 & mask) | g3; + h4 = (h4 & mask) | g4; + + /* h = h % (2^128) */ + h0 = (h0 >> 0) | (h1 << 26); + h1 = (h1 >> 6) | (h2 << 20); + h2 = (h2 >> 12) | (h3 << 14); + h3 = (h3 >> 18) | (h4 << 8); + + /* mac = (h + s) % (2^128) */ + f = (f >> 32) + h0 + ctx->s[0]; mac[0] = cpu_to_le32(f); + f = (f >> 32) + h1 + ctx->s[1]; mac[1] = cpu_to_le32(f); + f = (f >> 32) + h2 + ctx->s[2]; mac[2] = 
cpu_to_le32(f); + f = (f >> 32) + h3 + ctx->s[3]; mac[3] = cpu_to_le32(f); +} + +static const uint8_t pad0[16] = { 0 }; + +static struct crypto_alg chacha20_alg = { + .cra_blocksize = 1, + .cra_alignmask = sizeof(u32) - 1 +}; +static struct crypto_blkcipher chacha20_cipher = { + .base = { + .__crt_alg = &chacha20_alg + } +}; +static struct blkcipher_desc chacha20_desc = { + .tfm = &chacha20_cipher +}; + +bool chacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, const size_t src_len, + const uint8_t *ad, const size_t ad_len, + const uint64_t nonce, const uint8_t key[CHACHA20POLY1305_KEYLEN]) +{ + struct poly1305_ctx poly1305_state; + struct chacha20_ctx chacha20_state; + uint8_t block0[CHACHA20_BLOCK_SIZE] = { 0 }; + __le64 len; + __le64 le_nonce = cpu_to_le64(nonce); + bool have_simd = false; + +#ifdef CONFIG_X86_64 + have_simd = irq_fpu_usable(); + if (have_simd) + kernel_fpu_begin(); +#endif + + chacha20_keysetup(&chacha20_state, key, (uint8_t *)&le_nonce); + + chacha20_crypt(&chacha20_state, block0, block0, sizeof(block0), have_simd); + poly1305_init(&poly1305_state, block0); + memzero_explicit(block0, sizeof(block0)); + + poly1305_update(&poly1305_state, ad, ad_len, have_simd); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd); + + chacha20_crypt(&chacha20_state, dst, src, src_len, have_simd); + + poly1305_update(&poly1305_state, dst, src_len, have_simd); + poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, have_simd); + + len = cpu_to_le64(ad_len); + poly1305_update(&poly1305_state, (uint8_t *)&len, sizeof(len), have_simd); + + len = cpu_to_le64(src_len); + poly1305_update(&poly1305_state, (uint8_t *)&len, sizeof(len), have_simd); + + poly1305_finish(&poly1305_state, dst + src_len); + + memzero_explicit(&poly1305_state, sizeof(poly1305_state)); + memzero_explicit(&chacha20_state, sizeof(chacha20_state)); + +#ifdef CONFIG_X86_64 + if (have_simd) + kernel_fpu_end(); +#endif + + return true; +} + +bool 
chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len, + const uint8_t *ad, const size_t ad_len, + const uint64_t nonce, const uint8_t key[CHACHA20POLY1305_KEYLEN]) +{ + struct poly1305_ctx poly1305_state; + struct chacha20_ctx chacha20_state; + struct blkcipher_walk walk; + uint8_t block0[CHACHA20_BLOCK_SIZE] = { 0 }; + uint8_t mac[POLY1305_MAC_SIZE]; + __le64 len; + __le64 le_nonce = cpu_to_le64(nonce); + bool have_simd = false; + +#ifdef CONFIG_X86_64 + have_simd = irq_fpu_usable(); + if (have_simd) + kernel_fpu_begin(); +#endif + + chacha20_keysetup(&chacha20_state, key, (uint8_t *)&le_nonce); + + chacha20_crypt(&chacha20_state, block0, block0, sizeof(block0), have_simd); + poly1305_init(&poly1305_state, block0); + memzero_explicit(block0, sizeof(block0)); + + poly1305_update(&poly1305_state, ad, ad_len, have_simd); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd); + + if (likely(src_len)) { + blkcipher_walk_init(&walk, dst, src, src_len); + blkcipher_walk_virt_block(&chacha20_desc, &walk, CHACHA20_BLOCK_SIZE); + while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { + size_t chunk_len = rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE); + chacha20_crypt(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, have_simd); + poly1305_update(&poly1305_state, walk.dst.virt.addr, chunk_len, have_simd); + blkcipher_walk_done(&chacha20_desc, &walk, walk.nbytes % CHACHA20_BLOCK_SIZE); + } + if (walk.nbytes) { + chacha20_crypt(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, have_simd); + poly1305_update(&poly1305_state, walk.dst.virt.addr, walk.nbytes, have_simd); + blkcipher_walk_done(&chacha20_desc, &walk, 0); + } + } + + poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, have_simd); + + len = cpu_to_le64(ad_len); + poly1305_update(&poly1305_state, (uint8_t *)&len, sizeof(len), have_simd); + + len = cpu_to_le64(src_len); + poly1305_update(&poly1305_state, (uint8_t 
*)&len, sizeof(len), have_simd); + + poly1305_finish(&poly1305_state, mac); + scatterwalk_map_and_copy(mac, dst, src_len, sizeof(mac), 1); + memzero_explicit(&poly1305_state, sizeof(poly1305_state)); + memzero_explicit(&chacha20_state, sizeof(chacha20_state)); + memzero_explicit(mac, sizeof(mac)); + +#ifdef CONFIG_X86_64 + if (have_simd) + kernel_fpu_end(); +#endif + + return true; +} + +bool chacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src, const size_t src_len, + const uint8_t *ad, const size_t ad_len, + const uint64_t nonce, const uint8_t key[CHACHA20POLY1305_KEYLEN]) +{ + struct poly1305_ctx poly1305_state; + struct chacha20_ctx chacha20_state; + int ret; + uint8_t block0[CHACHA20_BLOCK_SIZE] = { 0 }; + uint8_t mac[POLY1305_MAC_SIZE]; + size_t dst_len; + __le64 len; + __le64 le_nonce = cpu_to_le64(nonce); + bool have_simd = false; + + if (unlikely(src_len < POLY1305_MAC_SIZE)) + return false; + +#ifdef CONFIG_X86_64 + have_simd = irq_fpu_usable(); + if (have_simd) + kernel_fpu_begin(); +#endif + + chacha20_keysetup(&chacha20_state, key, (uint8_t *)&le_nonce); + + chacha20_crypt(&chacha20_state, block0, block0, sizeof(block0), have_simd); + poly1305_init(&poly1305_state, block0); + memzero_explicit(block0, sizeof(block0)); + + poly1305_update(&poly1305_state, ad, ad_len, have_simd); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd); + + dst_len = src_len - POLY1305_MAC_SIZE; + poly1305_update(&poly1305_state, src, dst_len, have_simd); + poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, have_simd); + + len = cpu_to_le64(ad_len); + poly1305_update(&poly1305_state, (uint8_t *)&len, sizeof(len), have_simd); + + len = cpu_to_le64(dst_len); + poly1305_update(&poly1305_state, (uint8_t *)&len, sizeof(len), have_simd); + + poly1305_finish(&poly1305_state, mac); + memzero_explicit(&poly1305_state, sizeof(poly1305_state)); + + ret = crypto_memneq(mac, src + dst_len, POLY1305_MAC_SIZE); + memzero_explicit(mac, 
POLY1305_MAC_SIZE); + if (likely(!ret)) + chacha20_crypt(&chacha20_state, dst, src, dst_len, have_simd); + + memzero_explicit(&chacha20_state, sizeof(chacha20_state)); +#ifdef CONFIG_X86_64 + if (have_simd) + kernel_fpu_end(); +#endif + return !ret; +} + +bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len, + const uint8_t *ad, const size_t ad_len, + const uint64_t nonce, const uint8_t key[CHACHA20POLY1305_KEYLEN]) +{ + struct poly1305_ctx poly1305_state; + struct chacha20_ctx chacha20_state; + struct blkcipher_walk walk; + int ret; + uint8_t block0[CHACHA20_BLOCK_SIZE] = { 0 }; + uint8_t read_mac[POLY1305_MAC_SIZE], computed_mac[POLY1305_MAC_SIZE]; + size_t dst_len; + __le64 len; + __le64 le_nonce = cpu_to_le64(nonce); + bool have_simd = false; + + if (unlikely(src_len < POLY1305_MAC_SIZE)) + return false; + +#ifdef CONFIG_X86_64 + have_simd = irq_fpu_usable(); + if (have_simd) + kernel_fpu_begin(); +#endif + + chacha20_keysetup(&chacha20_state, key, (uint8_t *)&le_nonce); + + chacha20_crypt(&chacha20_state, block0, block0, sizeof(block0), have_simd); + poly1305_init(&poly1305_state, block0); + memzero_explicit(block0, sizeof(block0)); + + poly1305_update(&poly1305_state, ad, ad_len, have_simd); + poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd); + + dst_len = src_len - POLY1305_MAC_SIZE; + if (likely(dst_len)) { + blkcipher_walk_init(&walk, dst, src, dst_len); + blkcipher_walk_virt_block(&chacha20_desc, &walk, CHACHA20_BLOCK_SIZE); + while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { + size_t chunk_len = rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE); + poly1305_update(&poly1305_state, walk.src.virt.addr, chunk_len, have_simd); + chacha20_crypt(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, have_simd); + blkcipher_walk_done(&chacha20_desc, &walk, walk.nbytes % CHACHA20_BLOCK_SIZE); + } + if (walk.nbytes) { + poly1305_update(&poly1305_state, walk.dst.virt.addr, walk.nbytes, 
have_simd); + chacha20_crypt(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, have_simd); + blkcipher_walk_done(&chacha20_desc, &walk, 0); + } + } + + poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, have_simd); + + len = cpu_to_le64(ad_len); + poly1305_update(&poly1305_state, (uint8_t *)&len, sizeof(len), have_simd); + + len = cpu_to_le64(dst_len); + poly1305_update(&poly1305_state, (uint8_t *)&len, sizeof(len), have_simd); + + poly1305_finish(&poly1305_state, computed_mac); + memzero_explicit(&poly1305_state, sizeof(poly1305_state)); + + scatterwalk_map_and_copy(read_mac, src, dst_len, POLY1305_MAC_SIZE, 0); + ret = crypto_memneq(read_mac, computed_mac, POLY1305_MAC_SIZE); + memzero_explicit(read_mac, POLY1305_MAC_SIZE); + memzero_explicit(computed_mac, POLY1305_MAC_SIZE); + memzero_explicit(&chacha20_state, sizeof(chacha20_state)); +#ifdef CONFIG_X86_64 + if (have_simd) + kernel_fpu_end(); +#endif + return !ret; +} + +#ifdef DEBUG +/* ChaCha20-Poly1305 AEAD test vectors from RFC7539 2.8.2 */ +struct chacha20poly1305_testvec { + uint8_t *key, *nonce, *assoc, *input, *result; + size_t alen, ilen; +}; +static const struct chacha20poly1305_testvec chacha20poly1305_enc_vectors[] = { { + .key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a\xf3\x33\x88\x86\x04\xf6\xb5\xf0\x47\x39\x17\xc1\x40\x2b\x80\x09\x9d\xca\x5c\xbc\x20\x70\x75\xc0", + .nonce = "\x01\x02\x03\x04\x05\x06\x07\x08", + .assoc = "\xf3\x33\x88\x86\x00\x00\x00\x00\x00\x00\x4e\x91", + .alen = 12, + .input = 
"\x49\x6e\x74\x65\x72\x6e\x65\x74\x2d\x44\x72\x61\x66\x74\x73\x20\x61\x72\x65\x20\x64\x72\x61\x66\x74\x20\x64\x6f\x63\x75\x6d\x65\x6e\x74\x73\x20\x76\x61\x6c\x69\x64\x20\x66\x6f\x72\x20\x61\x20\x6d\x61\x78\x69\x6d\x75\x6d\x20\x6f\x66\x20\x73\x69\x78\x20\x6d\x6f\x6e\x74\x68\x73\x20\x61\x6e\x64\x20\x6d\x61\x79\x20\x62\x65\x20\x75\x70\x64\x61\x74\x65\x64\x2c\x20\x72\x65\x70\x6c\x61\x63\x65\x64\x2c\x20\x6f\x72\x20\x6f\x62\x73\x6f\x6c\x65\x74\x65\x64\x20\x62\x79\x20\x6f\x74\x68\x65\x72\x20\x64\x6f\x63\x75\x6d\x65\x6e\x74\x73\x20\x61\x74\x20\x61\x6e\x79\x20\x74\x69\x6d\x65\x2e\x20\x49\x74\x20\x69\x73\x20\x69\x6e\x61\x70\x70\x72\x6f\x70\x72\x69\x61\x74\x65\x20\x74\x6f\x20\x75\x73\x65\x20\x49\x6e\x74\x65\x72\x6e\x65\x74\x2d\x44\x72\x61\x66\x74\x73\x20\x61\x73\x20\x72\x65\x66\x65\x72\x65\x6e\x63\x65\x20\x6d\x61\x74\x65\x72\x69\x61\x6c\x20\x6f\x72\x20\x74\x6f\x20\x63\x69\x74\x65\x20\x74\x68\x65\x6d\x20\x6f\x74\x68\x65\x72\x20\x74\x68\x61\x6e\x20\x61\x73\x20\x2f\xe2\x80\x9c\x77\x6f\x72\x6b\x20\x69\x6e\x20\x70\x72\x6f\x67\x72\x65\x73\x73\x2e\x2f\xe2\x80\x9d", + .ilen = 265, + .result = 
"\x64\xa0\x86\x15\x75\x86\x1a\xf4\x60\xf0\x62\xc7\x9b\xe6\x43\xbd\x5e\x80\x5c\xfd\x34\x5c\xf3\x89\xf1\x08\x67\x0a\xc7\x6c\x8c\xb2\x4c\x6c\xfc\x18\x75\x5d\x43\xee\xa0\x9e\xe9\x4e\x38\x2d\x26\xb0\xbd\xb7\xb7\x3c\x32\x1b\x01\x00\xd4\xf0\x3b\x7f\x35\x58\x94\xcf\x33\x2f\x83\x0e\x71\x0b\x97\xce\x98\xc8\xa8\x4a\xbd\x0b\x94\x81\x14\xad\x17\x6e\x00\x8d\x33\xbd\x60\xf9\x82\xb1\xff\x37\xc8\x55\x97\x97\xa0\x6e\xf4\xf0\xef\x61\xc1\x86\x32\x4e\x2b\x35\x06\x38\x36\x06\x90\x7b\x6a\x7c\x02\xb0\xf9\xf6\x15\x7b\x53\xc8\x67\xe4\xb9\x16\x6c\x76\x7b\x80\x4d\x46\xa5\x9b\x52\x16\xcd\xe7\xa4\xe9\x90\x40\xc5\xa4\x04\x33\x22\x5e\xe2\x82\xa1\xb0\xa0\x6c\x52\x3e\xaf\x45\x34\xd7\xf8\x3f\xa1\x15\x5b\x00\x47\x71\x8c\xbc\x54\x6a\x0d\x07\x2b\x04\xb3\x56\x4e\xea\x1b\x42\x22\x73\xf5\x48\x27\x1a\x0b\xb2\x31\x60\x53\xfa\x76\x99\x19\x55\xeb\xd6\x31\x59\x43\x4e\xce\xbb\x4e\x46\x6d\xae\x5a\x10\x73\xa6\x72\x76\x27\x09\x7a\x10\x49\xe6\x17\xd9\x1d\x36\x10\x94\xfa\x68\xf0\xff\x77\x98\x71\x30\x30\x5b\xea\xba\x2e\xda\x04\xdf\x99\x7b\x71\x4d\x6c\x6f\x2c\x29\xa6\xad\x5c\xb4\x02\x2b\x02\x70\x9b\xee\xad\x9d\x67\x89\x0c\xbb\x22\x39\x23\x36\xfe\xa1\x85\x1f\x38" +} }; +static const struct chacha20poly1305_testvec chacha20poly1305_dec_vectors[] = { { + .key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a\xf3\x33\x88\x86\x04\xf6\xb5\xf0\x47\x39\x17\xc1\x40\x2b\x80\x09\x9d\xca\x5c\xbc\x20\x70\x75\xc0", + .nonce = "\x01\x02\x03\x04\x05\x06\x07\x08", + .assoc = "\xf3\x33\x88\x86\x00\x00\x00\x00\x00\x00\x4e\x91", + .alen = 12, + .input = 
"\x64\xa0\x86\x15\x75\x86\x1a\xf4\x60\xf0\x62\xc7\x9b\xe6\x43\xbd\x5e\x80\x5c\xfd\x34\x5c\xf3\x89\xf1\x08\x67\x0a\xc7\x6c\x8c\xb2\x4c\x6c\xfc\x18\x75\x5d\x43\xee\xa0\x9e\xe9\x4e\x38\x2d\x26\xb0\xbd\xb7\xb7\x3c\x32\x1b\x01\x00\xd4\xf0\x3b\x7f\x35\x58\x94\xcf\x33\x2f\x83\x0e\x71\x0b\x97\xce\x98\xc8\xa8\x4a\xbd\x0b\x94\x81\x14\xad\x17\x6e\x00\x8d\x33\xbd\x60\xf9\x82\xb1\xff\x37\xc8\x55\x97\x97\xa0\x6e\xf4\xf0\xef\x61\xc1\x86\x32\x4e\x2b\x35\x06\x38\x36\x06\x90\x7b\x6a\x7c\x02\xb0\xf9\xf6\x15\x7b\x53\xc8\x67\xe4\xb9\x16\x6c\x76\x7b\x80\x4d\x46\xa5\x9b\x52\x16\xcd\xe7\xa4\xe9\x90\x40\xc5\xa4\x04\x33\x22\x5e\xe2\x82\xa1\xb0\xa0\x6c\x52\x3e\xaf\x45\x34\xd7\xf8\x3f\xa1\x15\x5b\x00\x47\x71\x8c\xbc\x54\x6a\x0d\x07\x2b\x04\xb3\x56\x4e\xea\x1b\x42\x22\x73\xf5\x48\x27\x1a\x0b\xb2\x31\x60\x53\xfa\x76\x99\x19\x55\xeb\xd6\x31\x59\x43\x4e\xce\xbb\x4e\x46\x6d\xae\x5a\x10\x73\xa6\x72\x76\x27\x09\x7a\x10\x49\xe6\x17\xd9\x1d\x36\x10\x94\xfa\x68\xf0\xff\x77\x98\x71\x30\x30\x5b\xea\xba\x2e\xda\x04\xdf\x99\x7b\x71\x4d\x6c\x6f\x2c\x29\xa6\xad\x5c\xb4\x02\x2b\x02\x70\x9b\xee\xad\x9d\x67\x89\x0c\xbb\x22\x39\x23\x36\xfe\xa1\x85\x1f\x38", + .ilen = 281, + .result = 
"\x49\x6e\x74\x65\x72\x6e\x65\x74\x2d\x44\x72\x61\x66\x74\x73\x20\x61\x72\x65\x20\x64\x72\x61\x66\x74\x20\x64\x6f\x63\x75\x6d\x65\x6e\x74\x73\x20\x76\x61\x6c\x69\x64\x20\x66\x6f\x72\x20\x61\x20\x6d\x61\x78\x69\x6d\x75\x6d\x20\x6f\x66\x20\x73\x69\x78\x20\x6d\x6f\x6e\x74\x68\x73\x20\x61\x6e\x64\x20\x6d\x61\x79\x20\x62\x65\x20\x75\x70\x64\x61\x74\x65\x64\x2c\x20\x72\x65\x70\x6c\x61\x63\x65\x64\x2c\x20\x6f\x72\x20\x6f\x62\x73\x6f\x6c\x65\x74\x65\x64\x20\x62\x79\x20\x6f\x74\x68\x65\x72\x20\x64\x6f\x63\x75\x6d\x65\x6e\x74\x73\x20\x61\x74\x20\x61\x6e\x79\x20\x74\x69\x6d\x65\x2e\x20\x49\x74\x20\x69\x73\x20\x69\x6e\x61\x70\x70\x72\x6f\x70\x72\x69\x61\x74\x65\x20\x74\x6f\x20\x75\x73\x65\x20\x49\x6e\x74\x65\x72\x6e\x65\x74\x2d\x44\x72\x61\x66\x74\x73\x20\x61\x73\x20\x72\x65\x66\x65\x72\x65\x6e\x63\x65\x20\x6d\x61\x74\x65\x72\x69\x61\x6c\x20\x6f\x72\x20\x74\x6f\x20\x63\x69\x74\x65\x20\x74\x68\x65\x6d\x20\x6f\x74\x68\x65\x72\x20\x74\x68\x61\x6e\x20\x61\x73\x20\x2f\xe2\x80\x9c\x77\x6f\x72\x6b\x20\x69\x6e\x20\x70\x72\x6f\x67\x72\x65\x73\x73\x2e\x2f\xe2\x80\x9d" +} }; +/* Known-answer self-test: runs every encryption and decryption vector through a zeroed scratch buffer, logs FAIL for each vector whose call fails or whose output mismatches, and logs pass only if no vector failed. */ +void chacha20poly1305_selftest(void) +{ + size_t i; + uint8_t computed_result[512]; + bool success = true, ret; /* success: sticky overall verdict, only ever cleared; ret: the current vector's return value, kept separate so a later pass cannot mask an earlier failure */ + + for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) { + memset(computed_result, 0, sizeof(computed_result)); + ret = chacha20poly1305_encrypt(computed_result, chacha20poly1305_enc_vectors[i].input, chacha20poly1305_enc_vectors[i].ilen, chacha20poly1305_enc_vectors[i].assoc, chacha20poly1305_enc_vectors[i].alen, le64_to_cpu(*(__force __le64 *)chacha20poly1305_enc_vectors[i].nonce), chacha20poly1305_enc_vectors[i].key); + if (!ret || memcmp(computed_result, chacha20poly1305_enc_vectors[i].result, chacha20poly1305_enc_vectors[i].ilen + POLY1305_MAC_SIZE)) { + pr_info("chacha20poly1305 encryption self-test %zu: FAIL\n", i + 1); + success = false; + } + } + for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) { + memset(computed_result, 0, sizeof(computed_result)); + ret = 
chacha20poly1305_decrypt(computed_result, chacha20poly1305_dec_vectors[i].input, chacha20poly1305_dec_vectors[i].ilen, chacha20poly1305_dec_vectors[i].assoc, chacha20poly1305_dec_vectors[i].alen, le64_to_cpu(*(__force __le64 *)chacha20poly1305_dec_vectors[i].nonce), chacha20poly1305_dec_vectors[i].key); + if (!ret || memcmp(computed_result, chacha20poly1305_dec_vectors[i].result, chacha20poly1305_dec_vectors[i].ilen - POLY1305_MAC_SIZE)) { + pr_info("chacha20poly1305 decryption self-test %zu: FAIL\n", i + 1); + success = false; + } + } + if (success) + pr_info("chacha20poly1305 self-tests: pass\n"); +} +#endif diff --git a/src/crypto/chacha20poly1305.h b/src/crypto/chacha20poly1305.h new file mode 100644 index 0000000..c44a17e --- /dev/null +++ b/src/crypto/chacha20poly1305.h @@ -0,0 +1,35 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#ifndef CHACHA20POLY1305_H +#define CHACHA20POLY1305_H + +#include + +enum chacha20poly1305_lengths { + CHACHA20POLY1305_KEYLEN = 32, + CHACHA20POLY1305_AUTHTAGLEN = 16 +}; + +void chacha20poly1305_init(void); + +bool chacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, const size_t src_len, + const uint8_t *ad, const size_t ad_len, + const uint64_t nonce, const uint8_t key[CHACHA20POLY1305_KEYLEN]); + +bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len, + const uint8_t *ad, const size_t ad_len, + const uint64_t nonce, const uint8_t key[CHACHA20POLY1305_KEYLEN]); + +bool chacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src, const size_t src_len, + const uint8_t *ad, const size_t ad_len, + const uint64_t nonce, const uint8_t key[CHACHA20POLY1305_KEYLEN]); + +bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len, + const uint8_t *ad, const size_t ad_len, + const uint64_t nonce, const uint8_t key[CHACHA20POLY1305_KEYLEN]); + +#ifdef DEBUG +void chacha20poly1305_selftest(void); +#endif + 
+#endif diff --git a/src/crypto/curve25519.c b/src/crypto/curve25519.c new file mode 100644 index 0000000..8b75aa0 --- /dev/null +++ b/src/crypto/curve25519.c @@ -0,0 +1,1304 @@ +/* Original author: Adam Langley + * + * Copyright 2008 Google Inc. All Rights Reserved. + * Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. + */ + +#include "../wireguard.h" +#include "curve25519.h" +#include +#include +#include + +static __always_inline void normalize_secret(uint8_t secret[CURVE25519_POINT_SIZE]) +{ + secret[0] &= 248; + secret[31] &= 127; + secret[31] |= 64; +} + +static const uint8_t zeros[CURVE25519_POINT_SIZE] = { 0 }; + +#ifdef __SIZEOF_INT128__ +typedef uint64_t limb; +typedef limb felem[5]; +typedef __uint128_t uint128_t; + +/* Sum two numbers: output += in */ +static __always_inline void fsum(limb *output, const limb *in) +{ + output[0] += in[0]; + output[1] += in[1]; + output[2] += in[2]; + output[3] += in[3]; + output[4] += in[4]; +} + +/* Find the difference of two numbers: output = in - output + * (note the order of the arguments!) 
+ * + * Assumes that out[i] < 2**52 + * On return, out[i] < 2**55 + */ +static __always_inline void fdifference_backwards(felem out, const felem in) +{ + /* 152 is 19 << 3 */ + static const limb two54m152 = (((limb)1) << 54) - 152; + static const limb two54m8 = (((limb)1) << 54) - 8; + + out[0] = in[0] + two54m152 - out[0]; + out[1] = in[1] + two54m8 - out[1]; + out[2] = in[2] + two54m8 - out[2]; + out[3] = in[3] + two54m8 - out[3]; + out[4] = in[4] + two54m8 - out[4]; +} + +/* Multiply a number by a scalar: output = in * scalar */ +static __always_inline void fscalar_product(felem output, const felem in, const limb scalar) +{ + uint128_t a; + + a = ((uint128_t) in[0]) * scalar; + output[0] = ((limb)a) & 0x7ffffffffffffUL; + + a = ((uint128_t) in[1]) * scalar + ((limb) (a >> 51)); + output[1] = ((limb)a) & 0x7ffffffffffffUL; + + a = ((uint128_t) in[2]) * scalar + ((limb) (a >> 51)); + output[2] = ((limb)a) & 0x7ffffffffffffUL; + + a = ((uint128_t) in[3]) * scalar + ((limb) (a >> 51)); + output[3] = ((limb)a) & 0x7ffffffffffffUL; + + a = ((uint128_t) in[4]) * scalar + ((limb) (a >> 51)); + output[4] = ((limb)a) & 0x7ffffffffffffUL; + + output[0] += (a >> 51) * 19; +} + +/* Multiply two numbers: output = in2 * in + * + * output must be distinct to both inputs. The inputs are reduced coefficient + * form, the output is not. + * + * Assumes that in[i] < 2**55 and likewise for in2. 
+ * On return, output[i] < 2**52 + */ +static __always_inline void fmul(felem output, const felem in2, const felem in) +{ + uint128_t t[5]; + limb r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c; + + r0 = in[0]; + r1 = in[1]; + r2 = in[2]; + r3 = in[3]; + r4 = in[4]; + + s0 = in2[0]; + s1 = in2[1]; + s2 = in2[2]; + s3 = in2[3]; + s4 = in2[4]; + + t[0] = ((uint128_t) r0) * s0; + t[1] = ((uint128_t) r0) * s1 + ((uint128_t) r1) * s0; + t[2] = ((uint128_t) r0) * s2 + ((uint128_t) r2) * s0 + ((uint128_t) r1) * s1; + t[3] = ((uint128_t) r0) * s3 + ((uint128_t) r3) * s0 + ((uint128_t) r1) * s2 + ((uint128_t) r2) * s1; + t[4] = ((uint128_t) r0) * s4 + ((uint128_t) r4) * s0 + ((uint128_t) r3) * s1 + ((uint128_t) r1) * s3 + ((uint128_t) r2) * s2; + + r4 *= 19; + r1 *= 19; + r2 *= 19; + r3 *= 19; + + t[0] += ((uint128_t) r4) * s1 + ((uint128_t) r1) * s4 + ((uint128_t) r2) * s3 + ((uint128_t) r3) * s2; + t[1] += ((uint128_t) r4) * s2 + ((uint128_t) r2) * s4 + ((uint128_t) r3) * s3; + t[2] += ((uint128_t) r4) * s3 + ((uint128_t) r3) * s4; + t[3] += ((uint128_t) r4) * s4; + + r0 = (limb)t[0] & 0x7ffffffffffffUL; c = (limb)(t[0] >> 51); + t[1] += c; r1 = (limb)t[1] & 0x7ffffffffffffUL; c = (limb)(t[1] >> 51); + t[2] += c; r2 = (limb)t[2] & 0x7ffffffffffffUL; c = (limb)(t[2] >> 51); + t[3] += c; r3 = (limb)t[3] & 0x7ffffffffffffUL; c = (limb)(t[3] >> 51); + t[4] += c; r4 = (limb)t[4] & 0x7ffffffffffffUL; c = (limb)(t[4] >> 51); + r0 += c * 19; c = r0 >> 51; r0 = r0 & 0x7ffffffffffffUL; + r1 += c; c = r1 >> 51; r1 = r1 & 0x7ffffffffffffUL; + r2 += c; + + output[0] = r0; + output[1] = r1; + output[2] = r2; + output[3] = r3; + output[4] = r4; +} + +static __always_inline void fsquare_times(felem output, const felem in, limb count) +{ + uint128_t t[5]; + limb r0,r1,r2,r3,r4,c; + limb d0,d1,d2,d4,d419; + + r0 = in[0]; + r1 = in[1]; + r2 = in[2]; + r3 = in[3]; + r4 = in[4]; + + do { + d0 = r0 * 2; + d1 = r1 * 2; + d2 = r2 * 2 * 19; + d419 = r4 * 19; + d4 = d419 * 2; + + t[0] = ((uint128_t) r0) * r0 + 
((uint128_t) d4) * r1 + (((uint128_t) d2) * (r3 )); + t[1] = ((uint128_t) d0) * r1 + ((uint128_t) d4) * r2 + (((uint128_t) r3) * (r3 * 19)); + t[2] = ((uint128_t) d0) * r2 + ((uint128_t) r1) * r1 + (((uint128_t) d4) * (r3 )); + t[3] = ((uint128_t) d0) * r3 + ((uint128_t) d1) * r2 + (((uint128_t) r4) * (d419 )); + t[4] = ((uint128_t) d0) * r4 + ((uint128_t) d1) * r3 + (((uint128_t) r2) * (r2 )); + + r0 = (limb)t[0] & 0x7ffffffffffffUL; c = (limb)(t[0] >> 51); + t[1] += c; r1 = (limb)t[1] & 0x7ffffffffffffUL; c = (limb)(t[1] >> 51); + t[2] += c; r2 = (limb)t[2] & 0x7ffffffffffffUL; c = (limb)(t[2] >> 51); + t[3] += c; r3 = (limb)t[3] & 0x7ffffffffffffUL; c = (limb)(t[3] >> 51); + t[4] += c; r4 = (limb)t[4] & 0x7ffffffffffffUL; c = (limb)(t[4] >> 51); + r0 += c * 19; c = r0 >> 51; r0 = r0 & 0x7ffffffffffffUL; + r1 += c; c = r1 >> 51; r1 = r1 & 0x7ffffffffffffUL; + r2 += c; + } while(--count); + + output[0] = r0; + output[1] = r1; + output[2] = r2; + output[3] = r3; + output[4] = r4; +} + +/* Load a little-endian 64-bit number from @in. NOTE(review): @in is dereferenced directly as a uint64_t, and fexpand() below passes in + 6, in + 12 and in + 19, so this assumes the platform tolerates unaligned 64-bit loads - confirm, or switch to an unaligned-safe accessor. */ +static inline limb load_limb(const uint8_t *in) +{ + return le64_to_cpu(*(uint64_t *)in); +} +/* Store @in at @out as a little-endian 64-bit number. Writes through a uint64_t pointer, so it presumably needs @out to be suitably aligned - TODO confirm for the callers' buffers. */ +static inline void store_limb(uint8_t *out, limb in) +{ + *(uint64_t *)out = cpu_to_le64(in); +} + +/* Take a little-endian, 32-byte number and expand it into polynomial form: five limbs of 51 bits each (mask 0x7ffffffffffff), taken at bit offsets 0, 51, 102, 153 and 204 of the input. Each limb is read as a 64-bit load from the byte containing its first bit and shifted right by the leftover bit count. */ +static inline void fexpand(limb *output, const uint8_t *in) +{ + output[0] = load_limb(in) & 0x7ffffffffffffUL; + output[1] = (load_limb(in + 6) >> 3) & 0x7ffffffffffffUL; + output[2] = (load_limb(in + 12) >> 6) & 0x7ffffffffffffUL; + output[3] = (load_limb(in + 19) >> 1) & 0x7ffffffffffffUL; + output[4] = (load_limb(in + 24) >> 12) & 0x7ffffffffffffUL; +} + +/* Take a fully reduced polynomial form number and contract it into a + * little-endian, 32-byte array + */ +static void fcontract(uint8_t *output, const felem input) +{ + uint128_t t[5]; + + t[0] = input[0]; + t[1] = input[1]; + t[2] = input[2]; + t[3] = input[3]; + t[4] = input[4]; + + t[1] += t[0] >> 51; t[0] &= 
0x7ffffffffffffUL; + t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffffUL; + t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffffUL; + t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffffUL; + t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffffUL; + + t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffffUL; + t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffffUL; + t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffffUL; + t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffffUL; + t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffffUL; + + /* now t is between 0 and 2^255-1, properly carried. */ + /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */ + + t[0] += 19; + + t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffffUL; + t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffffUL; + t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffffUL; + t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffffUL; + t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffffUL; + + /* now between 19 and 2^255-1 in both cases, and offset by 19. */ + + t[0] += 0x8000000000000UL - 19; + t[1] += 0x8000000000000UL - 1; + t[2] += 0x8000000000000UL - 1; + t[3] += 0x8000000000000UL - 1; + t[4] += 0x8000000000000UL - 1; + + /* now between 2^255 and 2^256-20, and offset by 2^255. 
*/ + + t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffffUL; + t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffffUL; + t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffffUL; + t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffffUL; + t[4] &= 0x7ffffffffffffUL; + + store_limb(output, t[0] | (t[1] << 51)); + store_limb(output+8, (t[1] >> 13) | (t[2] << 38)); + store_limb(output+16, (t[2] >> 26) | (t[3] << 25)); + store_limb(output+24, (t[3] >> 39) | (t[4] << 12)); +} + +/* Input: Q, Q', Q-Q' + * Output: 2Q, Q+Q' + * + * x2 z2: long form + * x3 z3: long form + * x z: short form, destroyed + * xprime zprime: short form, destroyed + * qmqp: short form, preserved + */ +static void fmonty(limb *x2, limb *z2, /* output 2Q */ + limb *x3, limb *z3, /* output Q + Q' */ + limb *x, limb *z, /* input Q */ + limb *xprime, limb *zprime, /* input Q' */ + const limb *qmqp /* input Q - Q' */) +{ + limb origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5], zzprime[5], zzzprime[5]; + + memcpy(origx, x, 5 * sizeof(limb)); + fsum(x, z); + fdifference_backwards(z, origx); // z = origx - z, i.e. x - z + + memcpy(origxprime, xprime, sizeof(limb) * 5); + fsum(xprime, zprime); + fdifference_backwards(zprime, origxprime); + fmul(xxprime, xprime, z); + fmul(zzprime, x, zprime); + memcpy(origxprime, xxprime, sizeof(limb) * 5); + fsum(xxprime, zzprime); + fdifference_backwards(zzprime, origxprime); + fsquare_times(x3, xxprime, 1); + fsquare_times(zzzprime, zzprime, 1); + fmul(z3, zzzprime, qmqp); + + fsquare_times(xx, x, 1); + fsquare_times(zz, z, 1); + fmul(x2, xx, zz); + fdifference_backwards(zz, xx); // does zz = xx - zz + fscalar_product(zzz, zz, 121665); + fsum(zzz, xx); + fmul(z2, zz, zzz); +} + +/* Maybe swap the contents of two limb arrays (@a and @b), each five limbs + * long. Perform the swap iff @iswap is 1; @iswap must be 0 or 1 because the mask is computed as -iswap. + * + * This function performs the swap without leaking any side-channel + * information. 
+ */ +static void swap_conditional(limb a[5], limb b[5], limb iswap) +{ + unsigned i; + const limb swap = -iswap; + + for (i = 0; i < 5; ++i) { + const limb x = swap & (a[i] ^ b[i]); + a[i] ^= x; + b[i] ^= x; + } +} + +/* Calculates nQ where Q is the x-coordinate of a point on the curve + * + * resultx/resultz: the x coordinate of the resulting curve point (short form) + * n: a little endian, 32-byte number + * q: a point of the curve (short form) + */ +static void cmult(limb *resultx, limb *resultz, const uint8_t *n, const limb *q) +{ + limb a[5] = {0}, b[5] = {1}, c[5] = {1}, d[5] = {0}; + limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t; + limb e[5] = {0}, f[5] = {1}, g[5] = {0}, h[5] = {1}; + limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h; + + unsigned i, j; + + memcpy(nqpqx, q, sizeof(limb) * 5); + + for (i = 0; i < 32; ++i) { + uint8_t byte = n[31 - i]; + for (j = 0; j < 8; ++j) { + const limb bit = byte >> 7; + + swap_conditional(nqx, nqpqx, bit); + swap_conditional(nqz, nqpqz, bit); + fmonty(nqx2, nqz2, + nqpqx2, nqpqz2, + nqx, nqz, + nqpqx, nqpqz, + q); + swap_conditional(nqx2, nqpqx2, bit); + swap_conditional(nqz2, nqpqz2, bit); + + t = nqx; + nqx = nqx2; + nqx2 = t; + t = nqz; + nqz = nqz2; + nqz2 = t; + t = nqpqx; + nqpqx = nqpqx2; + nqpqx2 = t; + t = nqpqz; + nqpqz = nqpqz2; + nqpqz2 = t; + + byte <<= 1; + } + } + + memcpy(resultx, nqx, sizeof(limb) * 5); + memcpy(resultz, nqz, sizeof(limb) * 5); +} + +static void crecip(felem out, const felem z) +{ + felem a,t0,b,c; + + /* 2 */ fsquare_times(a, z, 1); // a = 2 + /* 8 */ fsquare_times(t0, a, 2); + /* 9 */ fmul(b, t0, z); // b = 9 + /* 11 */ fmul(a, b, a); // a = 11 + /* 22 */ fsquare_times(t0, a, 1); + /* 2^5 - 2^0 = 31 */ fmul(b, t0, b); + /* 2^10 - 2^5 */ fsquare_times(t0, b, 5); + /* 2^10 - 2^0 */ fmul(b, t0, b); + /* 2^20 - 2^10 */ fsquare_times(t0, b, 10); + /* 2^20 - 2^0 */ fmul(c, t0, b); + /* 2^40 - 2^20 */ fsquare_times(t0, c, 20); + /* 2^40 - 2^0 */ fmul(t0, t0, c); + /* 2^50 - 2^10 
*/ fsquare_times(t0, t0, 10); + /* 2^50 - 2^0 */ fmul(b, t0, b); + /* 2^100 - 2^50 */ fsquare_times(t0, b, 50); + /* 2^100 - 2^0 */ fmul(c, t0, b); + /* 2^200 - 2^100 */ fsquare_times(t0, c, 100); + /* 2^200 - 2^0 */ fmul(t0, t0, c); + /* 2^250 - 2^50 */ fsquare_times(t0, t0, 50); + /* 2^250 - 2^0 */ fmul(t0, t0, b); + /* 2^255 - 2^5 */ fsquare_times(t0, t0, 5); + /* 2^255 - 21 */ fmul(out, t0, a); +} +/* Writes to @mypublic the 32-byte contracted value x * crecip(z), where (x, z) is the result of cmult() on a normalized copy of @secret and the expanded @basepoint. Every local, including the secret copy, is wiped with memzero_explicit() before returning. */ +void curve25519(uint8_t mypublic[CURVE25519_POINT_SIZE], const uint8_t secret[CURVE25519_POINT_SIZE], const uint8_t basepoint[CURVE25519_POINT_SIZE]) +{ + limb bp[5], x[5], z[5], zmone[5]; + uint8_t e[32]; + + memcpy(e, secret, 32); + normalize_secret(e); + + fexpand(bp, basepoint); + cmult(x, z, e, bp); + crecip(zmone, z); + fmul(z, x, zmone); + fcontract(mypublic, z); + + memzero_explicit(e, sizeof(e)); + memzero_explicit(bp, sizeof(bp)); + memzero_explicit(x, sizeof(x)); + memzero_explicit(z, sizeof(z)); + memzero_explicit(zmone, sizeof(zmone)); +} + +#else +typedef int64_t limb; + +/* Field element representation: + * + * Field elements are written as an array of signed, 64-bit limbs, least + * significant first. The value of the field element is: + * x[0] + 2^26·x[1] + 2^51·x[2] + 2^77·x[3] + 2^102·x[4] + ... + * + * i.e. the limbs are 26, 25, 26, 25, ... bits wide. */ + +/* Sum two numbers: output += in */ +static void fsum(limb *output, const limb *in) +{ + unsigned i; + for (i = 0; i < 10; i += 2) { + output[0 + i] = output[0 + i] + in[0 + i]; + output[1 + i] = output[1 + i] + in[1 + i]; + } +} + +/* Find the difference of two numbers: output = in - output + * (note the order of the arguments!). 
*/ +static void fdifference(limb *output, const limb *in) +{ + unsigned i; + for (i = 0; i < 10; ++i) + output[i] = in[i] - output[i]; +} + +/* Multiply a number by a scalar: output = in * scalar */ +static void fscalar_product(limb *output, const limb *in, const limb scalar) +{ + unsigned i; + for (i = 0; i < 10; ++i) + output[i] = in[i] * scalar; +} + +/* Multiply two numbers: output = in2 * in + * + * output must be distinct to both inputs. The inputs are reduced coefficient + * form, the output is not. + * + * output[x] <= 14 * the largest product of the input limbs. */ +static void fproduct(limb *output, const limb *in2, const limb *in) +{ + output[0] = ((limb) ((int32_t) in2[0])) * ((int32_t) in[0]); + output[1] = ((limb) ((int32_t) in2[0])) * ((int32_t) in[1]) + + ((limb) ((int32_t) in2[1])) * ((int32_t) in[0]); + output[2] = 2 * ((limb) ((int32_t) in2[1])) * ((int32_t) in[1]) + + ((limb) ((int32_t) in2[0])) * ((int32_t) in[2]) + + ((limb) ((int32_t) in2[2])) * ((int32_t) in[0]); + output[3] = ((limb) ((int32_t) in2[1])) * ((int32_t) in[2]) + + ((limb) ((int32_t) in2[2])) * ((int32_t) in[1]) + + ((limb) ((int32_t) in2[0])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in2[3])) * ((int32_t) in[0]); + output[4] = ((limb) ((int32_t) in2[2])) * ((int32_t) in[2]) + + 2 * (((limb) ((int32_t) in2[1])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in2[3])) * ((int32_t) in[1])) + + ((limb) ((int32_t) in2[0])) * ((int32_t) in[4]) + + ((limb) ((int32_t) in2[4])) * ((int32_t) in[0]); + output[5] = ((limb) ((int32_t) in2[2])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in2[3])) * ((int32_t) in[2]) + + ((limb) ((int32_t) in2[1])) * ((int32_t) in[4]) + + ((limb) ((int32_t) in2[4])) * ((int32_t) in[1]) + + ((limb) ((int32_t) in2[0])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in2[5])) * ((int32_t) in[0]); + output[6] = 2 * (((limb) ((int32_t) in2[3])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in2[1])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in2[5])) * ((int32_t) in[1])) + + 
((limb) ((int32_t) in2[2])) * ((int32_t) in[4]) + + ((limb) ((int32_t) in2[4])) * ((int32_t) in[2]) + + ((limb) ((int32_t) in2[0])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in2[6])) * ((int32_t) in[0]); + output[7] = ((limb) ((int32_t) in2[3])) * ((int32_t) in[4]) + + ((limb) ((int32_t) in2[4])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in2[2])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in2[5])) * ((int32_t) in[2]) + + ((limb) ((int32_t) in2[1])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in2[6])) * ((int32_t) in[1]) + + ((limb) ((int32_t) in2[0])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in2[7])) * ((int32_t) in[0]); + output[8] = ((limb) ((int32_t) in2[4])) * ((int32_t) in[4]) + + 2 * (((limb) ((int32_t) in2[3])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in2[5])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in2[1])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in2[7])) * ((int32_t) in[1])) + + ((limb) ((int32_t) in2[2])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in2[6])) * ((int32_t) in[2]) + + ((limb) ((int32_t) in2[0])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in2[8])) * ((int32_t) in[0]); + output[9] = ((limb) ((int32_t) in2[4])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in2[5])) * ((int32_t) in[4]) + + ((limb) ((int32_t) in2[3])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in2[6])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in2[2])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in2[7])) * ((int32_t) in[2]) + + ((limb) ((int32_t) in2[1])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in2[8])) * ((int32_t) in[1]) + + ((limb) ((int32_t) in2[0])) * ((int32_t) in[9]) + + ((limb) ((int32_t) in2[9])) * ((int32_t) in[0]); + output[10] = 2 * (((limb) ((int32_t) in2[5])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in2[3])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in2[7])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in2[1])) * ((int32_t) in[9]) + + ((limb) ((int32_t) in2[9])) * ((int32_t) in[1])) + + ((limb) ((int32_t) in2[4])) * ((int32_t) in[6]) + + ((limb) ((int32_t) 
in2[6])) * ((int32_t) in[4]) + + ((limb) ((int32_t) in2[2])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in2[8])) * ((int32_t) in[2]); + output[11] = ((limb) ((int32_t) in2[5])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in2[6])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in2[4])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in2[7])) * ((int32_t) in[4]) + + ((limb) ((int32_t) in2[3])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in2[8])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in2[2])) * ((int32_t) in[9]) + + ((limb) ((int32_t) in2[9])) * ((int32_t) in[2]); + output[12] = ((limb) ((int32_t) in2[6])) * ((int32_t) in[6]) + + 2 * (((limb) ((int32_t) in2[5])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in2[7])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in2[3])) * ((int32_t) in[9]) + + ((limb) ((int32_t) in2[9])) * ((int32_t) in[3])) + + ((limb) ((int32_t) in2[4])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in2[8])) * ((int32_t) in[4]); + output[13] = ((limb) ((int32_t) in2[6])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in2[7])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in2[5])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in2[8])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in2[4])) * ((int32_t) in[9]) + + ((limb) ((int32_t) in2[9])) * ((int32_t) in[4]); + output[14] = 2 * (((limb) ((int32_t) in2[7])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in2[5])) * ((int32_t) in[9]) + + ((limb) ((int32_t) in2[9])) * ((int32_t) in[5])) + + ((limb) ((int32_t) in2[6])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in2[8])) * ((int32_t) in[6]); + output[15] = ((limb) ((int32_t) in2[7])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in2[8])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in2[6])) * ((int32_t) in[9]) + + ((limb) ((int32_t) in2[9])) * ((int32_t) in[6]); + output[16] = ((limb) ((int32_t) in2[8])) * ((int32_t) in[8]) + + 2 * (((limb) ((int32_t) in2[7])) * ((int32_t) in[9]) + + ((limb) ((int32_t) in2[9])) * ((int32_t) in[7])); + output[17] = ((limb) ((int32_t) in2[8])) * ((int32_t) 
in[9]) + + ((limb) ((int32_t) in2[9])) * ((int32_t) in[8]); + output[18] = 2 * ((limb) ((int32_t) in2[9])) * ((int32_t) in[9]); +} + +/* Reduce a long form to a short form by taking the input mod 2^255 - 19. + * + * On entry: |output[i]| < 14*2^54 + * On exit: |output[0..8]| < 280*2^54 */ +static void freduce_degree(limb *output) +{ + /* Each of these shifts and adds ends up multiplying the value by 19. + * + * For output[0..8], the absolute entry value is < 14*2^54 and we add, at + * most, 19*14*2^54 thus, on exit, |output[0..8]| < 280*2^54. */ + output[8] += output[18] << 4; + output[8] += output[18] << 1; + output[8] += output[18]; + output[7] += output[17] << 4; + output[7] += output[17] << 1; + output[7] += output[17]; + output[6] += output[16] << 4; + output[6] += output[16] << 1; + output[6] += output[16]; + output[5] += output[15] << 4; + output[5] += output[15] << 1; + output[5] += output[15]; + output[4] += output[14] << 4; + output[4] += output[14] << 1; + output[4] += output[14]; + output[3] += output[13] << 4; + output[3] += output[13] << 1; + output[3] += output[13]; + output[2] += output[12] << 4; + output[2] += output[12] << 1; + output[2] += output[12]; + output[1] += output[11] << 4; + output[1] += output[11] << 1; + output[1] += output[11]; + output[0] += output[10] << 4; + output[0] += output[10] << 1; + output[0] += output[10]; +} + +#if (-1 & 3) != 3 +#error "This code only works on a two's complement system" +#endif + +/* return v / 2^26, using only shifts and adds. + * + * On entry: v can take any value. */ +static inline limb div_by_2_26(const limb v) +{ + /* High word of v; no shift needed. */ + const uint32_t highword = (uint32_t) (((uint64_t) v) >> 32); + /* Set to all 1s if v was negative; else set to 0s. */ + const int32_t sign = ((int32_t) highword) >> 31; + /* Set to 0x3ffffff if v was negative; else set to 0. 
*/ + const int32_t roundoff = ((uint32_t) sign) >> 6; + /* Should return v / (1<<26) */ + return (v + roundoff) >> 26; +} + +/* return v / (2^25), using only shifts and adds. + * + * On entry: v can take any value. */ +static inline limb div_by_2_25(const limb v) +{ + /* High word of v; no shift needed*/ + const uint32_t highword = (uint32_t) (((uint64_t) v) >> 32); + /* Set to all 1s if v was negative; else set to 0s. */ + const int32_t sign = ((int32_t) highword) >> 31; + /* Set to 0x1ffffff if v was negative; else set to 0. */ + const int32_t roundoff = ((uint32_t) sign) >> 7; + /* Should return v / (1<<25) */ + return (v + roundoff) >> 25; +} + +/* Reduce all coefficients of the short form input so that |x| < 2^26. + * + * On entry: |output[i]| < 280*2^54 */ +static void freduce_coefficients(limb *output) +{ + unsigned i; + + output[10] = 0; + + for (i = 0; i < 10; i += 2) { + limb over = div_by_2_26(output[i]); + /* The entry condition (that |output[i]| < 280*2^54) means that over is, at + * most, 280*2^28 in the first iteration of this loop. This is added to the + * next limb and we can approximate the resulting bound of that limb by + * 281*2^54. */ + output[i] -= over << 26; + output[i+1] += over; + + /* For the first iteration, |output[i+1]| < 281*2^54, thus |over| < + * 281*2^29. When this is added to the next limb, the resulting bound can + * be approximated as 281*2^54. + * + * For subsequent iterations of the loop, 281*2^54 remains a conservative + * bound and no overflow occurs. */ + over = div_by_2_25(output[i+1]); + output[i+1] -= over << 25; + output[i+2] += over; + } + /* Now |output[10]| < 281*2^29 and all other coefficients are reduced. */ + output[0] += output[10] << 4; + output[0] += output[10] << 1; + output[0] += output[10]; + + output[10] = 0; + + /* Now output[1..9] are reduced, and |output[0]| < 2^26 + 19*281*2^29 + * So |over| will be no more than 2^16. 
*/ + { + limb over = div_by_2_26(output[0]); + output[0] -= over << 26; + output[1] += over; + } + + /* Now output[0,2..9] are reduced, and |output[1]| < 2^25 + 2^16 < 2^26. The + * bound on |output[1]| is sufficient to meet our needs. */ +} + +/* A helpful wrapper around fproduct: output = in * in2. + * + * On entry: |in[i]| < 2^27 and |in2[i]| < 2^27. + * + * output must be distinct to both inputs. The output is reduced degree + * (indeed, one need only provide storage for 10 limbs) and |output[i]| < 2^26. */ +static void fmul(limb *output, const limb *in, const limb *in2) +{ + limb t[19]; + fproduct(t, in, in2); + /* |t[i]| < 14*2^54 */ + freduce_degree(t); + freduce_coefficients(t); + /* |t[i]| < 2^26 */ + memcpy(output, t, sizeof(limb) * 10); +} + +/* Square a number: output = in**2 + * + * output must be distinct from the input. The inputs are reduced coefficient + * form, the output is not. + * + * output[x] <= 14 * the largest product of the input limbs. */ +static void fsquare_inner(limb *output, const limb *in) +{ + output[0] = ((limb) ((int32_t) in[0])) * ((int32_t) in[0]); + output[1] = 2 * ((limb) ((int32_t) in[0])) * ((int32_t) in[1]); + output[2] = 2 * (((limb) ((int32_t) in[1])) * ((int32_t) in[1]) + + ((limb) ((int32_t) in[0])) * ((int32_t) in[2])); + output[3] = 2 * (((limb) ((int32_t) in[1])) * ((int32_t) in[2]) + + ((limb) ((int32_t) in[0])) * ((int32_t) in[3])); + output[4] = ((limb) ((int32_t) in[2])) * ((int32_t) in[2]) + + 4 * ((limb) ((int32_t) in[1])) * ((int32_t) in[3]) + + 2 * ((limb) ((int32_t) in[0])) * ((int32_t) in[4]); + output[5] = 2 * (((limb) ((int32_t) in[2])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in[1])) * ((int32_t) in[4]) + + ((limb) ((int32_t) in[0])) * ((int32_t) in[5])); + output[6] = 2 * (((limb) ((int32_t) in[3])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in[2])) * ((int32_t) in[4]) + + ((limb) ((int32_t) in[0])) * ((int32_t) in[6]) + + 2 * ((limb) ((int32_t) in[1])) * ((int32_t) in[5])); + output[7] = 2 * (((limb) 
((int32_t) in[3])) * ((int32_t) in[4]) + + ((limb) ((int32_t) in[2])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in[1])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in[0])) * ((int32_t) in[7])); + output[8] = ((limb) ((int32_t) in[4])) * ((int32_t) in[4]) + + 2 * (((limb) ((int32_t) in[2])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in[0])) * ((int32_t) in[8]) + + 2 * (((limb) ((int32_t) in[1])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in[3])) * ((int32_t) in[5]))); + output[9] = 2 * (((limb) ((int32_t) in[4])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in[3])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in[2])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in[1])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in[0])) * ((int32_t) in[9])); + output[10] = 2 * (((limb) ((int32_t) in[5])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in[4])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in[2])) * ((int32_t) in[8]) + + 2 * (((limb) ((int32_t) in[3])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in[1])) * ((int32_t) in[9]))); + output[11] = 2 * (((limb) ((int32_t) in[5])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in[4])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in[3])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in[2])) * ((int32_t) in[9])); + output[12] = ((limb) ((int32_t) in[6])) * ((int32_t) in[6]) + + 2 * (((limb) ((int32_t) in[4])) * ((int32_t) in[8]) + + 2 * (((limb) ((int32_t) in[5])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in[3])) * ((int32_t) in[9]))); + output[13] = 2 * (((limb) ((int32_t) in[6])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in[5])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in[4])) * ((int32_t) in[9])); + output[14] = 2 * (((limb) ((int32_t) in[7])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in[6])) * ((int32_t) in[8]) + + 2 * ((limb) ((int32_t) in[5])) * ((int32_t) in[9])); + output[15] = 2 * (((limb) ((int32_t) in[7])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in[6])) * ((int32_t) in[9])); + output[16] = ((limb) ((int32_t) in[8])) * ((int32_t) 
in[8]) + + 4 * ((limb) ((int32_t) in[7])) * ((int32_t) in[9]); + output[17] = 2 * ((limb) ((int32_t) in[8])) * ((int32_t) in[9]); + output[18] = 2 * ((limb) ((int32_t) in[9])) * ((int32_t) in[9]); +} + +/* fsquare sets output = in^2. + * + * On entry: The |in| argument is in reduced coefficients form and |in[i]| < + * 2^27. + * + * On exit: The |output| argument is in reduced coefficients form (indeed, one + * need only provide storage for 10 limbs) and |out[i]| < 2^26. */ +static void fsquare(limb *output, const limb *in) +{ + limb t[19]; + fsquare_inner(t, in); + /* |t[i]| < 14*2^54 because the largest product of two limbs will be < + * 2^(27+27) and fsquare_inner adds together, at most, 14 of those + * products. */ + freduce_degree(t); + freduce_coefficients(t); + /* |t[i]| < 2^26 */ + memcpy(output, t, sizeof(limb) * 10); +} + +/* Take a little-endian, 32-byte number and expand it into polynomial form */ +static inline void fexpand(limb *output, const uint8_t *input) +{ +#define F(n,start,shift,mask) \ + output[n] = ((((limb) input[start + 0]) | \ + ((limb) input[start + 1]) << 8 | \ + ((limb) input[start + 2]) << 16 | \ + ((limb) input[start + 3]) << 24) >> shift) & mask; + F(0, 0, 0, 0x3ffffff); + F(1, 3, 2, 0x1ffffff); + F(2, 6, 3, 0x3ffffff); + F(3, 9, 5, 0x1ffffff); + F(4, 12, 6, 0x3ffffff); + F(5, 16, 0, 0x1ffffff); + F(6, 19, 1, 0x3ffffff); + F(7, 22, 3, 0x1ffffff); + F(8, 25, 4, 0x3ffffff); + F(9, 28, 6, 0x1ffffff); +#undef F +} + +#if (-32 >> 1) != -16 +#error "This code only works when >> does sign-extension on negative numbers" +#endif + +/* int32_t_eq returns 0xffffffff iff a == b and zero otherwise. */ +static int32_t int32_t_eq(int32_t a, int32_t b) +{ + a = ~(a ^ b); + a &= a << 16; + a &= a << 8; + a &= a << 4; + a &= a << 2; + a &= a << 1; + return a >> 31; +} + +/* int32_t_gte returns 0xffffffff if a >= b and zero otherwise, where a and b are + * both non-negative. 
*/ +static int32_t int32_t_gte(int32_t a, int32_t b) +{ + a -= b; + /* a >= 0 iff a >= b. */ + return ~(a >> 31); +} + +/* Take a fully reduced polynomial form number and contract it into a + * little-endian, 32-byte array. + * + * On entry: |input_limbs[i]| < 2^26 */ +static void fcontract(uint8_t *output, limb *input_limbs) +{ + int i; + int j; + int32_t input[10]; + int32_t mask; + + /* |input_limbs[i]| < 2^26, so it's valid to convert to an int32_t. */ + for (i = 0; i < 10; i++) { + input[i] = input_limbs[i]; + } + + for (j = 0; j < 2; ++j) { + for (i = 0; i < 9; ++i) { + if ((i & 1) == 1) { + /* This calculation is a time-invariant way to make input[i] + * non-negative by borrowing from the next-larger limb. */ + const int32_t mask = input[i] >> 31; + const int32_t carry = -((input[i] & mask) >> 25); + input[i] = input[i] + (carry << 25); + input[i+1] = input[i+1] - carry; + } else { + const int32_t mask = input[i] >> 31; + const int32_t carry = -((input[i] & mask) >> 26); + input[i] = input[i] + (carry << 26); + input[i+1] = input[i+1] - carry; + } + } + + /* There's no greater limb for input[9] to borrow from, but we can multiply + * by 19 and borrow from input[0], which is valid mod 2^255-19. */ + { + const int32_t mask = input[9] >> 31; + const int32_t carry = -((input[9] & mask) >> 25); + input[9] = input[9] + (carry << 25); + input[0] = input[0] - (carry * 19); + } + + /* After the first iteration, input[1..9] are non-negative and fit within + * 25 or 26 bits, depending on position. However, input[0] may be + * negative. */ + } + + /* The first borrow-propagation pass above ended with every limb + except (possibly) input[0] non-negative. + If input[0] was negative after the first pass, then it was because of a + carry from input[9]. On entry, input[9] < 2^26 so the carry was, at most, + one, since (2**26-1) >> 25 = 1. Thus input[0] >= -19. + In the second pass, each limb is decreased by at most one. 
Thus the second + borrow-propagation pass could only have wrapped around to decrease + input[0] again if the first pass left input[0] negative *and* input[1] + through input[9] were all zero. In that case, input[1] is now 2^25 - 1, + and this last borrow-propagation step will leave input[1] non-negative. */ + { + const int32_t mask = input[0] >> 31; + const int32_t carry = -((input[0] & mask) >> 26); + input[0] = input[0] + (carry << 26); + input[1] = input[1] - carry; + } + + /* All input[i] are now non-negative. However, there might be values between + * 2^25 and 2^26 in a limb which is, nominally, 25 bits wide. */ + for (j = 0; j < 2; j++) { + for (i = 0; i < 9; i++) { + if ((i & 1) == 1) { + const int32_t carry = input[i] >> 25; + input[i] &= 0x1ffffff; + input[i+1] += carry; + } else { + const int32_t carry = input[i] >> 26; + input[i] &= 0x3ffffff; + input[i+1] += carry; + } + } + + { + const int32_t carry = input[9] >> 25; + input[9] &= 0x1ffffff; + input[0] += 19*carry; + } + } + + /* If the first carry-chain pass, just above, ended up with a carry from + * input[9], and that caused input[0] to be out-of-bounds, then input[0] was + * < 2^26 + 2*19, because the carry was, at most, two. + * + * If the second pass carried from input[9] again then input[0] is < 2*19 and + * the input[9] -> input[0] carry didn't push input[0] out of bounds. */ + + /* It still remains the case that input might be between 2^255-19 and 2^255. + * In this case, input[1..9] must take their maximum value and input[0] must + * be >= (2^255-19) & 0x3ffffff, which is 0x3ffffed. */ + mask = int32_t_gte(input[0], 0x3ffffed); + for (i = 1; i < 10; i++) { + if ((i & 1) == 1) { + mask &= int32_t_eq(input[i], 0x1ffffff); + } else { + mask &= int32_t_eq(input[i], 0x3ffffff); + } + } + + /* mask is either 0xffffffff (if input >= 2^255-19) and zero otherwise. Thus + * this conditionally subtracts 2^255-19. 
*/ + input[0] -= mask & 0x3ffffed; + + for (i = 1; i < 10; i++) { + if ((i & 1) == 1) { + input[i] -= mask & 0x1ffffff; + } else { + input[i] -= mask & 0x3ffffff; + } + } + + input[1] <<= 2; + input[2] <<= 3; + input[3] <<= 5; + input[4] <<= 6; + input[6] <<= 1; + input[7] <<= 3; + input[8] <<= 4; + input[9] <<= 6; +#define F(i, s) \ + output[s+0] |= input[i] & 0xff; \ + output[s+1] = (input[i] >> 8) & 0xff; \ + output[s+2] = (input[i] >> 16) & 0xff; \ + output[s+3] = (input[i] >> 24) & 0xff; + output[0] = 0; + output[16] = 0; + F(0,0); + F(1,3); + F(2,6); + F(3,9); + F(4,12); + F(5,16); + F(6,19); + F(7,22); + F(8,25); + F(9,28); +#undef F +} + +/* Input: Q, Q', Q-Q' + * Output: 2Q, Q+Q' + * + * x2 z3: long form + * x3 z3: long form + * x z: short form, destroyed + * xprime zprime: short form, destroyed + * qmqp: short form, preserved + * + * On entry and exit, the absolute value of the limbs of all inputs and outputs + * are < 2^26. */ +static void fmonty(limb *x2, limb *z2, /* output 2Q */ + limb *x3, limb *z3, /* output Q + Q' */ + limb *x, limb *z, /* input Q */ + limb *xprime, limb *zprime, /* input Q' */ + const limb *qmqp /* input Q - Q' */) +{ + limb origx[10], origxprime[10], zzz[19], xx[19], zz[19], xxprime[19], + zzprime[19], zzzprime[19], xxxprime[19]; + + memcpy(origx, x, 10 * sizeof(limb)); + fsum(x, z); + /* |x[i]| < 2^27 */ + fdifference(z, origx); /* does x - z */ + /* |z[i]| < 2^27 */ + + memcpy(origxprime, xprime, sizeof(limb) * 10); + fsum(xprime, zprime); + /* |xprime[i]| < 2^27 */ + fdifference(zprime, origxprime); + /* |zprime[i]| < 2^27 */ + fproduct(xxprime, xprime, z); + /* |xxprime[i]| < 14*2^54: the largest product of two limbs will be < + * 2^(27+27) and fproduct adds together, at most, 14 of those products. + * (Approximating that to 2^58 doesn't work out.) 
*/ + fproduct(zzprime, x, zprime); + /* |zzprime[i]| < 14*2^54 */ + freduce_degree(xxprime); + freduce_coefficients(xxprime); + /* |xxprime[i]| < 2^26 */ + freduce_degree(zzprime); + freduce_coefficients(zzprime); + /* |zzprime[i]| < 2^26 */ + memcpy(origxprime, xxprime, sizeof(limb) * 10); + fsum(xxprime, zzprime); + /* |xxprime[i]| < 2^27 */ + fdifference(zzprime, origxprime); + /* |zzprime[i]| < 2^27 */ + fsquare(xxxprime, xxprime); + /* |xxxprime[i]| < 2^26 */ + fsquare(zzzprime, zzprime); + /* |zzzprime[i]| < 2^26 */ + fproduct(zzprime, zzzprime, qmqp); + /* |zzprime[i]| < 14*2^52 */ + freduce_degree(zzprime); + freduce_coefficients(zzprime); + /* |zzprime[i]| < 2^26 */ + memcpy(x3, xxxprime, sizeof(limb) * 10); + memcpy(z3, zzprime, sizeof(limb) * 10); + + fsquare(xx, x); + /* |xx[i]| < 2^26 */ + fsquare(zz, z); + /* |zz[i]| < 2^26 */ + fproduct(x2, xx, zz); + /* |x2[i]| < 14*2^52 */ + freduce_degree(x2); + freduce_coefficients(x2); + /* |x2[i]| < 2^26 */ + fdifference(zz, xx); // does zz = xx - zz + /* |zz[i]| < 2^27 */ + memset(zzz + 10, 0, sizeof(limb) * 9); + fscalar_product(zzz, zz, 121665); + /* |zzz[i]| < 2^(27+17) */ + /* No need to call freduce_degree here: + fscalar_product doesn't increase the degree of its input. */ + freduce_coefficients(zzz); + /* |zzz[i]| < 2^26 */ + fsum(zzz, xx); + /* |zzz[i]| < 2^27 */ + fproduct(z2, zz, zzz); + /* |z2[i]| < 14*2^(26+27) */ + freduce_degree(z2); + freduce_coefficients(z2); + /* |z2|i| < 2^26 */ +} + +/* Conditionally swap two reduced-form limb arrays if 'iswap' is 1, but leave + * them unchanged if 'iswap' is 0. Runs in data-invariant time to avoid + * side-channel attacks. + * + * NOTE that this function requires that 'iswap' be 1 or 0; other values give + * wrong results. Also, the two limb arrays must be in reduced-coefficient, + * reduced-degree form: the values in a[10..19] or b[10..19] aren't swapped, + * and all all values in a[0..9],b[0..9] must have magnitude less than + * INT32_MAX. 
*/ +static void swap_conditional(limb a[19], limb b[19], limb iswap) +{ + unsigned i; + const int32_t swap = (int32_t) -iswap; + + for (i = 0; i < 10; ++i) { + const int32_t x = swap & ( ((int32_t)a[i]) ^ ((int32_t)b[i]) ); + a[i] = ((int32_t)a[i]) ^ x; + b[i] = ((int32_t)b[i]) ^ x; + } +} + +/* Calculates nQ where Q is the x-coordinate of a point on the curve + * + * resultx/resultz: the x coordinate of the resulting curve point (short form) + * n: a little endian, 32-byte number + * q: a point of the curve (short form) */ +static void cmult(limb *resultx, limb *resultz, const uint8_t *n, const limb *q) +{ + limb a[19] = {0}, b[19] = {1}, c[19] = {1}, d[19] = {0}; + limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t; + limb e[19] = {0}, f[19] = {1}, g[19] = {0}, h[19] = {1}; + limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h; + + unsigned i, j; + + memcpy(nqpqx, q, sizeof(limb) * 10); + + for (i = 0; i < 32; ++i) { + uint8_t byte = n[31 - i]; + for (j = 0; j < 8; ++j) { + const limb bit = byte >> 7; + + swap_conditional(nqx, nqpqx, bit); + swap_conditional(nqz, nqpqz, bit); + fmonty(nqx2, nqz2, + nqpqx2, nqpqz2, + nqx, nqz, + nqpqx, nqpqz, + q); + swap_conditional(nqx2, nqpqx2, bit); + swap_conditional(nqz2, nqpqz2, bit); + + t = nqx; + nqx = nqx2; + nqx2 = t; + t = nqz; + nqz = nqz2; + nqz2 = t; + t = nqpqx; + nqpqx = nqpqx2; + nqpqx2 = t; + t = nqpqz; + nqpqz = nqpqz2; + nqpqz2 = t; + + byte <<= 1; + } + } + + memcpy(resultx, nqx, sizeof(limb) * 10); + memcpy(resultz, nqz, sizeof(limb) * 10); +} + +static void crecip(limb *out, const limb *z) +{ + limb z2[10]; + limb z9[10]; + limb z11[10]; + limb z2_5_0[10]; + limb z2_10_0[10]; + limb z2_20_0[10]; + limb z2_50_0[10]; + limb z2_100_0[10]; + limb t0[10]; + limb t1[10]; + int i; + + /* 2 */ fsquare(z2,z); + /* 4 */ fsquare(t1,z2); + /* 8 */ fsquare(t0,t1); + /* 9 */ fmul(z9,t0,z); + /* 11 */ fmul(z11,z9,z2); + /* 22 */ fsquare(t0,z11); + /* 2^5 - 2^0 = 31 */ fmul(z2_5_0,t0,z9); + + /* 2^6 - 2^1 */ 
fsquare(t0,z2_5_0); + /* 2^7 - 2^2 */ fsquare(t1,t0); + /* 2^8 - 2^3 */ fsquare(t0,t1); + /* 2^9 - 2^4 */ fsquare(t1,t0); + /* 2^10 - 2^5 */ fsquare(t0,t1); + /* 2^10 - 2^0 */ fmul(z2_10_0,t0,z2_5_0); + + /* 2^11 - 2^1 */ fsquare(t0,z2_10_0); + /* 2^12 - 2^2 */ fsquare(t1,t0); + /* 2^20 - 2^10 */ for (i = 2; i < 10; i += 2) { fsquare(t0,t1); fsquare(t1,t0); } + /* 2^20 - 2^0 */ fmul(z2_20_0,t1,z2_10_0); + + /* 2^21 - 2^1 */ fsquare(t0,z2_20_0); + /* 2^22 - 2^2 */ fsquare(t1,t0); + /* 2^40 - 2^20 */ for (i = 2; i < 20; i += 2) { fsquare(t0,t1); fsquare(t1,t0); } + /* 2^40 - 2^0 */ fmul(t0,t1,z2_20_0); + + /* 2^41 - 2^1 */ fsquare(t1,t0); + /* 2^42 - 2^2 */ fsquare(t0,t1); + /* 2^50 - 2^10 */ for (i = 2; i < 10; i += 2) { fsquare(t1,t0); fsquare(t0,t1); } + /* 2^50 - 2^0 */ fmul(z2_50_0,t0,z2_10_0); + + /* 2^51 - 2^1 */ fsquare(t0,z2_50_0); + /* 2^52 - 2^2 */ fsquare(t1,t0); + /* 2^100 - 2^50 */ for (i = 2; i < 50; i += 2) { fsquare(t0,t1); fsquare(t1,t0); } + /* 2^100 - 2^0 */ fmul(z2_100_0,t1,z2_50_0); + + /* 2^101 - 2^1 */ fsquare(t1,z2_100_0); + /* 2^102 - 2^2 */ fsquare(t0,t1); + /* 2^200 - 2^100 */ for (i = 2; i < 100; i += 2) { fsquare(t1,t0); fsquare(t0,t1); } + /* 2^200 - 2^0 */ fmul(t1,t0,z2_100_0); + + /* 2^201 - 2^1 */ fsquare(t0,t1); + /* 2^202 - 2^2 */ fsquare(t1,t0); + /* 2^250 - 2^50 */ for (i = 2; i < 50; i += 2) { fsquare(t0,t1); fsquare(t1,t0); } + /* 2^250 - 2^0 */ fmul(t0,t1,z2_50_0); + + /* 2^251 - 2^1 */ fsquare(t1,t0); + /* 2^252 - 2^2 */ fsquare(t0,t1); + /* 2^253 - 2^3 */ fsquare(t1,t0); + /* 2^254 - 2^4 */ fsquare(t0,t1); + /* 2^255 - 2^5 */ fsquare(t1,t0); + /* 2^255 - 21 */ fmul(out,t1,z11); +} + +void curve25519(uint8_t mypublic[CURVE25519_POINT_SIZE], const uint8_t secret[CURVE25519_POINT_SIZE], const uint8_t basepoint[CURVE25519_POINT_SIZE]) +{ + limb bp[10], x[10], z[11], zmone[10]; + uint8_t e[32]; + + memcpy(e, secret, 32); + normalize_secret(e); + + fexpand(bp, basepoint); + cmult(x, z, e, bp); + crecip(zmone, z); + fmul(z, x, 
zmone); + fcontract(mypublic, z); + + memzero_explicit(e, sizeof(e)); + memzero_explicit(bp, sizeof(bp)); + memzero_explicit(x, sizeof(x)); + memzero_explicit(z, sizeof(z)); + memzero_explicit(zmone, sizeof(zmone)); +} +#endif + + +void curve25519_generate_secret(uint8_t secret[CURVE25519_POINT_SIZE]) +{ + get_random_bytes(secret, CURVE25519_POINT_SIZE); + normalize_secret(secret); +} + +void curve25519_generate_public(uint8_t pub[CURVE25519_POINT_SIZE], const uint8_t secret[CURVE25519_POINT_SIZE]) +{ + static const uint8_t basepoint[CURVE25519_POINT_SIZE] = { 9 }; + curve25519(pub, secret, basepoint); +} + +#ifdef DEBUG +struct curve25519_test_vector { + uint8_t private[CURVE25519_POINT_SIZE]; + uint8_t public[CURVE25519_POINT_SIZE]; + uint8_t result[CURVE25519_POINT_SIZE]; +}; +static const struct curve25519_test_vector curve25519_test_vectors[] = { + { + .private = { 0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d, 0x3c, 0x16, 0xc1, 0x72, 0x51, 0xb2, 0x66, 0x45, 0xdf, 0x4c, 0x2f, 0x87, 0xeb, 0xc0, 0x99, 0x2a, 0xb1, 0x77, 0xfb, 0xa5, 0x1d, 0xb9, 0x2c, 0x2a }, + .public = { 0xde, 0x9e, 0xdb, 0x7d, 0x7b, 0x7d, 0xc1, 0xb4, 0xd3, 0x5b, 0x61, 0xc2, 0xec, 0xe4, 0x35, 0x37, 0x3f, 0x83, 0x43, 0xc8, 0x5b, 0x78, 0x67, 0x4d, 0xad, 0xfc, 0x7e, 0x14, 0x6f, 0x88, 0x2b, 0x4f }, + .result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 } + }, + { + .private = { 0x5d, 0xab, 0x08, 0x7e, 0x62, 0x4a, 0x8a, 0x4b, 0x79, 0xe1, 0x7f, 0x8b, 0x83, 0x80, 0x0e, 0xe6, 0x6f, 0x3b, 0xb1, 0x29, 0x26, 0x18, 0xb6, 0xfd, 0x1c, 0x2f, 0x8b, 0x27, 0xff, 0x88, 0xe0, 0xeb }, + .public = { 0x85, 0x20, 0xf0, 0x09, 0x89, 0x30, 0xa7, 0x54, 0x74, 0x8b, 0x7d, 0xdc, 0xb4, 0x3e, 0xf7, 0x5a, 0x0d, 0xbf, 0x3a, 0x0d, 0x26, 0x38, 0x1a, 0xf4, 0xeb, 0xa4, 0xa9, 0x8e, 0xaa, 0x9b, 0x4e, 0x6a }, + .result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, 0x72, 0x8e, 0x3b, 
0xf4, 0x80, 0x35, 0x0f, 0x25, 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 } + }, + { + .private = { 1 }, + .public = { 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x3c, 0x77, 0x77, 0xca, 0xf9, 0x97, 0xb2, 0x64, 0x41, 0x60, 0x77, 0x66, 0x5b, 0x4e, 0x22, 0x9d, 0xb, 0x95, 0x48, 0xdc, 0xc, 0xd8, 0x19, 0x98, 0xdd, 0xcd, 0xc5, 0xc8, 0x53, 0x3c, 0x79, 0x7f } + }, + { + .private = { 1 }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0xb3, 0x2d, 0x13, 0x62, 0xc2, 0x48, 0xd6, 0x2f, 0xe6, 0x26, 0x19, 0xcf, 0xf0, 0x4d, 0xd4, 0x3d, 0xb7, 0x3f, 0xfc, 0x1b, 0x63, 0x8, 0xed, 0xe3, 0xb, 0x78, 0xd8, 0x73, 0x80, 0xf1, 0xe8, 0x34 } + }, + { + .private = { 0xa5, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d, 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0xc4 }, + .public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c }, + .result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90, 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f, 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7, 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 } + }, + { + .private = { 1, 2, 3, 4 }, + .public = { 0 }, + .result = { 0 } + }, + { + .private = { 2, 4, 6, 8 }, + .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8 }, + .result = { 0 } + } +}; +void 
curve25519_selftest(void) +{ + bool success = true; + size_t i = 0; + uint8_t out[CURVE25519_POINT_SIZE]; + + for (i = 0; i < ARRAY_SIZE(curve25519_test_vectors); ++i) { + memset(out, 0, CURVE25519_POINT_SIZE); + curve25519(out, curve25519_test_vectors[i].private, curve25519_test_vectors[i].public); + if (memcmp(out, curve25519_test_vectors[i].result, CURVE25519_POINT_SIZE)) { + pr_info("curve25519 self-test %zu: FAIL\n", i + 1); + success = false; + return; + } + } + + if (success) + pr_info("curve25519 self-tests: pass\n"); +} +#endif diff --git a/src/crypto/curve25519.h b/src/crypto/curve25519.h new file mode 100644 index 0000000..b8f68e7 --- /dev/null +++ b/src/crypto/curve25519.h @@ -0,0 +1,20 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#ifndef CURVE25519_H +#define CURVE25519_H + +#include + +enum curve25519_lengths { + CURVE25519_POINT_SIZE = 32 +}; + +void curve25519(uint8_t mypublic[CURVE25519_POINT_SIZE], const uint8_t secret[CURVE25519_POINT_SIZE], const uint8_t basepoint[CURVE25519_POINT_SIZE]); +void curve25519_generate_secret(uint8_t secret[CURVE25519_POINT_SIZE]); +void curve25519_generate_public(uint8_t pub[CURVE25519_POINT_SIZE], const uint8_t secret[CURVE25519_POINT_SIZE]); + +#ifdef DEBUG +void curve25519_selftest(void); +#endif + +#endif diff --git a/src/crypto/poly1305-avx2-x86_64.S b/src/crypto/poly1305-avx2-x86_64.S new file mode 100644 index 0000000..f5b8c8a --- /dev/null +++ b/src/crypto/poly1305-avx2-x86_64.S @@ -0,0 +1,386 @@ +/* + * Poly1305 authenticator algorithm, RFC7539, x64 AVX2 functions + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */
+
+#include
+
+.data
+.align 32
+
+ANMASK:	.octa 0x0000000003ffffff0000000003ffffff
+	.octa 0x0000000003ffffff0000000003ffffff
+ORMASK:	.octa 0x00000000010000000000000001000000
+	.octa 0x00000000010000000000000001000000
+
+.text
+
+#define h0 0x00(%rdi)
+#define h1 0x04(%rdi)
+#define h2 0x08(%rdi)
+#define h3 0x0c(%rdi)
+#define h4 0x10(%rdi)
+#define r0 0x00(%rdx)
+#define r1 0x04(%rdx)
+#define r2 0x08(%rdx)
+#define r3 0x0c(%rdx)
+#define r4 0x10(%rdx)
+#define u0 0x00(%r8)
+#define u1 0x04(%r8)
+#define u2 0x08(%r8)
+#define u3 0x0c(%r8)
+#define u4 0x10(%r8)
+#define w0 0x14(%r8)
+#define w1 0x18(%r8)
+#define w2 0x1c(%r8)
+#define w3 0x20(%r8)
+#define w4 0x24(%r8)
+#define y0 0x28(%r8)
+#define y1 0x2c(%r8)
+#define y2 0x30(%r8)
+#define y3 0x34(%r8)
+#define y4 0x38(%r8)
+#define m %rsi
+#define hc0 %ymm0
+#define hc1 %ymm1
+#define hc2 %ymm2
+#define hc3 %ymm3
+#define hc4 %ymm4
+#define hc0x %xmm0
+#define hc1x %xmm1
+#define hc2x %xmm2
+#define hc3x %xmm3
+#define hc4x %xmm4
+#define t1 %ymm5
+#define t2 %ymm6
+#define t1x %xmm5
+#define t2x %xmm6
+#define ruwy0 %ymm7
+#define ruwy1 %ymm8
+#define ruwy2 %ymm9
+#define ruwy3 %ymm10
+#define ruwy4 %ymm11
+#define ruwy0x %xmm7
+#define ruwy1x %xmm8
+#define ruwy2x %xmm9
+#define ruwy3x %xmm10
+#define ruwy4x %xmm11
+#define svxz1 %ymm12
+#define svxz2 %ymm13
+#define svxz3 %ymm14
+#define svxz4 %ymm15
+#define d0 %r9
+#define d1 %r10
+#define d2 %r11
+#define d3 %r12
+#define d4 %r13
+
+ENTRY(poly1305_asm_4block_avx2)
+	# %rdi: Accumulator h[5]
+	# %rsi: 64 byte input block m
+	# %rdx: Poly1305 key r[5]
+	# %rcx: Quadblock count
+	# %r8:  Poly1305 derived key r^2 u[5], r^3 w[5], r^4 y[5],
+
+	# This four-block variant uses loop unrolled block processing. It
+	# requires 4 Poly1305 keys: r, r^2, r^3 and r^4:
+	# h = (h + m) * r  =>  h = (h + m1) * r^4 + m2 * r^3 + m3 * r^2 + m4 * r
+
+	vzeroupper
+	push %rbx
+	push %r12
+	push %r13
+
+	# combine r0,u0,w0,y0
+	vmovd y0,ruwy0x
+	vmovd w0,t1x
+	vpunpcklqdq t1,ruwy0,ruwy0
+	vmovd u0,t1x
+	vmovd r0,t2x
+	vpunpcklqdq t2,t1,t1
+	vperm2i128 $0x20,t1,ruwy0,ruwy0
+
+	# combine r1,u1,w1,y1 and s1=r1*5,v1=u1*5,x1=w1*5,z1=y1*5
+	vmovd y1,ruwy1x
+	vmovd w1,t1x
+	vpunpcklqdq t1,ruwy1,ruwy1
+	vmovd u1,t1x
+	vmovd r1,t2x
+	vpunpcklqdq t2,t1,t1
+	vperm2i128 $0x20,t1,ruwy1,ruwy1
+	vpslld $2,ruwy1,svxz1
+	vpaddd ruwy1,svxz1,svxz1
+
+	# combine r2,u2,w2,y2 and s2=r2*5,v2=u2*5,x2=w2*5,z2=y2*5
+	vmovd y2,ruwy2x
+	vmovd w2,t1x
+	vpunpcklqdq t1,ruwy2,ruwy2
+	vmovd u2,t1x
+	vmovd r2,t2x
+	vpunpcklqdq t2,t1,t1
+	vperm2i128 $0x20,t1,ruwy2,ruwy2
+	vpslld $2,ruwy2,svxz2
+	vpaddd ruwy2,svxz2,svxz2
+
+	# combine r3,u3,w3,y3 and s3=r3*5,v3=u3*5,x3=w3*5,z3=y3*5
+	vmovd y3,ruwy3x
+	vmovd w3,t1x
+	vpunpcklqdq t1,ruwy3,ruwy3
+	vmovd u3,t1x
+	vmovd r3,t2x
+	vpunpcklqdq t2,t1,t1
+	vperm2i128 $0x20,t1,ruwy3,ruwy3
+	vpslld $2,ruwy3,svxz3
+	vpaddd ruwy3,svxz3,svxz3
+
+	# combine r4,u4,w4,y4 and s4=r4*5,v4=u4*5,x4=w4*5,z4=y4*5
+	vmovd y4,ruwy4x
+	vmovd w4,t1x
+	vpunpcklqdq t1,ruwy4,ruwy4
+	vmovd u4,t1x
+	vmovd r4,t2x
+	vpunpcklqdq t2,t1,t1
+	vperm2i128 $0x20,t1,ruwy4,ruwy4
+	vpslld $2,ruwy4,svxz4
+	vpaddd ruwy4,svxz4,svxz4
+
+.Ldoblock4:
+	# hc0 = [m[48-51] & 0x3ffffff, m[32-35] & 0x3ffffff,
+	#	 m[16-19] & 0x3ffffff, m[ 0- 3] & 0x3ffffff + h0]
+	vmovd 0x00(m),hc0x
+	vmovd 0x10(m),t1x
+	vpunpcklqdq t1,hc0,hc0
+	vmovd 0x20(m),t1x
+	vmovd 0x30(m),t2x
+	vpunpcklqdq t2,t1,t1
+	vperm2i128 $0x20,t1,hc0,hc0
+	vpand ANMASK(%rip),hc0,hc0
+	vmovd h0,t1x
+	vpaddd t1,hc0,hc0
+	# hc1 = [(m[51-54] >> 2) & 0x3ffffff, (m[35-38] >> 2) & 0x3ffffff,
+	#	 (m[19-22] >> 2) & 0x3ffffff, (m[ 3- 6] >> 2) & 0x3ffffff + h1]
+	vmovd 0x03(m),hc1x
+	vmovd 0x13(m),t1x
+	vpunpcklqdq t1,hc1,hc1
+	vmovd 0x23(m),t1x
+	vmovd 0x33(m),t2x
+	vpunpcklqdq t2,t1,t1
+	vperm2i128 $0x20,t1,hc1,hc1
+	vpsrld $2,hc1,hc1
+	vpand ANMASK(%rip),hc1,hc1
+	vmovd h1,t1x
+	vpaddd t1,hc1,hc1
+	# hc2 = [(m[54-57] >> 4) & 0x3ffffff, (m[38-41] >> 4) & 0x3ffffff,
+	#	 (m[22-25] >> 4) & 0x3ffffff, (m[ 6- 9] >> 4) & 0x3ffffff + h2]
+	vmovd 0x06(m),hc2x
+	vmovd 0x16(m),t1x
+	vpunpcklqdq t1,hc2,hc2
+	vmovd 0x26(m),t1x
+	vmovd 0x36(m),t2x
+	vpunpcklqdq t2,t1,t1
+	vperm2i128 $0x20,t1,hc2,hc2
+	vpsrld $4,hc2,hc2
+	vpand ANMASK(%rip),hc2,hc2
+	vmovd h2,t1x
+	vpaddd t1,hc2,hc2
+	# hc3 = [(m[57-60] >> 6) & 0x3ffffff, (m[41-44] >> 6) & 0x3ffffff,
+	#	 (m[25-28] >> 6) & 0x3ffffff, (m[ 9-12] >> 6) & 0x3ffffff + h3]
+	vmovd 0x09(m),hc3x
+	vmovd 0x19(m),t1x
+	vpunpcklqdq t1,hc3,hc3
+	vmovd 0x29(m),t1x
+	vmovd 0x39(m),t2x
+	vpunpcklqdq t2,t1,t1
+	vperm2i128 $0x20,t1,hc3,hc3
+	vpsrld $6,hc3,hc3
+	vpand ANMASK(%rip),hc3,hc3
+	vmovd h3,t1x
+	vpaddd t1,hc3,hc3
+	# hc4 = [(m[60-63] >> 8) | (1<<24), (m[44-47] >> 8) | (1<<24),
+	#	 (m[28-31] >> 8) | (1<<24), (m[12-15] >> 8) | (1<<24) + h4]
+	vmovd 0x0c(m),hc4x
+	vmovd 0x1c(m),t1x
+	vpunpcklqdq t1,hc4,hc4
+	vmovd 0x2c(m),t1x
+	vmovd 0x3c(m),t2x
+	vpunpcklqdq t2,t1,t1
+	vperm2i128 $0x20,t1,hc4,hc4
+	vpsrld $8,hc4,hc4
+	vpor ORMASK(%rip),hc4,hc4
+	vmovd h4,t1x
+	vpaddd t1,hc4,hc4
+
+	# t1 = [ hc0[3] * r0, hc0[2] * u0, hc0[1] * w0, hc0[0] * y0 ]
+	vpmuludq hc0,ruwy0,t1
+	# t1 += [ hc1[3] * s4, hc1[2] * v4, hc1[1] * x4, hc1[0] * z4 ]
+	vpmuludq hc1,svxz4,t2
+	vpaddq t2,t1,t1
+	# t1 += [ hc2[3] * s3, hc2[2] * v3, hc2[1] * x3, hc2[0] * z3 ]
+	vpmuludq hc2,svxz3,t2
+	vpaddq t2,t1,t1
+	# t1 += [ hc3[3] * s2, hc3[2] * v2, hc3[1] * x2, hc3[0] * z2 ]
+	vpmuludq hc3,svxz2,t2
+	vpaddq t2,t1,t1
+	# t1 += [ hc4[3] * s1, hc4[2] * v1, hc4[1] * x1, hc4[0] * z1 ]
+	vpmuludq hc4,svxz1,t2
+	vpaddq t2,t1,t1
+	# d0 = t1[0] + t1[1] + t1[2] + t1[3]
+	vpermq $0xee,t1,t2
+	vpaddq t2,t1,t1
+	vpsrldq $8,t1,t2
+	vpaddq t2,t1,t1
+	vmovq t1x,d0
+
+	# t1 = [ hc0[3] * r1, hc0[2] * u1, hc0[1] * w1, hc0[0] * y1 ]
+	vpmuludq hc0,ruwy1,t1
+	# t1 += [ hc1[3] * r0, hc1[2] * u0, hc1[1] * w0, hc1[0] * y0 ]
+	vpmuludq hc1,ruwy0,t2
+	vpaddq t2,t1,t1
+	# t1 += [ hc2[3] * s4, hc2[2] * v4, hc2[1] * x4, hc2[0] * z4 ]
+	vpmuludq hc2,svxz4,t2
+	vpaddq t2,t1,t1
+	# t1 += [ hc3[3] * s3, hc3[2] * v3, hc3[1] * x3, hc3[0] * z3 ]
+	vpmuludq hc3,svxz3,t2
+	vpaddq t2,t1,t1
+	# t1 += [ hc4[3] * s2, hc4[2] * v2, hc4[1] * x2, hc4[0] * z2 ]
+	vpmuludq hc4,svxz2,t2
+	vpaddq t2,t1,t1
+	# d1 = t1[0] + t1[1] + t1[2] + t1[3]
+	vpermq $0xee,t1,t2
+	vpaddq t2,t1,t1
+	vpsrldq $8,t1,t2
+	vpaddq t2,t1,t1
+	vmovq t1x,d1
+
+	# t1 = [ hc0[3] * r2, hc0[2] * u2, hc0[1] * w2, hc0[0] * y2 ]
+	vpmuludq hc0,ruwy2,t1
+	# t1 += [ hc1[3] * r1, hc1[2] * u1, hc1[1] * w1, hc1[0] * y1 ]
+	vpmuludq hc1,ruwy1,t2
+	vpaddq t2,t1,t1
+	# t1 += [ hc2[3] * r0, hc2[2] * u0, hc2[1] * w0, hc2[0] * y0 ]
+	vpmuludq hc2,ruwy0,t2
+	vpaddq t2,t1,t1
+	# t1 += [ hc3[3] * s4, hc3[2] * v4, hc3[1] * x4, hc3[0] * z4 ]
+	vpmuludq hc3,svxz4,t2
+	vpaddq t2,t1,t1
+	# t1 += [ hc4[3] * s3, hc4[2] * v3, hc4[1] * x3, hc4[0] * z3 ]
+	vpmuludq hc4,svxz3,t2
+	vpaddq t2,t1,t1
+	# d2 = t1[0] + t1[1] + t1[2] + t1[3]
+	vpermq $0xee,t1,t2
+	vpaddq t2,t1,t1
+	vpsrldq $8,t1,t2
+	vpaddq t2,t1,t1
+	vmovq t1x,d2
+
+	# t1 = [ hc0[3] * r3, hc0[2] * u3, hc0[1] * w3, hc0[0] * y3 ]
+	vpmuludq hc0,ruwy3,t1
+	# t1 += [ hc1[3] * r2, hc1[2] * u2, hc1[1] * w2, hc1[0] * y2 ]
+	vpmuludq hc1,ruwy2,t2
+	vpaddq t2,t1,t1
+	# t1 += [ hc2[3] * r1, hc2[2] * u1, hc2[1] * w1, hc2[0] * y1 ]
+	vpmuludq hc2,ruwy1,t2
+	vpaddq t2,t1,t1
+	# t1 += [ hc3[3] * r0, hc3[2] * u0, hc3[1] * w0, hc3[0] * y0 ]
+	vpmuludq hc3,ruwy0,t2
+	vpaddq t2,t1,t1
+	# t1 += [ hc4[3] * s4, hc4[2] * v4, hc4[1] * x4, hc4[0] * z4 ]
+	vpmuludq hc4,svxz4,t2
+	vpaddq t2,t1,t1
+	# d3 = t1[0] + t1[1] + t1[2] + t1[3]
+	vpermq $0xee,t1,t2
+	vpaddq t2,t1,t1
+	vpsrldq $8,t1,t2
+	vpaddq t2,t1,t1
+	vmovq t1x,d3
+
+	# t1 = [ hc0[3] * r4, hc0[2] * u4, hc0[1] * w4, hc0[0] * y4 ]
+	vpmuludq hc0,ruwy4,t1
+	# t1 += [ hc1[3] * r3, hc1[2] * u3, hc1[1] * w3, hc1[0] * y3 ]
+	vpmuludq hc1,ruwy3,t2
+	vpaddq t2,t1,t1
+	# t1 += [ hc2[3] * r2, hc2[2] * u2, hc2[1] * w2, hc2[0] * y2 ]
+	vpmuludq hc2,ruwy2,t2
+	vpaddq t2,t1,t1
+	# t1 += [ hc3[3] * r1, hc3[2] * u1, hc3[1] * w1, hc3[0] * y1 ]
+	vpmuludq hc3,ruwy1,t2
+	vpaddq t2,t1,t1
+	# t1 += [ hc4[3] * r0, hc4[2] * u0, hc4[1] * w0, hc4[0] * y0 ]
+	vpmuludq hc4,ruwy0,t2
+	vpaddq t2,t1,t1
+	# d4 = t1[0] + t1[1] + t1[2] + t1[3]
+	vpermq $0xee,t1,t2
+	vpaddq t2,t1,t1
+	vpsrldq $8,t1,t2
+	vpaddq t2,t1,t1
+	vmovq t1x,d4
+
+	# d1 += d0 >> 26
+	mov d0,%rax
+	shr $26,%rax
+	add %rax,d1
+	# h0 = d0 & 0x3ffffff
+	mov d0,%rbx
+	and $0x3ffffff,%ebx
+
+	# d2 += d1 >> 26
+	mov d1,%rax
+	shr $26,%rax
+	add %rax,d2
+	# h1 = d1 & 0x3ffffff
+	mov d1,%rax
+	and $0x3ffffff,%eax
+	mov %eax,h1
+
+	# d3 += d2 >> 26
+	mov d2,%rax
+	shr $26,%rax
+	add %rax,d3
+	# h2 = d2 & 0x3ffffff
+	mov d2,%rax
+	and $0x3ffffff,%eax
+	mov %eax,h2
+
+	# d4 += d3 >> 26
+	mov d3,%rax
+	shr $26,%rax
+	add %rax,d4
+	# h3 = d3 & 0x3ffffff
+	mov d3,%rax
+	and $0x3ffffff,%eax
+	mov %eax,h3
+
+	# h0 += (d4 >> 26) * 5
+	# Done in 64 bits: d4 sums four lanes of products, so (d4 >> 26) * 5
+	# does not necessarily fit in 32 bits.
+	mov d4,%rax
+	shr $26,%rax
+	lea (%rax,%rax,4),%rax
+	add %rax,%rbx
+	# h4 = d4 & 0x3ffffff
+	mov d4,%rax
+	and $0x3ffffff,%eax
+	mov %eax,h4
+
+	# h1 += h0 >> 26 (64-bit shift so the wide h0 carry is not truncated)
+	mov %rbx,%rax
+	shr $26,%rax
+	add %eax,h1
+	# h0 = h0 & 0x3ffffff
+	andl $0x3ffffff,%ebx
+	mov %ebx,h0
+
+	add $0x40,m
+	dec %rcx
+	jnz .Ldoblock4
+
+	vzeroupper
+	pop %r13
+	pop %r12
+	pop %rbx
+	ret
+ENDPROC(poly1305_asm_4block_avx2)
diff --git a/src/crypto/poly1305-sse2-x86_64.S b/src/crypto/poly1305-sse2-x86_64.S
new file mode 100644
index 0000000..97fa41f
--- /dev/null
+++ b/src/crypto/poly1305-sse2-x86_64.S
@@ -0,0 +1,582 @@
+/*
+ * Poly1305 authenticator algorithm, RFC7539, x64 SSE2 functions
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+
* (at your option) any later version. + */ + +#include + +.data +.align 16 + +ANMASK: .octa 0x0000000003ffffff0000000003ffffff +ORMASK: .octa 0x00000000010000000000000001000000 + +.text + +#define h0 0x00(%rdi) +#define h1 0x04(%rdi) +#define h2 0x08(%rdi) +#define h3 0x0c(%rdi) +#define h4 0x10(%rdi) +#define r0 0x00(%rdx) +#define r1 0x04(%rdx) +#define r2 0x08(%rdx) +#define r3 0x0c(%rdx) +#define r4 0x10(%rdx) +#define s1 0x00(%rsp) +#define s2 0x04(%rsp) +#define s3 0x08(%rsp) +#define s4 0x0c(%rsp) +#define m %rsi +#define h01 %xmm0 +#define h23 %xmm1 +#define h44 %xmm2 +#define t1 %xmm3 +#define t2 %xmm4 +#define t3 %xmm5 +#define t4 %xmm6 +#define mask %xmm7 +#define d0 %r8 +#define d1 %r9 +#define d2 %r10 +#define d3 %r11 +#define d4 %r12 + +ENTRY(poly1305_asm_block_sse2) + # %rdi: Accumulator h[5] + # %rsi: 16 byte input block m + # %rdx: Poly1305 key r[5] + # %rcx: Block count + + # This single block variant tries to improve performance by doing two + # multiplications in parallel using SSE instructions. There is quite + # some quardword packing involved, hence the speedup is marginal. 
+ + push %rbx + push %r12 + sub $0x10,%rsp + + # s1..s4 = r1..r4 * 5 + mov r1,%eax + lea (%eax,%eax,4),%eax + mov %eax,s1 + mov r2,%eax + lea (%eax,%eax,4),%eax + mov %eax,s2 + mov r3,%eax + lea (%eax,%eax,4),%eax + mov %eax,s3 + mov r4,%eax + lea (%eax,%eax,4),%eax + mov %eax,s4 + + movdqa ANMASK(%rip),mask + +.Ldoblock: + # h01 = [0, h1, 0, h0] + # h23 = [0, h3, 0, h2] + # h44 = [0, h4, 0, h4] + movd h0,h01 + movd h1,t1 + movd h2,h23 + movd h3,t2 + movd h4,h44 + punpcklqdq t1,h01 + punpcklqdq t2,h23 + punpcklqdq h44,h44 + + # h01 += [ (m[3-6] >> 2) & 0x3ffffff, m[0-3] & 0x3ffffff ] + movd 0x00(m),t1 + movd 0x03(m),t2 + psrld $2,t2 + punpcklqdq t2,t1 + pand mask,t1 + paddd t1,h01 + # h23 += [ (m[9-12] >> 6) & 0x3ffffff, (m[6-9] >> 4) & 0x3ffffff ] + movd 0x06(m),t1 + movd 0x09(m),t2 + psrld $4,t1 + psrld $6,t2 + punpcklqdq t2,t1 + pand mask,t1 + paddd t1,h23 + # h44 += [ (m[12-15] >> 8) | (1 << 24), (m[12-15] >> 8) | (1 << 24) ] + mov 0x0c(m),%eax + shr $8,%eax + or $0x01000000,%eax + movd %eax,t1 + pshufd $0xc4,t1,t1 + paddd t1,h44 + + # t1[0] = h0 * r0 + h2 * s3 + # t1[1] = h1 * s4 + h3 * s2 + movd r0,t1 + movd s4,t2 + punpcklqdq t2,t1 + pmuludq h01,t1 + movd s3,t2 + movd s2,t3 + punpcklqdq t3,t2 + pmuludq h23,t2 + paddq t2,t1 + # t2[0] = h0 * r1 + h2 * s4 + # t2[1] = h1 * r0 + h3 * s3 + movd r1,t2 + movd r0,t3 + punpcklqdq t3,t2 + pmuludq h01,t2 + movd s4,t3 + movd s3,t4 + punpcklqdq t4,t3 + pmuludq h23,t3 + paddq t3,t2 + # t3[0] = h4 * s1 + # t3[1] = h4 * s2 + movd s1,t3 + movd s2,t4 + punpcklqdq t4,t3 + pmuludq h44,t3 + # d0 = t1[0] + t1[1] + t3[0] + # d1 = t2[0] + t2[1] + t3[1] + movdqa t1,t4 + punpcklqdq t2,t4 + punpckhqdq t2,t1 + paddq t4,t1 + paddq t3,t1 + movq t1,d0 + psrldq $8,t1 + movq t1,d1 + + # t1[0] = h0 * r2 + h2 * r0 + # t1[1] = h1 * r1 + h3 * s4 + movd r2,t1 + movd r1,t2 + punpcklqdq t2,t1 + pmuludq h01,t1 + movd r0,t2 + movd s4,t3 + punpcklqdq t3,t2 + pmuludq h23,t2 + paddq t2,t1 + # t2[0] = h0 * r3 + h2 * r1 + # t2[1] = h1 * r2 + h3 * r0 + 
movd r3,t2 + movd r2,t3 + punpcklqdq t3,t2 + pmuludq h01,t2 + movd r1,t3 + movd r0,t4 + punpcklqdq t4,t3 + pmuludq h23,t3 + paddq t3,t2 + # t3[0] = h4 * s3 + # t3[1] = h4 * s4 + movd s3,t3 + movd s4,t4 + punpcklqdq t4,t3 + pmuludq h44,t3 + # d2 = t1[0] + t1[1] + t3[0] + # d3 = t2[0] + t2[1] + t3[1] + movdqa t1,t4 + punpcklqdq t2,t4 + punpckhqdq t2,t1 + paddq t4,t1 + paddq t3,t1 + movq t1,d2 + psrldq $8,t1 + movq t1,d3 + + # t1[0] = h0 * r4 + h2 * r2 + # t1[1] = h1 * r3 + h3 * r1 + movd r4,t1 + movd r3,t2 + punpcklqdq t2,t1 + pmuludq h01,t1 + movd r2,t2 + movd r1,t3 + punpcklqdq t3,t2 + pmuludq h23,t2 + paddq t2,t1 + # t3[0] = h4 * r0 + movd r0,t3 + pmuludq h44,t3 + # d4 = t1[0] + t1[1] + t3[0] + movdqa t1,t4 + psrldq $8,t4 + paddq t4,t1 + paddq t3,t1 + movq t1,d4 + + # d1 += d0 >> 26 + mov d0,%rax + shr $26,%rax + add %rax,d1 + # h0 = d0 & 0x3ffffff + mov d0,%rbx + and $0x3ffffff,%ebx + + # d2 += d1 >> 26 + mov d1,%rax + shr $26,%rax + add %rax,d2 + # h1 = d1 & 0x3ffffff + mov d1,%rax + and $0x3ffffff,%eax + mov %eax,h1 + + # d3 += d2 >> 26 + mov d2,%rax + shr $26,%rax + add %rax,d3 + # h2 = d2 & 0x3ffffff + mov d2,%rax + and $0x3ffffff,%eax + mov %eax,h2 + + # d4 += d3 >> 26 + mov d3,%rax + shr $26,%rax + add %rax,d4 + # h3 = d3 & 0x3ffffff + mov d3,%rax + and $0x3ffffff,%eax + mov %eax,h3 + + # h0 += (d4 >> 26) * 5 + mov d4,%rax + shr $26,%rax + lea (%eax,%eax,4),%eax + add %eax,%ebx + # h4 = d4 & 0x3ffffff + mov d4,%rax + and $0x3ffffff,%eax + mov %eax,h4 + + # h1 += h0 >> 26 + mov %ebx,%eax + shr $26,%eax + add %eax,h1 + # h0 = h0 & 0x3ffffff + andl $0x3ffffff,%ebx + mov %ebx,h0 + + add $0x10,m + dec %rcx + jnz .Ldoblock + + add $0x10,%rsp + pop %r12 + pop %rbx + ret +ENDPROC(poly1305_asm_block_sse2) + + +#define u0 0x00(%r8) +#define u1 0x04(%r8) +#define u2 0x08(%r8) +#define u3 0x0c(%r8) +#define u4 0x10(%r8) +#define hc0 %xmm0 +#define hc1 %xmm1 +#define hc2 %xmm2 +#define hc3 %xmm5 +#define hc4 %xmm6 +#define ru0 %xmm7 +#define ru1 %xmm8 +#define ru2 %xmm9 
+#define ru3 %xmm10 +#define ru4 %xmm11 +#define sv1 %xmm12 +#define sv2 %xmm13 +#define sv3 %xmm14 +#define sv4 %xmm15 +#undef d0 +#define d0 %r13 + +ENTRY(poly1305_asm_2block_sse2) + # %rdi: Accumulator h[5] + # %rsi: 16 byte input block m + # %rdx: Poly1305 key r[5] + # %rcx: Doubleblock count + # %r8: Poly1305 derived key r^2 u[5] + + # This two-block variant further improves performance by using loop + # unrolled block processing. This is more straight forward and does + # less byte shuffling, but requires a second Poly1305 key r^2: + # h = (h + m) * r => h = (h + m1) * r^2 + m2 * r + + push %rbx + push %r12 + push %r13 + + # combine r0,u0 + movd u0,ru0 + movd r0,t1 + punpcklqdq t1,ru0 + + # combine r1,u1 and s1=r1*5,v1=u1*5 + movd u1,ru1 + movd r1,t1 + punpcklqdq t1,ru1 + movdqa ru1,sv1 + pslld $2,sv1 + paddd ru1,sv1 + + # combine r2,u2 and s2=r2*5,v2=u2*5 + movd u2,ru2 + movd r2,t1 + punpcklqdq t1,ru2 + movdqa ru2,sv2 + pslld $2,sv2 + paddd ru2,sv2 + + # combine r3,u3 and s3=r3*5,v3=u3*5 + movd u3,ru3 + movd r3,t1 + punpcklqdq t1,ru3 + movdqa ru3,sv3 + pslld $2,sv3 + paddd ru3,sv3 + + # combine r4,u4 and s4=r4*5,v4=u4*5 + movd u4,ru4 + movd r4,t1 + punpcklqdq t1,ru4 + movdqa ru4,sv4 + pslld $2,sv4 + paddd ru4,sv4 + +.Ldoblock2: + # hc0 = [ m[16-19] & 0x3ffffff, h0 + m[0-3] & 0x3ffffff ] + movd 0x00(m),hc0 + movd 0x10(m),t1 + punpcklqdq t1,hc0 + pand ANMASK(%rip),hc0 + movd h0,t1 + paddd t1,hc0 + # hc1 = [ (m[19-22] >> 2) & 0x3ffffff, h1 + (m[3-6] >> 2) & 0x3ffffff ] + movd 0x03(m),hc1 + movd 0x13(m),t1 + punpcklqdq t1,hc1 + psrld $2,hc1 + pand ANMASK(%rip),hc1 + movd h1,t1 + paddd t1,hc1 + # hc2 = [ (m[22-25] >> 4) & 0x3ffffff, h2 + (m[6-9] >> 4) & 0x3ffffff ] + movd 0x06(m),hc2 + movd 0x16(m),t1 + punpcklqdq t1,hc2 + psrld $4,hc2 + pand ANMASK(%rip),hc2 + movd h2,t1 + paddd t1,hc2 + # hc3 = [ (m[25-28] >> 6) & 0x3ffffff, h3 + (m[9-12] >> 6) & 0x3ffffff ] + movd 0x09(m),hc3 + movd 0x19(m),t1 + punpcklqdq t1,hc3 + psrld $6,hc3 + pand ANMASK(%rip),hc3 + movd 
h3,t1 + paddd t1,hc3 + # hc4 = [ (m[28-31] >> 8) | (1<<24), h4 + (m[12-15] >> 8) | (1<<24) ] + movd 0x0c(m),hc4 + movd 0x1c(m),t1 + punpcklqdq t1,hc4 + psrld $8,hc4 + por ORMASK(%rip),hc4 + movd h4,t1 + paddd t1,hc4 + + # t1 = [ hc0[1] * r0, hc0[0] * u0 ] + movdqa ru0,t1 + pmuludq hc0,t1 + # t1 += [ hc1[1] * s4, hc1[0] * v4 ] + movdqa sv4,t2 + pmuludq hc1,t2 + paddq t2,t1 + # t1 += [ hc2[1] * s3, hc2[0] * v3 ] + movdqa sv3,t2 + pmuludq hc2,t2 + paddq t2,t1 + # t1 += [ hc3[1] * s2, hc3[0] * v2 ] + movdqa sv2,t2 + pmuludq hc3,t2 + paddq t2,t1 + # t1 += [ hc4[1] * s1, hc4[0] * v1 ] + movdqa sv1,t2 + pmuludq hc4,t2 + paddq t2,t1 + # d0 = t1[0] + t1[1] + movdqa t1,t2 + psrldq $8,t2 + paddq t2,t1 + movq t1,d0 + + # t1 = [ hc0[1] * r1, hc0[0] * u1 ] + movdqa ru1,t1 + pmuludq hc0,t1 + # t1 += [ hc1[1] * r0, hc1[0] * u0 ] + movdqa ru0,t2 + pmuludq hc1,t2 + paddq t2,t1 + # t1 += [ hc2[1] * s4, hc2[0] * v4 ] + movdqa sv4,t2 + pmuludq hc2,t2 + paddq t2,t1 + # t1 += [ hc3[1] * s3, hc3[0] * v3 ] + movdqa sv3,t2 + pmuludq hc3,t2 + paddq t2,t1 + # t1 += [ hc4[1] * s2, hc4[0] * v2 ] + movdqa sv2,t2 + pmuludq hc4,t2 + paddq t2,t1 + # d1 = t1[0] + t1[1] + movdqa t1,t2 + psrldq $8,t2 + paddq t2,t1 + movq t1,d1 + + # t1 = [ hc0[1] * r2, hc0[0] * u2 ] + movdqa ru2,t1 + pmuludq hc0,t1 + # t1 += [ hc1[1] * r1, hc1[0] * u1 ] + movdqa ru1,t2 + pmuludq hc1,t2 + paddq t2,t1 + # t1 += [ hc2[1] * r0, hc2[0] * u0 ] + movdqa ru0,t2 + pmuludq hc2,t2 + paddq t2,t1 + # t1 += [ hc3[1] * s4, hc3[0] * v4 ] + movdqa sv4,t2 + pmuludq hc3,t2 + paddq t2,t1 + # t1 += [ hc4[1] * s3, hc4[0] * v3 ] + movdqa sv3,t2 + pmuludq hc4,t2 + paddq t2,t1 + # d2 = t1[0] + t1[1] + movdqa t1,t2 + psrldq $8,t2 + paddq t2,t1 + movq t1,d2 + + # t1 = [ hc0[1] * r3, hc0[0] * u3 ] + movdqa ru3,t1 + pmuludq hc0,t1 + # t1 += [ hc1[1] * r2, hc1[0] * u2 ] + movdqa ru2,t2 + pmuludq hc1,t2 + paddq t2,t1 + # t1 += [ hc2[1] * r1, hc2[0] * u1 ] + movdqa ru1,t2 + pmuludq hc2,t2 + paddq t2,t1 + # t1 += [ hc3[1] * r0, hc3[0] * u0 ] + movdqa 
ru0,t2 + pmuludq hc3,t2 + paddq t2,t1 + # t1 += [ hc4[1] * s4, hc4[0] * v4 ] + movdqa sv4,t2 + pmuludq hc4,t2 + paddq t2,t1 + # d3 = t1[0] + t1[1] + movdqa t1,t2 + psrldq $8,t2 + paddq t2,t1 + movq t1,d3 + + # t1 = [ hc0[1] * r4, hc0[0] * u4 ] + movdqa ru4,t1 + pmuludq hc0,t1 + # t1 += [ hc1[1] * r3, hc1[0] * u3 ] + movdqa ru3,t2 + pmuludq hc1,t2 + paddq t2,t1 + # t1 += [ hc2[1] * r2, hc2[0] * u2 ] + movdqa ru2,t2 + pmuludq hc2,t2 + paddq t2,t1 + # t1 += [ hc3[1] * r1, hc3[0] * u1 ] + movdqa ru1,t2 + pmuludq hc3,t2 + paddq t2,t1 + # t1 += [ hc4[1] * r0, hc4[0] * u0 ] + movdqa ru0,t2 + pmuludq hc4,t2 + paddq t2,t1 + # d4 = t1[0] + t1[1] + movdqa t1,t2 + psrldq $8,t2 + paddq t2,t1 + movq t1,d4 + + # d1 += d0 >> 26 + mov d0,%rax + shr $26,%rax + add %rax,d1 + # h0 = d0 & 0x3ffffff + mov d0,%rbx + and $0x3ffffff,%ebx + + # d2 += d1 >> 26 + mov d1,%rax + shr $26,%rax + add %rax,d2 + # h1 = d1 & 0x3ffffff + mov d1,%rax + and $0x3ffffff,%eax + mov %eax,h1 + + # d3 += d2 >> 26 + mov d2,%rax + shr $26,%rax + add %rax,d3 + # h2 = d2 & 0x3ffffff + mov d2,%rax + and $0x3ffffff,%eax + mov %eax,h2 + + # d4 += d3 >> 26 + mov d3,%rax + shr $26,%rax + add %rax,d4 + # h3 = d3 & 0x3ffffff + mov d3,%rax + and $0x3ffffff,%eax + mov %eax,h3 + + # h0 += (d4 >> 26) * 5 + mov d4,%rax + shr $26,%rax + lea (%eax,%eax,4),%eax + add %eax,%ebx + # h4 = d4 & 0x3ffffff + mov d4,%rax + and $0x3ffffff,%eax + mov %eax,h4 + + # h1 += h0 >> 26 + mov %ebx,%eax + shr $26,%eax + add %eax,h1 + # h0 = h0 & 0x3ffffff + andl $0x3ffffff,%ebx + mov %ebx,h0 + + add $0x20,m + dec %rcx + jnz .Ldoblock2 + + pop %r13 + pop %r12 + pop %rbx + ret +ENDPROC(poly1305_asm_2block_sse2) diff --git a/src/crypto/siphash24.c b/src/crypto/siphash24.c new file mode 100644 index 0000000..093c837 --- /dev/null +++ b/src/crypto/siphash24.c @@ -0,0 +1,155 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. 
*/ + +#include "../wireguard.h" +#include "siphash24.h" +#include + +#define ROTL(x,b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) +#define U8TO64(p) le64_to_cpu(*(__le64 *)(p)) + +#define SIPROUND \ + do { \ + v0 += v1; v1 = ROTL(v1, 13); v1 ^= v0; v0 = ROTL(v0, 32); \ + v2 += v3; v3 = ROTL(v3, 16); v3 ^= v2; \ + v0 += v3; v3 = ROTL(v3, 21); v3 ^= v0; \ + v2 += v1; v1 = ROTL(v1, 17); v1 ^= v2; v2 = ROTL(v2, 32); \ + } while(0) + +__attribute__((optimize("unroll-loops"))) +uint64_t siphash24(const uint8_t *data, size_t len, const uint8_t key[SIPHASH24_KEY_LEN]) +{ + uint64_t v0 = 0x736f6d6570736575ULL; + uint64_t v1 = 0x646f72616e646f6dULL; + uint64_t v2 = 0x6c7967656e657261ULL; + uint64_t v3 = 0x7465646279746573ULL; + uint64_t b; + uint64_t k0 = U8TO64(key); + uint64_t k1 = U8TO64(key + sizeof(uint64_t)); + uint64_t m; + const uint8_t *end = data + len - (len % sizeof(uint64_t)); + const uint8_t left = len & (sizeof(uint64_t) - 1); + b = ((uint64_t)len) << 56; + v3 ^= k1; + v2 ^= k0; + v1 ^= k1; + v0 ^= k0; + for (; data != end; data += sizeof(uint64_t)) { + m = U8TO64(data); + v3 ^= m; + SIPROUND; + SIPROUND; + v0 ^= m; + } + switch (left) { + case 7: b |= ((uint64_t)data[6]) << 48; + case 6: b |= ((uint64_t)data[5]) << 40; + case 5: b |= ((uint64_t)data[4]) << 32; + case 4: b |= ((uint64_t)data[3]) << 24; + case 3: b |= ((uint64_t)data[2]) << 16; + case 2: b |= ((uint64_t)data[1]) << 8; + case 1: b |= ((uint64_t)data[0]); break; + case 0: break; + } + v3 ^= b; + SIPROUND; + SIPROUND; + v0 ^= b; + v2 ^= 0xff; + SIPROUND; + SIPROUND; + SIPROUND; + SIPROUND; + b = (v0 ^ v1) ^ (v2 ^ v3); + return (__force uint64_t)cpu_to_le64(b); +} + +#ifdef DEBUG +static const uint8_t test_vectors[64][8] = { + { 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72 }, + { 0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74 }, + { 0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d }, + { 0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85 }, + { 0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf }, + { 
0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18 }, + { 0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb }, + { 0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab }, + { 0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93 }, + { 0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e }, + { 0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a }, + { 0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4 }, + { 0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75 }, + { 0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14 }, + { 0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7 }, + { 0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1 }, + { 0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f }, + { 0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69 }, + { 0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b }, + { 0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb }, + { 0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe }, + { 0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0 }, + { 0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93 }, + { 0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8 }, + { 0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8 }, + { 0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc }, + { 0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17 }, + { 0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f }, + { 0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde }, + { 0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6 }, + { 0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad }, + { 0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32 }, + { 0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71 }, + { 0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7 }, + { 0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12 }, + { 0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15 }, + { 0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31 }, + { 0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02 }, + { 0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca }, + { 0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a }, + { 0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e }, + { 0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad }, + { 
0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18 }, + { 0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4 }, + { 0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9 }, + { 0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9 }, + { 0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb }, + { 0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0 }, + { 0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6 }, + { 0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7 }, + { 0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee }, + { 0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1 }, + { 0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a }, + { 0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81 }, + { 0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f }, + { 0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24 }, + { 0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7 }, + { 0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea }, + { 0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60 }, + { 0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66 }, + { 0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c }, + { 0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f }, + { 0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5 }, + { 0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95 } +}; + +void siphash24_selftest(void) +{ + uint8_t in[64], k[16]; + uint64_t out; + bool success = true; + size_t i; + + for (i = 0; i < 16; ++i) + k[i] = (uint8_t)i; + + for (i = 0; i < 64; ++i) { + in[i] = (uint8_t)i; + out = siphash24(in, i, k); + if (memcmp(&out, test_vectors[i], 8)) { + pr_info("siphash24 self-test %zu: FAIL\n", i + 1); + success = false; + } + } + if (success) + pr_info("siphash24 self-tests: pass\n"); +} +#endif diff --git a/src/crypto/siphash24.h b/src/crypto/siphash24.h new file mode 100644 index 0000000..5ce5a3a --- /dev/null +++ b/src/crypto/siphash24.h @@ -0,0 +1,16 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. 
*/ + +#ifndef SIPHASH24_H +#define SIPHASH24_H + +enum siphash24_lengths { + SIPHASH24_KEY_LEN = 16 +}; + +uint64_t siphash24(const uint8_t *data, size_t len, const uint8_t key[SIPHASH24_KEY_LEN]); + +#ifdef DEBUG +void siphash24_selftest(void); +#endif + +#endif diff --git a/src/data.c b/src/data.c new file mode 100644 index 0000000..5b3c781 --- /dev/null +++ b/src/data.c @@ -0,0 +1,477 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#include "wireguard.h" +#include "noise.h" +#include "messages.h" +#include "packets.h" +#include "hashtables.h" +#include +#include +#include +#include +#include +#include + +/* This is appendix C of RFC 2401 - a sliding window bitmap. */ +static inline bool counter_validate(union noise_counter *counter, u64 their_counter) +{ + bool ret = false; + u64 difference; + spin_lock_bh(&counter->receive.lock); + + if (unlikely(counter->receive.counter >= REJECT_AFTER_MESSAGES + 1 || their_counter >= REJECT_AFTER_MESSAGES)) + goto out; + + ++their_counter; + + if (likely(their_counter > counter->receive.counter)) { + difference = their_counter - counter->receive.counter; + if (likely(difference < BITS_PER_LONG)) { + counter->receive.backtrack <<= difference; + counter->receive.backtrack |= 1; + } else + counter->receive.backtrack = 1; + counter->receive.counter = their_counter; + ret = true; + goto out; + } + + difference = counter->receive.counter - their_counter; + if (unlikely(difference >= BITS_PER_LONG)) + goto out; + ret = !test_and_set_bit(difference, &counter->receive.backtrack); + +out: + spin_unlock_bh(&counter->receive.lock); + return ret; +} + +#ifdef DEBUG +void packet_counter_selftest(void) +{ + bool success = true; + unsigned int i = 0; + union noise_counter counter = { { 0 } }; + spin_lock_init(&counter.receive.lock); + +#define T(n, v) do { ++i; if (counter_validate(&counter, n) != v) { pr_info("nonce counter self-test %u: FAIL\n", i); success = false; } } while (0) + T(0, true); + T(1, true); + 
T(1, false); + T(9, true); + T(8, true); + T(7, true); + T(7, false); + T(BITS_PER_LONG, true); + T(BITS_PER_LONG - 1, true); + T(BITS_PER_LONG - 1, false); + T(BITS_PER_LONG - 2, true); + T(2, true); + T(2, false); + T(BITS_PER_LONG + 16, true); + T(3, false); + T(BITS_PER_LONG + 16, false); + T(BITS_PER_LONG * 4, true); + T(BITS_PER_LONG * 4 - (BITS_PER_LONG - 1), true); + T(10, false); + T(BITS_PER_LONG * 4 - BITS_PER_LONG, false); + T(BITS_PER_LONG * 4 - (BITS_PER_LONG + 1), false); + T(BITS_PER_LONG * 4 - (BITS_PER_LONG - 2), true); + T(BITS_PER_LONG * 4 + 1 - BITS_PER_LONG, false); + T(0, false); + T(REJECT_AFTER_MESSAGES, false); + T(REJECT_AFTER_MESSAGES - 1, true); + T(REJECT_AFTER_MESSAGES, false); + T(REJECT_AFTER_MESSAGES - 1, false); + T(REJECT_AFTER_MESSAGES - 2, true); + T(REJECT_AFTER_MESSAGES + 1, false); + T(REJECT_AFTER_MESSAGES + 2, false); + T(REJECT_AFTER_MESSAGES - 2, false); + T(REJECT_AFTER_MESSAGES - 3, true); + T(0, false); +#undef T + + if (success) + pr_info("nonce counter self-tests: pass\n"); +} +#endif + +static inline size_t skb_padding(struct sk_buff *skb) +{ + /* We do this modulo business with the MTU, just in case the networking layer + * gives us a packet that's bigger than the MTU. Now that we support GSO, this + * shouldn't be a real problem, and this can likely be removed. But, caution! 
*/ + size_t last_unit = skb->len % skb->dev->mtu; + size_t padded_size = (last_unit + MESSAGE_PADDING_MULTIPLE - 1) & ~(MESSAGE_PADDING_MULTIPLE - 1); + if (padded_size > skb->dev->mtu) + padded_size = skb->dev->mtu; + return padded_size - last_unit; +} + +static inline void skb_reset(struct sk_buff *skb) +{ + skb_scrub_packet(skb, false); + memset(&skb->headers_start, 0, offsetof(struct sk_buff, headers_end) - offsetof(struct sk_buff, headers_start)); + skb->queue_mapping = 0; + skb->nohdr = 0; + skb->peeked = 0; + skb->mac_len = 0; + skb->dev = NULL; + skb->hdr_len = skb_headroom(skb); + skb->mac_header = (typeof(skb->mac_header))~0U; + skb->transport_header = (typeof(skb->transport_header))~0U; + skb_reset_network_header(skb); +} + +static inline void skb_encrypt(struct sk_buff *skb, struct packet_data_encryption_ctx *ctx) +{ + struct scatterlist sg[ctx->num_frags]; /* This should be bound to at most 128 by the caller. */ + struct message_data *header; + + /* We have to remember to add the checksum to the innerpacket, in case the receiver forwards it. */ + if (likely(!skb_checksum_setup(skb, true))) + skb_checksum_help(skb); + + /* Only after checksumming can we safely add on the padding at the end and the header. 
*/ + header = (struct message_data *)skb_push(skb, sizeof(struct message_data)); + header->header.type = MESSAGE_DATA; + header->key_idx = ctx->keypair->remote_index; + header->counter = cpu_to_le64(ctx->nonce); + pskb_put(skb, ctx->trailer, ctx->trailer_len); + + /* Now we can encrypt the scattergather segments */ + sg_init_table(sg, ctx->num_frags); + skb_to_sgvec(skb, sg, sizeof(struct message_data), noise_encrypted_len(ctx->plaintext_len)); + chacha20poly1305_encrypt_sg(sg, sg, ctx->plaintext_len, NULL, 0, ctx->nonce, ctx->keypair->sending.key); + + /* When we're done, we free the reference to the key pair */ + noise_keypair_put(ctx->keypair); +} + +static inline bool skb_decrypt(struct sk_buff *skb, unsigned int num_frags, uint64_t nonce, struct noise_symmetric_key *key) +{ + struct scatterlist sg[num_frags]; /* This should be bound to at most 128 by the caller. */ + + if (unlikely(!key)) + return false; + + if (unlikely(!key->is_valid || time_is_before_eq_jiffies64(key->birthdate + REJECT_AFTER_TIME) || key->counter.receive.counter >= REJECT_AFTER_MESSAGES)) { + key->is_valid = false; + return false; + } + + sg_init_table(sg, num_frags); + skb_to_sgvec(skb, sg, 0, skb->len); + + if (!chacha20poly1305_decrypt_sg(sg, sg, skb->len, NULL, 0, nonce, key->key)) + return false; + + return pskb_trim(skb, skb->len - noise_encrypted_len(0)) == 0; +} + +static inline bool get_encryption_nonce(uint64_t *nonce, struct noise_symmetric_key *key) +{ + if (unlikely(!key)) + return false; + + if (unlikely(!key->is_valid || time_is_before_eq_jiffies64(key->birthdate + REJECT_AFTER_TIME))) { + key->is_valid = false; + return false; + } + + *nonce = atomic64_inc_return(&key->counter.counter) - 1; + if (*nonce >= REJECT_AFTER_MESSAGES) { + key->is_valid = false; + return false; + } + + return true; +} + +#ifdef CONFIG_WIREGUARD_PARALLEL +static void do_encryption(struct padata_priv *padata) +{ + struct packet_data_encryption_ctx *ctx = container_of(padata, struct 
packet_data_encryption_ctx, padata); + + skb_encrypt(ctx->skb, ctx); + skb_reset(ctx->skb); + + padata_do_serial(padata); +} + +static void finish_encryption(struct padata_priv *padata) +{ + struct packet_data_encryption_ctx *ctx = container_of(padata, struct packet_data_encryption_ctx, padata); + + ctx->callback(ctx->skb, ctx->peer); +} + +static inline int start_encryption(struct padata_instance *padata, struct padata_priv *priv, int cb_cpu) +{ + memset(priv, 0, sizeof(struct padata_priv)); + priv->parallel = do_encryption; + priv->serial = finish_encryption; + return padata_do_parallel(padata, priv, cb_cpu); +} + +static inline unsigned int choose_cpu(__le32 key) +{ + unsigned int cpu_index, cpu, cb_cpu; + + /* This ensures that packets encrypted to the same key are sent in-order. */ + cpu_index = ((__force unsigned int)key) % cpumask_weight(cpu_online_mask); + cb_cpu = cpumask_first(cpu_online_mask); + for (cpu = 0; cpu < cpu_index; ++cpu) + cb_cpu = cpumask_next(cb_cpu, cpu_online_mask); + + return cb_cpu; +} +#endif + +int packet_create_data(struct sk_buff *skb, struct wireguard_peer *peer, void(*callback)(struct sk_buff *, struct wireguard_peer *), bool parallel) +{ + int ret = -ENOKEY; + struct noise_keypair *keypair; + struct packet_data_encryption_ctx *ctx = NULL; + u64 nonce; + struct sk_buff *trailer = NULL; + size_t plaintext_len, padding_len, trailer_len; + unsigned int num_frags; + + rcu_read_lock(); + keypair = rcu_dereference(peer->keypairs.current_keypair); + if (unlikely(!keypair)) + goto err_rcu; + kref_get(&keypair->refcount); + rcu_read_unlock(); + + if (unlikely(!get_encryption_nonce(&nonce, &keypair->sending))) + goto err; + + padding_len = skb_padding(skb); + trailer_len = padding_len + noise_encrypted_len(0); + plaintext_len = skb->len + padding_len; + + /* Expand data section to have room for padding and auth tag */ + ret = skb_cow_data(skb, trailer_len, &trailer); + if (unlikely(ret < 0)) + goto err; + num_frags = ret; + ret = -ENOMEM; + 
if (unlikely(num_frags > 128)) + goto err; + + /* Set the padding to zeros, and make sure it and the auth tag are part of the skb */ + memset(skb_tail_pointer(trailer), 0, padding_len); + + /* Expand head section to have room for our header and the network stack's headers, + * plus our key and nonce in the head. */ + ret = skb_cow_head(skb, DATA_PACKET_HEAD_ROOM); + if (unlikely(ret < 0)) + goto err; + + ctx = (struct packet_data_encryption_ctx *)skb->head; + ctx->skb = skb; + ctx->callback = callback; + ctx->peer = peer; + ctx->num_frags = num_frags; + ctx->trailer_len = trailer_len; + ctx->trailer = trailer; + ctx->plaintext_len = plaintext_len; + ctx->nonce = nonce; + ctx->keypair = keypair; + +#ifdef CONFIG_WIREGUARD_PARALLEL + if (parallel && cpumask_weight(cpu_online_mask) > 1) { + unsigned int cpu = choose_cpu(keypair->remote_index); + ret = start_encryption(peer->device->parallel_send, &ctx->padata, cpu); + if (unlikely(ret < 0)) + goto err; + } else +#endif + { + skb_encrypt(skb, ctx); + skb_reset(skb); + callback(skb, peer); + } + return 0; + +err: + noise_keypair_put(keypair); + return ret; +err_rcu: + rcu_read_unlock(); + return ret; +} + +struct packet_data_decryption_ctx { + struct padata_priv padata; + struct sk_buff *skb; + void (*callback)(struct sk_buff *skb, struct wireguard_peer *, struct sockaddr_storage *, bool used_new_key, int err); + struct noise_keypair *keypair; + struct sockaddr_storage addr; + uint64_t nonce; + unsigned int num_frags; + int ret; +}; + +static void begin_decrypt_packet(struct packet_data_decryption_ctx *ctx) +{ + if (unlikely(!skb_decrypt(ctx->skb, ctx->num_frags, ctx->nonce, &ctx->keypair->receiving))) + goto err; + + skb_reset(ctx->skb); + ctx->ret = 0; + return; + +err: + ctx->ret = -ENOKEY; + peer_put(ctx->keypair->entry.peer); +} + +static void finish_decrypt_packet(struct packet_data_decryption_ctx *ctx) +{ + struct noise_keypairs *keypairs; + bool used_new_key = false; + int ret = ctx->ret; + if (ret) + goto err; 
+ + keypairs = &ctx->keypair->entry.peer->keypairs; + ret = counter_validate(&ctx->keypair->receiving.counter, ctx->nonce) ? 0 : -ERANGE; + + if (likely(!ret)) + used_new_key = noise_received_with_keypair(&ctx->keypair->entry.peer->keypairs, ctx->keypair); + else { + /* TODO: currently either the nonce window is not big enough, or we're sending things in + * the wrong order. Try uncommenting the below code to see for yourself. This is a problem + * that needs to be solved. + * + * Debug with: + * #define XSTR(s) STR(s) + * #define STR(s) #s + * net_dbg_ratelimited("Packet has invalid nonce %Lu (max %Lu, backtrack %" XSTR(BITS_PER_LONG) "pbl)\n", ctx->nonce, ctx->keypair->receiving.counter.receive.counter, &ctx->keypair->receiving.counter.receive.backtrack); + */ + peer_put(ctx->keypair->entry.peer); + goto err; + } + + noise_keypair_put(ctx->keypair); + ctx->callback(ctx->skb, ctx->keypair->entry.peer, &ctx->addr, used_new_key, 0); + return; + +err: + noise_keypair_put(ctx->keypair); + ctx->callback(ctx->skb, NULL, NULL, false, ret); +} + +#ifdef CONFIG_WIREGUARD_PARALLEL +static void do_decryption(struct padata_priv *padata) +{ + struct packet_data_decryption_ctx *ctx = container_of(padata, struct packet_data_decryption_ctx, padata); + begin_decrypt_packet(ctx); + padata_do_serial(padata); +} + +static void finish_decryption(struct padata_priv *padata) +{ + struct packet_data_decryption_ctx *ctx = container_of(padata, struct packet_data_decryption_ctx, padata); + finish_decrypt_packet(ctx); + kfree(ctx); +} + +static inline int start_decryption(struct padata_instance *padata, struct padata_priv *priv, int cb_cpu) +{ + priv->parallel = do_decryption; + priv->serial = finish_decryption; + return padata_do_parallel(padata, priv, cb_cpu); +} +#endif + +void packet_consume_data(struct sk_buff *skb, size_t offset, struct wireguard_device *wg, void(*callback)(struct sk_buff *skb, struct wireguard_peer *, struct sockaddr_storage *, bool used_new_key, int err)) +{ + int 
ret; + struct sockaddr_storage addr = { 0 }; + unsigned int num_frags; + struct sk_buff *trailer; + struct message_data *header; + struct noise_keypair *keypair; + uint64_t nonce; + __le32 idx; + + ret = socket_addr_from_skb(&addr, skb); + if (unlikely(ret < 0)) + goto err; + + ret = -ENOMEM; + if (unlikely(!pskb_may_pull(skb, offset + sizeof(struct message_data)))) + goto err; + + header = (struct message_data *)(skb->data + offset); + offset += sizeof(struct message_data); + skb_pull(skb, offset); + + idx = header->key_idx; + nonce = le64_to_cpu(header->counter); + + ret = skb_cow_data(skb, 0, &trailer); + if (unlikely(ret < 0)) + goto err; + num_frags = ret; + ret = -ENOMEM; + if (unlikely(num_frags > 128)) + goto err; + ret = -EINVAL; + rcu_read_lock(); + keypair = (struct noise_keypair *)index_hashtable_lookup(&wg->index_hashtable, INDEX_HASHTABLE_KEYPAIR, idx); + if (unlikely(!keypair)) { + rcu_read_unlock(); + goto err; + } + kref_get(&keypair->refcount); + rcu_read_unlock(); +#ifdef CONFIG_WIREGUARD_PARALLEL + if (cpumask_weight(cpu_online_mask) > 1) { + struct packet_data_decryption_ctx *ctx; + unsigned int cpu = choose_cpu(idx); + + ret = -ENOMEM; + ctx = kzalloc(sizeof(struct packet_data_decryption_ctx), GFP_ATOMIC); + if (unlikely(!ctx)) + goto err_peer; + + ctx->skb = skb; + ctx->keypair = keypair; + ctx->callback = callback; + ctx->nonce = nonce; + ctx->num_frags = num_frags; + ctx->addr = addr; + ret = start_decryption(wg->parallel_receive, &ctx->padata, cpu); + if (unlikely(ret)) { + kfree(ctx); + goto err_peer; + } + } else +#endif + { + struct packet_data_decryption_ctx ctx = { + .skb = skb, + .keypair = keypair, + .callback = callback, + .nonce = nonce, + .num_frags = num_frags, + .addr = addr + }; + begin_decrypt_packet(&ctx); + finish_decrypt_packet(&ctx); + } + return; + +#ifdef CONFIG_WIREGUARD_PARALLEL +err_peer: + peer_put(keypair->entry.peer); + noise_keypair_put(keypair); +#endif +err: + callback(skb, NULL, NULL, false, ret); +} diff 
--git a/src/debug.mk b/src/debug.mk new file mode 100644 index 0000000..5385092 --- /dev/null +++ b/src/debug.mk @@ -0,0 +1,70 @@ +REMOTE_HOST1 ?= root@172.16.48.128 +REMOTE_HOST2 ?= root@172.16.48.129 +REMOTE_HOST3 ?= root@172.16.48.130 +PEER1 := [Peer]\nPublicKey=UQGBaem0U6JjIObMQzunZ2Euv8MMYcUUdWKJV87WDE8=\nAllowedIPs=192.168.2.1/32,abcd::1/128\nEndpoint=$(subst root@,,$(REMOTE_HOST1)):12912\n +PEER2 := [Peer]\nPublicKey=tNXrD6GCvHRNgoZ/D/BmTbTbzoVGZh0R2V6rzY6hwl4=\nAllowedIPs=192.168.2.2/32,abcd::2/128\nEndpoint=$(subst root@,,$(REMOTE_HOST2)):21281\n +PEER3 := [Peer]\nPublicKey=gLvFUb1FTyoACC/yZNqGLKnNkt+w30JEvfFChDVuewo=\nAllowedIPs=192.168.2.3/32,abcd::3/128\nEndpoint=$(subst root@,,$(REMOTE_HOST3)):54812\n +SSH_OPTS := -q -o ControlMaster=auto -o ControlPath=.ssh-deployment.sock +SSH_OPTS1 := $(SSH_OPTS)-1 +SSH_OPTS2 := $(SSH_OPTS)-2 +SSH_OPTS3 := $(SSH_OPTS)-3 +RSYNC_OPTS := --include="tools" --include="noise" --include="crypto" --include="*.mk" --include="*.sh" --include="*.8" --include="*.S" --include="*.c" --include="*.h" --include="Makefile" --exclude="*" -avP #--delete --delete-excluded + +MAYBE_DEBUG := "debug" +ifeq ($(D),0) +MAYBE_DEBUG := +endif + +quick: debug + sudo modprobe ip6_udp_tunnel + sudo modprobe udp_tunnel + sudo modprobe x_tables + -sudo rmmod wireguard + sudo insmod wireguard.ko + bash netns.sh $(QUICK_ARGS) + +remote-quick: + ssh $(SSH_OPTS1) -Nf $(REMOTE_HOST1) + rsync --rsh="ssh $(SSH_OPTS1)" $(RSYNC_OPTS) . $(REMOTE_HOST1):wireguard-build/ + ssh $(SSH_OPTS1) $(REMOTE_HOST1) 'make -C wireguard-build quick -j$$(nproc) "QUICK_ARGS=$(QUICK_ARGS)"' + ssh $(SSH_OPTS1) -O exit $(REMOTE_HOST1) + +remote-run-1: + ssh $(SSH_OPTS1) -Nf $(REMOTE_HOST1) + rsync --rsh="ssh $(SSH_OPTS1)" $(RSYNC_OPTS) . 
$(REMOTE_HOST1):wireguard-build/ + ssh $(SSH_OPTS1) $(REMOTE_HOST1) 'ip l d wg0; rmmod wireguard; cd wireguard-build && make -j$$(nproc) $(MAYBE_DEBUG) && make install' + ssh $(SSH_OPTS1) $(REMOTE_HOST1) 'ip l a wg0 type wireguard' + printf '[Interface]\nListenPort=12912\nPrivateKey=4IoHwlfTyKb9Z9W1YPmBmZvSiU6qcs0oa4xnjAEm/3U=\n$(PEER2)$(PEER3)' | ssh $(SSH_OPTS1) $(REMOTE_HOST1) 'cat > config.conf' + ssh $(SSH_OPTS1) $(REMOTE_HOST1) 'wg setconf wg0 config.conf' + ssh $(SSH_OPTS1) $(REMOTE_HOST1) 'ip l set up dev wg0' + ssh $(SSH_OPTS1) $(REMOTE_HOST1) 'ip a a 192.168.2.1/24 dev wg0' + ssh $(SSH_OPTS1) $(REMOTE_HOST1) 'ip a a abcd::1/120 dev wg0' + ssh $(SSH_OPTS1) -O exit $(REMOTE_HOST1) + + +remote-run-2: + ssh $(SSH_OPTS2) -Nf $(REMOTE_HOST2) + rsync --rsh="ssh $(SSH_OPTS2)" $(RSYNC_OPTS) . $(REMOTE_HOST2):wireguard-build/ + ssh $(SSH_OPTS2) $(REMOTE_HOST2) 'ip l d wg0; rmmod wireguard; cd wireguard-build && make -j$$(nproc) $(MAYBE_DEBUG) && make install' + ssh $(SSH_OPTS2) $(REMOTE_HOST2) 'ip l a wg0 type wireguard' + printf '[Interface]\nListenPort=21281\nPrivateKey=kEKL+m4h5xTn2cYKU6NTEv32kuXHAkuqrjdT9VtsnX8=\n$(PEER1)$(PEER3)' | ssh $(SSH_OPTS2) $(REMOTE_HOST2) 'cat > config.conf' + ssh $(SSH_OPTS2) $(REMOTE_HOST2) 'wg setconf wg0 config.conf' + ssh $(SSH_OPTS2) $(REMOTE_HOST2) 'ip l set up dev wg0' + ssh $(SSH_OPTS2) $(REMOTE_HOST2) 'ip a a 192.168.2.2/24 dev wg0' + ssh $(SSH_OPTS2) $(REMOTE_HOST2) 'ip a a abcd::2/120 dev wg0' + ssh $(SSH_OPTS2) -O exit $(REMOTE_HOST2) + +remote-run-3: + ssh $(SSH_OPTS3) -Nf $(REMOTE_HOST3) + rsync --rsh="ssh $(SSH_OPTS3)" $(RSYNC_OPTS) . 
$(REMOTE_HOST3):wireguard-build/ + ssh $(SSH_OPTS3) $(REMOTE_HOST3) 'ip l d wg0; rmmod wireguard; cd wireguard-build && make -j$$(nproc) $(MAYBE_DEBUG) && make install' + ssh $(SSH_OPTS3) $(REMOTE_HOST3) 'ip l a wg0 type wireguard' + printf '[Interface]\nListenPort=54812\nPrivateKey=qFunvj5kgENrtWn754hNBLrk5mMA+8+evVtnI2YqWkk=\n$(PEER1)$(PEER2)' | ssh $(SSH_OPTS3) $(REMOTE_HOST3) 'cat > config.conf' + ssh $(SSH_OPTS3) $(REMOTE_HOST3) 'wg setconf wg0 config.conf' + ssh $(SSH_OPTS3) $(REMOTE_HOST3) 'ip l set up dev wg0' + ssh $(SSH_OPTS3) $(REMOTE_HOST3) 'ip a a 192.168.2.3/24 dev wg0' + ssh $(SSH_OPTS3) $(REMOTE_HOST3) 'ip a a abcd::3/120 dev wg0' + ssh $(SSH_OPTS3) -O exit $(REMOTE_HOST3) + +remote-run: + $(MAKE) -j3 remote-run-1 remote-run-2 remote-run-3 diff --git a/src/device.c b/src/device.c new file mode 100644 index 0000000..4076e58 --- /dev/null +++ b/src/device.c @@ -0,0 +1,352 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#include "wireguard.h" +#include "packets.h" +#include "socket.h" +#include "timers.h" +#include "device.h" +#include "config.h" +#include "peer.h" +#include "uapi.h" +#include "messages.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_QUEUED_PACKETS 1024 + +static int init(struct net_device *dev) +{ + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; + return 0; +} +static void uninit(struct net_device *dev) +{ + free_percpu(dev->tstats); +} + +static int open_peer(struct wireguard_peer *peer, void *data) +{ + socket_set_peer_dst(peer); + timers_init_peer(peer); + packet_send_queue(peer); + return 0; +} + +static int open(struct net_device *dev) +{ + struct wireguard_device *wg = netdev_priv(dev); + int rc = socket_init(wg); + if (rc < 0) + return rc; + peer_for_each(wg, open_peer, NULL); + return 0; +} + +static int stop_peer(struct wireguard_peer *peer, void *data) +{ + 
timers_uninit_peer_wait(peer); + noise_handshake_clear(&peer->handshake); + noise_keypairs_clear(&peer->keypairs); + return 0; +} + +static int stop(struct net_device *dev) +{ + struct wireguard_device *wg = netdev_priv(dev); + peer_for_each(wg, stop_peer, NULL); + skb_queue_purge(&wg->incoming_handshakes); + socket_uninit(wg); + return 0; +} + +static void skb_unsendable(struct sk_buff *skb, struct net_device *dev) +{ + /* This conntrack stuff is because the rate limiting needs to be applied + * to the original src IP, so we have to restore saddr in the IP header. */ + struct nf_conn *ct = NULL; +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + enum ip_conntrack_info ctinfo; + ct = nf_ct_get(skb, &ctinfo); +#endif + ++dev->stats.tx_errors; + + if (skb->len < sizeof(struct iphdr)) + goto free; + + if (ip_hdr(skb)->version == 4) { + if (ct) + ip_hdr(skb)->saddr = ct->tuplehash[0].tuple.src.u3.ip; + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); + } else if (ip_hdr(skb)->version == 6) { + if (ct) + ipv6_hdr(skb)->saddr = ct->tuplehash[0].tuple.src.u3.in6; + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); + } +free: + kfree_skb(skb); +} + +static netdev_tx_t xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct wireguard_device *wg = netdev_priv(dev); + struct wireguard_peer *peer; + int ret; + + if (unlikely(dev_recursion_level() > 4)) { + net_dbg_ratelimited("Routing loop detected\n"); + skb_unsendable(skb, dev); + return -ELOOP; + } + + dev->trans_start = jiffies; + + peer = routing_table_lookup_dst(&wg->peer_routing_table, skb); + if (unlikely(!peer)) { + skb_unsendable(skb, dev); + return -ENOKEY; + } + + read_lock_bh(&peer->endpoint_lock); + ret = unlikely(peer->endpoint_addr.ss_family != AF_INET && peer->endpoint_addr.ss_family != AF_INET6); + read_unlock_bh(&peer->endpoint_lock); + if (ret) { + net_dbg_ratelimited("No valid endpoint has been configured or discovered for device\n"); + peer_put(peer); + 
skb_unsendable(skb, dev); + return -EHOSTUNREACH; + } + + /* If the queue is getting too big, we start removing the oldest packets until it's small again. + * We do this before adding the new packet, so we don't remove GSO segments that are in excess. */ + while (skb_queue_len(&peer->tx_packet_queue) > MAX_QUEUED_PACKETS) + dev_kfree_skb(skb_dequeue(&peer->tx_packet_queue)); + + if (!skb_is_gso(skb)) + skb->next = NULL; + else { + struct sk_buff *segs = skb_gso_segment(skb, 0); + if (unlikely(IS_ERR(segs))) { + skb_unsendable(skb, dev); + peer_put(peer); + return PTR_ERR(segs); + } + dev_kfree_skb(skb); + skb = segs; + } + while (skb) { + struct sk_buff *next = skb->next; + skb->next = skb->prev = NULL; + + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + continue; + + /* We only need to keep the original dst around for icmp, + * so at this point we're in a position to drop it. */ + skb_dst_drop(skb); + + skb_queue_tail(&peer->tx_packet_queue, skb); + skb = next; + } + + ret = packet_send_queue(peer); + peer_put(peer); + return ret; +} + + +static int ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct wireguard_device *wg = netdev_priv(dev); + + if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case WG_GET_DEVICE: + return config_get_device(wg, ifr->ifr_ifru.ifru_data); + case WG_SET_DEVICE: + return config_set_device(wg, ifr->ifr_ifru.ifru_data); + } + return -EINVAL; +} + +static const struct net_device_ops netdev_ops = { + .ndo_init = init, + .ndo_uninit = uninit, + .ndo_open = open, + .ndo_stop = stop, + .ndo_start_xmit = xmit, + .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_do_ioctl = ioctl +}; + +static void destruct(struct net_device *dev) +{ + struct wireguard_device *wg = netdev_priv(dev); + + mutex_lock(&wg->device_update_lock); + peer_remove_all(wg); + wg->incoming_port = 0; + destroy_workqueue(wg->workqueue); +#ifdef CONFIG_WIREGUARD_PARALLEL + 
destroy_workqueue(wg->parallelqueue);
+	padata_free(wg->parallel_send);
+	padata_free(wg->parallel_receive);
+#endif
+	routing_table_free(&wg->peer_routing_table);
+	/* The static identity holds key material, so wipe it explicitly. */
+	memzero_explicit(&wg->static_identity, sizeof(struct noise_static_identity));
+	skb_queue_purge(&wg->incoming_handshakes);
+	socket_uninit(wg);
+	cookie_checker_uninit(&wg->cookie_checker);
+	mutex_unlock(&wg->device_update_lock);
+
+	/* Drop the namespace reference taken by newlink(). */
+	put_net(wg->creating_net);
+
+	pr_debug("Device %s has been deleted\n", dev->name);
+	free_netdev(dev);
+}
+
+/* Offload features advertised in features, hw_features and hw_enc_features. */
+#define WG_FEATURES (NETIF_F_HW_CSUM | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA)
+
+/* rtnl_link_ops.setup callback: fills in the static net_device parameters
+ * (ops, destructor, header sizes, flags, features, MTU) and zeroes the
+ * private wireguard_device area. Per-device resources are allocated later,
+ * in newlink(). */
+static void setup(struct net_device *dev)
+{
+	struct wireguard_device *wg = netdev_priv(dev);
+
+	dev->netdev_ops = &netdev_ops;
+	dev->destructor = destruct;
+	dev->hard_header_len = 0;
+	dev->addr_len = 0;
+	dev->needed_headroom = DATA_PACKET_HEAD_ROOM;
+	dev->needed_tailroom = noise_encrypted_len(MESSAGE_PADDING_MULTIPLE);
+	dev->type = ARPHRD_NONE;
+	dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
+	/* IFF_NO_QUEUE is a member of enum netdev_priv_flags, so it has to be
+	 * set in priv_flags; OR-ing it into the user-visible flags word has no
+	 * effect on qdisc selection. */
+	dev->priv_flags |= IFF_NO_QUEUE;
+#else
+	dev->tx_queue_len = 0;
+#endif
+	dev->features |= NETIF_F_LLTX;
+	dev->features |= WG_FEATURES;
+	dev->hw_features |= WG_FEATURES;
+	dev->hw_enc_features |= WG_FEATURES;
+	/* Ethernet payload size minus the smallest data message, the UDP header
+	 * and the larger of the two IP header sizes. */
+	dev->mtu = ETH_DATA_LEN - MESSAGE_MINIMUM_LENGTH - sizeof(struct udphdr) - max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
+
+	/* We need to keep the dst around in case of icmp replies. */
+	netif_keep_dst(dev);
+
+	memset(wg, 0, sizeof(struct wireguard_device));
+}
+
+/* rtnl_link_ops.newlink callback: initialises the locks, queues and tables
+ * of a new device, allocates its workqueue(s) and cookie checker, and
+ * registers the net_device.
+ *
+ * Returns 0 on success. On failure returns -ENOMEM or the error from
+ * cookie_checker_init() / register_netdevice(), after releasing whatever
+ * was set up so far, including the reference taken on @src_net. */
+static int newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[])
+{
+	int ret = 0;
+	struct wireguard_device *wg = netdev_priv(dev);
+
+	wg->creating_net = get_net(src_net);
+	init_rwsem(&wg->static_identity.lock);
+	mutex_init(&wg->socket_update_lock);
+	mutex_init(&wg->device_update_lock);
+	skb_queue_head_init(&wg->incoming_handshakes);
+	INIT_WORK(&wg->incoming_handshakes_work, packet_process_queued_handshake_packets);
+	pubkey_hashtable_init(&wg->peer_hashtable);
+	index_hashtable_init(&wg->index_hashtable);
+	routing_table_init(&wg->peer_routing_table);
+	INIT_LIST_HEAD(&wg->peer_list);
+
+	wg->workqueue = alloc_workqueue(KBUILD_MODNAME "-%s", WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name);
+	if (!wg->workqueue) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+#ifdef CONFIG_WIREGUARD_PARALLEL
+	wg->parallelqueue = alloc_workqueue(KBUILD_MODNAME "-crypt-%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1, dev->name);
+	if (!wg->parallelqueue) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	wg->parallel_send = padata_alloc_possible(wg->parallelqueue);
+	if (!wg->parallel_send) {
+		ret = -ENOMEM;
+		goto err;
+	}
+	padata_start(wg->parallel_send);
+
+	wg->parallel_receive = padata_alloc_possible(wg->parallelqueue);
+	if (!wg->parallel_receive) {
+		ret = -ENOMEM;
+		goto err;
+	}
+	padata_start(wg->parallel_receive);
+#endif
+
+	ret = cookie_checker_init(&wg->cookie_checker, wg);
+	if (ret < 0)
+		goto err;
+
+	ret = register_netdevice(dev);
+	if (ret < 0)
+		goto err;
+
+	pr_debug("Device %s has been created\n", dev->name);
+
+	return 0;
+
+err:
+	/* setup() zeroed the private area, so a non-NULL field here means the
+	 * corresponding resource was actually allocated above. */
+	put_net(src_net);
+	if (wg->workqueue)
+		destroy_workqueue(wg->workqueue);
+#ifdef CONFIG_WIREGUARD_PARALLEL
+	if (wg->parallelqueue)
+		destroy_workqueue(wg->parallelqueue);
+	if (wg->parallel_send)
+		padata_free(wg->parallel_send);
+	if (wg->parallel_receive)
+		padata_free(wg->parallel_receive);
+#endif
+	if (wg->cookie_checker.device)
+		cookie_checker_uninit(&wg->cookie_checker);
+	return ret;
+}
+
+/* rtnl_link_ops.dellink callback: queues the device for unregistration. */
+static void dellink(struct net_device *dev, struct list_head *head)
+{
+	unregister_netdevice_queue(dev, head);
+}
+
+static struct rtnl_link_ops link_ops __read_mostly = {
+	.kind = KBUILD_MODNAME,
+	.priv_size = sizeof(struct wireguard_device),
+	.setup = setup,
+	.newlink = newlink,
+	.dellink = dellink
+};
+
+/* Registers the rtnl link type. Returns 0 or a negative errno. */
+int device_init(void)
+{
+	int ret = rtnl_link_register(&link_ops);
+	if (ret < 0) {
+		pr_err("Cannot register link_ops\n");
+		return ret;
+	}
+	return ret;
+}
+
+/* Unregisters the rtnl link type, then waits with rcu_barrier() for
+ * outstanding RCU callbacks to finish. */
+void device_uninit(void)
+{
+	rtnl_link_unregister(&link_ops);
+	rcu_barrier();
+}
diff --git a/src/device.h b/src/device.h
new file mode 100644
index 0000000..a3f00da
--- /dev/null
+++ b/src/device.h
@@ -0,0 +1,9 @@
+/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */
+
+#ifndef WGDEVICE_H
+#define WGDEVICE_H
+
+int device_init(void);
+void device_uninit(void);
+
+#endif
diff --git a/src/hashtables.c b/src/hashtables.c
new file mode 100644
index 0000000..8911625
--- /dev/null
+++ b/src/hashtables.c
@@ -0,0 +1,137 @@
+/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */
+
+#include "wireguard.h"
+#include "hashtables.h"
+#include "peer.h"
+#include "crypto/siphash24.h"
+#include "noise.h"
+#include
+
+static inline struct hlist_head *pubkey_bucket(struct pubkey_hashtable *table, const uint8_t pubkey[NOISE_PUBLIC_KEY_LEN])
+{
+	/* siphash24 gives us a secure 64bit number based on a random key. Since the bits are
+	 * uniformly distributed, we can then mask off to get the bits we need.
*/
+	return &table->hashtable[siphash24(pubkey, NOISE_PUBLIC_KEY_LEN, table->key) & (HASH_SIZE(table->hashtable) - 1)];
+}
+
+/* Initialises an empty public-key table with a fresh random siphash key. */
+void pubkey_hashtable_init(struct pubkey_hashtable *table)
+{
+	get_random_bytes(table->key, SIPHASH24_KEY_LEN);
+	hash_init(table->hashtable);
+	mutex_init(&table->lock);
+}
+
+/* Inserts @peer, keyed by peer->handshake.remote_static. Writers are
+ * serialised by table->lock; readers use RCU. */
+void pubkey_hashtable_add(struct pubkey_hashtable *table, struct wireguard_peer *peer)
+{
+	mutex_lock(&table->lock);
+	hlist_add_head_rcu(&peer->pubkey_hash, pubkey_bucket(table, peer->handshake.remote_static));
+	mutex_unlock(&table->lock);
+}
+
+/* Unlinks @peer from the table under table->lock. */
+void pubkey_hashtable_remove(struct pubkey_hashtable *table, struct wireguard_peer *peer)
+{
+	mutex_lock(&table->lock);
+	hlist_del_init_rcu(&peer->pubkey_hash);
+	mutex_unlock(&table->lock);
+}
+
+/* Returns a strong reference to a peer, or NULL when no peer with this
+ * public key is found or peer_get() fails. */
+struct wireguard_peer *pubkey_hashtable_lookup(struct pubkey_hashtable *table, const uint8_t pubkey[NOISE_PUBLIC_KEY_LEN])
+{
+	struct wireguard_peer *iter_peer, *peer = NULL;
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(iter_peer, pubkey_bucket(table, pubkey), pubkey_hash) {
+		if (!memcmp(pubkey, iter_peer->handshake.remote_static, NOISE_PUBLIC_KEY_LEN)) {
+			peer = iter_peer;
+			break;
+		}
+	}
+	/* Take the reference while still inside the RCU read-side section. */
+	peer = peer_get(peer);
+	rcu_read_unlock();
+	return peer;
+}
+
+static inline struct hlist_head *index_bucket(struct index_hashtable *table, const __le32 index)
+{
+	/* Since the indices are random and thus all bits are uniformly distributed,
+	 * we can find its bucket simply by masking. */
+	return &table->hashtable[(__force u32)index & (HASH_SIZE(table->hashtable) - 1)];
+}
+
+/* Initialises an empty index table and its spinlock. */
+void index_hashtable_init(struct index_hashtable *table)
+{
+	hash_init(table->hashtable);
+	spin_lock_init(&table->lock);
+}
+
+/* (Re)inserts @entry under a freshly generated random index that is not
+ * currently in the table, and returns that index. Any previous membership
+ * of @entry is removed first. */
+__le32 index_hashtable_insert(struct index_hashtable *table, struct index_hashtable_entry *entry)
+{
+	struct index_hashtable_entry *existing_entry;
+
+	spin_lock(&table->lock);
+	hlist_del_init_rcu(&entry->index_hash);
+	spin_unlock(&table->lock);
+
+	rcu_read_lock();
+
+search_unused_slot:
+	/* First we try to find an unused slot, randomly, while unlocked. */
+	get_random_bytes(&entry->index, sizeof(entry->index));
+	hlist_for_each_entry_rcu(existing_entry, index_bucket(table, entry->index), index_hash) {
+		if (existing_entry->index == entry->index)
+			goto search_unused_slot; /* If it's already in use, we continue searching. */
+	}
+
+	/* Once we've found an unused slot, we lock it, and then double-check
+	 * that nobody else stole it from us. */
+	spin_lock(&table->lock);
+	hlist_for_each_entry_rcu(existing_entry, index_bucket(table, entry->index), index_hash) {
+		if (existing_entry->index == entry->index) {
+			spin_unlock(&table->lock);
+			goto search_unused_slot; /* If it was stolen, we start over. */
+		}
+	}
+	/* Otherwise, we know we have it exclusively (since we're locked), so we insert. */
+	hlist_add_head_rcu(&entry->index_hash, index_bucket(table, entry->index));
+	spin_unlock(&table->lock);
+
+	rcu_read_unlock();
+
+	return entry->index;
+}
+
+/* Puts @new into the table at @old's position, giving it @old's index, and
+ * leaves @old unhashed.
+ * NOTE(review): hlist_replace_rcu() is called unconditionally, so this
+ * appears to require that @old is currently hashed - confirm at callers. */
+void index_hashtable_replace(struct index_hashtable *table, struct index_hashtable_entry *old, struct index_hashtable_entry *new)
+{
+	spin_lock(&table->lock);
+	new->index = old->index;
+	hlist_replace_rcu(&old->index_hash, &new->index_hash);
+	INIT_HLIST_NODE(&old->index_hash);
+	spin_unlock(&table->lock);
+}
+
+/* Unlinks @entry from the table under table->lock. */
+void index_hashtable_remove(struct index_hashtable *table, struct index_hashtable_entry *entry)
+{
+	spin_lock(&table->lock);
+	hlist_del_init_rcu(&entry->index_hash);
+	spin_unlock(&table->lock);
+}
+
+/* Looks up the entry with the given index whose type matches @type_mask.
+ * On success a strong reference to entry->peer has been taken on behalf of
+ * the caller. Returns NULL when there is no matching entry or when the
+ * reference on its peer could not be taken. */
+struct index_hashtable_entry *index_hashtable_lookup(struct index_hashtable *table, const enum index_hashtable_type type_mask, const __le32 index)
+{
+	struct index_hashtable_entry *iter_entry, *entry = NULL;
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(iter_entry, index_bucket(table, index), index_hash) {
+		if (iter_entry->index == index && (iter_entry->type & type_mask)) {
+			entry = iter_entry;
+			break;
+		}
+	}
+	if (likely(entry)) {
+		/* Only test the result of peer_get(); never store it back. The
+		 * entry is shared with every other reader and we hold nothing
+		 * but the RCU read lock, so writing NULL into entry->peer on a
+		 * failed get would corrupt an entry whose owner still
+		 * dereferences entry->peer during teardown. */
+		if (unlikely(!peer_get(entry->peer)))
+			entry = NULL;
+	}
+	rcu_read_unlock();
+	return entry;
+}
diff --git a/src/hashtables.h b/src/hashtables.h
new file mode 100644
index 0000000..495a6f0
--- /dev/null
+++ b/src/hashtables.h
@@ -0,0 +1,33 @@
+/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved.
*/
+
+#ifndef HASHTABLES_H
+#define HASHTABLES_H
+
+#include
+#include
+#include "crypto/siphash24.h"
+
+/* Table of peers keyed by public key. Buckets are chosen with siphash24
+ * under the per-table random key; lock serialises writers. */
+struct pubkey_hashtable {
+	DECLARE_HASHTABLE(hashtable, 8);
+	uint8_t key[SIPHASH24_KEY_LEN];
+	struct mutex lock;
+};
+
+void pubkey_hashtable_init(struct pubkey_hashtable *table);
+void pubkey_hashtable_add(struct pubkey_hashtable *table, struct wireguard_peer *peer);
+void pubkey_hashtable_remove(struct pubkey_hashtable *table, struct wireguard_peer *peer);
+struct wireguard_peer *pubkey_hashtable_lookup(struct pubkey_hashtable *table, const uint8_t pubkey[NOISE_PUBLIC_KEY_LEN]);
+
+/* Table of entries keyed by their random 32-bit index; lock serialises
+ * writers. */
+struct index_hashtable {
+	DECLARE_HASHTABLE(hashtable, 10);
+	spinlock_t lock;
+};
+struct index_hashtable_entry;
+
+void index_hashtable_init(struct index_hashtable *table);
+__le32 index_hashtable_insert(struct index_hashtable *table, struct index_hashtable_entry *entry);
+void index_hashtable_replace(struct index_hashtable *table, struct index_hashtable_entry *old, struct index_hashtable_entry *new);
+void index_hashtable_remove(struct index_hashtable *table, struct index_hashtable_entry *entry);
+struct index_hashtable_entry *index_hashtable_lookup(struct index_hashtable *table, const enum index_hashtable_type type_mask, const __le32 index);
+
+#endif
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..bcc432a
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,50 @@
+/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */
+
+#include "wireguard.h"
+#include "device.h"
+#include "crypto/chacha20poly1305.h"
+#include "crypto/blake2s.h"
+#include "crypto/siphash24.h"
+#include "crypto/curve25519.h"
+#include "noise.h"
+#include "packets.h"
+#include
+#include
+#include
+
+/* Module entry point: runs the self-tests in DEBUG builds, initialises the
+ * chacha20poly1305 and noise subsystems, then registers the link type via
+ * device_init(). Returns device_init()'s result. */
+static int __init mod_init(void)
+{
+	int ret = 0;
+
+#ifdef DEBUG
+	routing_table_selftest();
+	packet_counter_selftest();
+	curve25519_selftest();
+	chacha20poly1305_selftest();
+	blake2s_selftest();
+	siphash24_selftest();
+#endif
+	chacha20poly1305_init();
+	noise_init();
+
+	ret = device_init();
+	if (ret < 0)
+		return ret;
+
+	pr_info("WireGuard loaded. See www.wireguard.io for information.\n");
+	pr_info("(C) Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved.\n");
+	return ret;
+}
+
+/* Module exit point: undoes device_init(). */
+static void __exit mod_exit(void)
+{
+	device_uninit();
+	pr_debug("Wireguard has been unloaded\n");
+}
+
+module_init(mod_init);
+module_exit(mod_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Simple, secure, and speedy VPN tunnel");
+MODULE_AUTHOR("Jason A. Donenfeld ");
+MODULE_ALIAS_RTNL_LINK(KBUILD_MODNAME);
diff --git a/src/messages.h b/src/messages.h
new file mode 100644
index 0000000..38bead5
--- /dev/null
+++ b/src/messages.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved.
+ *
+ * See doc/protocol.md for more info
+ */
+
+#ifndef MESSAGES_H
+#define MESSAGES_H
+
+#include "noise.h"
+#include "cookie.h"
+
+/* Value of the one-byte type field at the start of every message.
+ * MESSAGE_INVALID doubles as the "could not classify" result of
+ * message_determine_type(). */
+enum message_type {
+	MESSAGE_INVALID = 0,
+	MESSAGE_HANDSHAKE_INITIATION = 1,
+	MESSAGE_HANDSHAKE_RESPONSE = 2,
+	MESSAGE_HANDSHAKE_COOKIE = 3,
+	MESSAGE_DATA = 4,
+	MESSAGE_TOTAL = 5
+};
+
+/* Common first field of every message struct below. */
+struct message_header {
+	u8 type;
+} __packed;
+
+/* Trailer carried by both handshake messages. */
+struct message_macs {
+	u8 mac1[COOKIE_LEN];
+	u8 mac2[COOKIE_LEN];
+} __packed;
+
+/* Handshake initiation, filled in by noise_handshake_create_initiation(). */
+struct message_handshake_initiation {
+	struct message_header header;
+	__le32 sender_index;
+	u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN];
+	u8 encrypted_static[noise_encrypted_len(NOISE_PUBLIC_KEY_LEN)];
+	u8 encrypted_timestamp[noise_encrypted_len(NOISE_TIMESTAMP_LEN)];
+	struct message_macs macs;
+} __packed;
+
+/* Handshake response, filled in by noise_handshake_create_response(). */
+struct message_handshake_response {
+	struct message_header header;
+	__le32 sender_index;
+	__le32 receiver_index;
+	u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN];
+	u8 encrypted_nothing[noise_encrypted_len(0)];
+	struct message_macs macs;
+} __packed;
+
+struct message_handshake_cookie {
+	struct message_header header;
+	__le32 receiver_index;
+	u8 salt[COOKIE_SALT_LEN];
+	u8 encrypted_cookie[noise_encrypted_len(COOKIE_LEN)];
+} __packed;
+
+/* Data message: fixed header followed by a variable-length ciphertext. */
+struct message_data {
+	struct message_header header;
+	__le32 key_idx;
+	__le64 counter;
+	u8 encrypted_data[];
+} __packed;
+
+/* Total size of a data message carrying plain_len bytes of plaintext. */
+#define message_data_len(plain_len) (noise_encrypted_len(plain_len) + sizeof(struct message_data))
+
+enum message_alignments {
+	MESSAGE_DATA_TARGET_OFFSET = sizeof(struct message_data),
+	MESSAGE_DATA_TARGET_OPTIMAL_ALIGNMENT = 32, /* Per intel AVX recommendations */
+	MESSAGE_PADDING_MULTIPLE = 16,
+	MESSAGE_MINIMUM_LENGTH = message_data_len(0)
+};
+
+/* Classifies a raw message from its type byte and its length.
+ * A data message must be at least MESSAGE_MINIMUM_LENGTH bytes; each
+ * handshake message must be exactly the size of its struct. Anything else,
+ * including a buffer too short to hold the header, is MESSAGE_INVALID. */
+static inline enum message_type message_determine_type(void *src, size_t src_len)
+{
+	struct message_header *header = src;
+	if (unlikely(src_len < sizeof(struct message_header)))
+		return MESSAGE_INVALID;
+	if (header->type == MESSAGE_DATA && src_len >= MESSAGE_MINIMUM_LENGTH)
+		return MESSAGE_DATA;
+	if (header->type == MESSAGE_HANDSHAKE_INITIATION && src_len == sizeof(struct message_handshake_initiation))
+		return MESSAGE_HANDSHAKE_INITIATION;
+	if (header->type == MESSAGE_HANDSHAKE_RESPONSE && src_len == sizeof(struct message_handshake_response))
+		return MESSAGE_HANDSHAKE_RESPONSE;
+	if (header->type == MESSAGE_HANDSHAKE_COOKIE && src_len == sizeof(struct message_handshake_cookie))
+		return MESSAGE_HANDSHAKE_COOKIE;
+	return MESSAGE_INVALID;
+}
+
+#endif
diff --git a/src/netns.sh b/src/netns.sh
new file mode 100644
index 0000000..2157e03
--- /dev/null
+++ b/src/netns.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+[[ $UID != 0 ]] && exec sudo bash "$(readlink -f "$0")" "$@"
+set -ex
+cd "$(dirname "$(readlink -f "$0")")"
+
+cleanup() {
+	set +e
+	ip link del dev wgnetns1
+	ip link del dev wgnetns2
+	ip netns exec wgnetns ip link del dev wgnetns2
+	killall iperf3
+	ip netns del wgnetns
+	exit 0
+}
+
+trap cleanup EXIT
+
+ip link add dev wgnetns1 type wireguard
+ip link add dev wgnetns2 type wireguard
+
+ip netns del wgnetns 2>/dev/null || true
+ip netns add wgnetns
+ip link set wgnetns2 netns wgnetns
+ip netns exec wgnetns ip link set lo up
+
+ip addr add 192.168.241.1/24 dev wgnetns1
+ip netns exec wgnetns ip addr add 192.168.241.2/24 dev wgnetns2
+
+key1="$(tools/wg genkey)"
+key2="$(tools/wg genkey)"
+
+tools/wg set wgnetns1 private-key <(echo "$key1") listen-port 38281 peer "$(tools/wg pubkey <<<"$key2")" allowed-ips 192.168.241.2/24 endpoint 127.0.0.1:43928
+ip netns exec wgnetns tools/wg set wgnetns2 private-key <(echo "$key2") listen-port 43928 peer "$(tools/wg pubkey <<<"$key1")" allowed-ips 192.168.241.1/24 endpoint 127.0.0.1:38281
+
+ip link set wgnetns1 up
+ip netns exec wgnetns ip link set wgnetns2 up
+
+ip netns exec wgnetns iperf3 -s -D
+stdbuf -o 0 iperf3 -i 1 -n 300000G "$@" -c 192.168.241.2
diff --git a/src/noise.c b/src/noise.c
new file mode 100644
index 0000000..053d946
--- /dev/null
+++ b/src/noise.c
@@ -0,0 +1,565 @@
+/*
Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */
+
+#include "wireguard.h"
+#include "noise.h"
+#include "messages.h"
+#include "packets.h"
+#include "hashtables.h"
+#include
+#include
+#include
+#include
+#include
+#include
+
+/* This implements Noise_IK:
+ *
+ * <- s
+ * ******
+ * -> e, dhes, s, dhss, t
+ * <- e, dhee, dhes
+ */
+
+/* The array sizes below equal the string lengths, so none of these arrays
+ * carries a terminating NUL; they are always used with sizeof(). */
+static const u8 handshake_name[33] = "Noise_IK_25519_ChaChaPoly_BLAKE2s";
+static const u8 handshake_psk_name[36] = "NoisePSK_IK_25519_ChaChaPoly_BLAKE2s";
+static u8 handshake_name_hash[NOISE_HASH_LEN];
+static u8 handshake_psk_name_hash[NOISE_HASH_LEN];
+static const u8 identifier_name[34] = "WireGuard v0 zx2c4 Jason@zx2c4.com";
+/* Source of keypair->internal_id values. */
+static atomic64_t keypair_counter = ATOMIC64_INIT(0);
+
+/* Precomputes the BLAKE2s hashes of the two protocol names; must run before
+ * any handshake is started (handshake_init() reads them). */
+void noise_init(void)
+{
+	blake2s(handshake_name_hash, handshake_name, NULL, NOISE_HASH_LEN, sizeof(handshake_name), 0);
+	blake2s(handshake_psk_name_hash, handshake_psk_name, NULL, NOISE_HASH_LEN, sizeof(handshake_psk_name), 0);
+}
+
+/* Resets @handshake to the zeroed state and binds it to @peer, the peer's
+ * public key and the device's static identity. */
+void noise_handshake_init(struct noise_handshake *handshake, struct noise_static_identity *static_identity, const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], struct wireguard_peer *peer)
+{
+	memset(handshake, 0, sizeof(struct noise_handshake));
+	init_rwsem(&handshake->lock);
+	handshake->entry.type = INDEX_HASHTABLE_HANDSHAKE;
+	handshake->entry.peer = peer;
+	memcpy(handshake->remote_static, peer_public_key, NOISE_PUBLIC_KEY_LEN);
+	handshake->static_identity = static_identity;
+	handshake->state = HANDSHAKE_ZEROED;
+}
+
+/* Removes the handshake from the index table and wipes its ephemeral and
+ * symmetric state, returning it to HANDSHAKE_ZEROED. remote_static,
+ * static_identity and the entry's peer binding are left untouched. */
+void noise_handshake_clear(struct noise_handshake *handshake)
+{
+	index_hashtable_remove(&handshake->entry.peer->device->index_hashtable, &handshake->entry);
+	down_write(&handshake->lock);
+	memset(&handshake->ephemeral_public, 0, NOISE_PUBLIC_KEY_LEN);
+	memset(&handshake->ephemeral_private, 0, NOISE_PUBLIC_KEY_LEN);
+	memset(&handshake->remote_ephemeral, 0, NOISE_PUBLIC_KEY_LEN);
+	memset(&handshake->hash, 0, NOISE_HASH_LEN);
+	memset(&handshake->chaining_key, 0, NOISE_HASH_LEN);
+	memset(&handshake->key, 0, NOISE_SYMMETRIC_KEY_LEN);
+	handshake->remote_index = 0;
+	handshake->state = HANDSHAKE_ZEROED;
+	up_write(&handshake->lock);
+	/* Removed a second time after the wipe; the removal uses
+	 * hlist_del_init_rcu(), so repeating it on an unhashed entry is safe. */
+	index_hashtable_remove(&handshake->entry.peer->device->index_hashtable, &handshake->entry);
+}
+
+/* Allocates a zeroed keypair bound to @peer with a fresh internal id and an
+ * initial reference. Returns NULL on allocation failure. May sleep
+ * (GFP_KERNEL). */
+static struct noise_keypair *keypair_create(struct wireguard_peer *peer)
+{
+	struct noise_keypair *keypair = kzalloc(sizeof(struct noise_keypair), GFP_KERNEL);
+	if (unlikely(!keypair))
+		return NULL;
+	keypair->internal_id = atomic64_inc_return(&keypair_counter);
+	keypair->entry.type = INDEX_HASHTABLE_KEYPAIR;
+	keypair->entry.peer = peer;
+	kref_init(&keypair->refcount);
+	return keypair;
+}
+
+/* RCU callback: zeroes and frees the keypair. */
+static void keypair_free_rcu(struct rcu_head *rcu)
+{
+	struct noise_keypair *keypair = container_of(rcu, struct noise_keypair, rcu);
+	net_dbg_ratelimited("Keypair %Lu destroyed for peer %Lu\n", keypair->internal_id, keypair->entry.peer->internal_id);
+	kzfree(keypair);
+}
+
+/* kref release: unpublishes the keypair from the index table, then defers
+ * the actual free through call_rcu(). */
+static void keypair_free_kref(struct kref *kref)
+{
+	struct noise_keypair *keypair = container_of(kref, struct noise_keypair, refcount);
+	index_hashtable_remove(&keypair->entry.peer->device->index_hashtable, &keypair->entry);
+	call_rcu(&keypair->rcu, keypair_free_rcu);
+}
+
+/* Drops one reference on @keypair; NULL is accepted and ignored. */
+void noise_keypair_put(struct noise_keypair *keypair)
+{
+	if (unlikely(!keypair))
+		return;
+	kref_put(&keypair->refcount, keypair_free_kref);
+}
+
+/* Empties the previous, next and current slots, dropping the reference each
+ * slot held. Serialised by keypair_update_lock. */
+void noise_keypairs_clear(struct noise_keypairs *keypairs)
+{
+	struct noise_keypair *old;
+	mutex_lock(&keypairs->keypair_update_lock);
+	old = rcu_dereference_protected(keypairs->previous_keypair, lockdep_is_held(&keypairs->keypair_update_lock));
+	rcu_assign_pointer(keypairs->previous_keypair, NULL);
+	noise_keypair_put(old);
+	old = rcu_dereference_protected(keypairs->next_keypair, lockdep_is_held(&keypairs->keypair_update_lock));
+	rcu_assign_pointer(keypairs->next_keypair, NULL);
+	noise_keypair_put(old);
+	old = rcu_dereference_protected(keypairs->current_keypair, lockdep_is_held(&keypairs->keypair_update_lock));
+	rcu_assign_pointer(keypairs->current_keypair, NULL);
+	noise_keypair_put(old);
+	mutex_unlock(&keypairs->keypair_update_lock);
+}
+
+/* Installs a freshly derived keypair, rotating the three slots.
+ *
+ * As initiator the new keypair becomes current straight away. The pending
+ * next keypair, if there is one, moves to previous and the old current is
+ * dropped; otherwise the old current moves to previous. The old previous is
+ * dropped either way.
+ *
+ * As responder the new keypair is parked in next (see
+ * noise_received_with_keypair()), replacing any earlier next, and previous
+ * is emptied. */
+static void add_new_keypair(struct noise_keypairs *keypairs, struct noise_keypair *new_keypair)
+{
+	struct noise_keypair *previous_keypair, *next_keypair, *current_keypair;
+
+	mutex_lock(&keypairs->keypair_update_lock);
+	previous_keypair = rcu_dereference_protected(keypairs->previous_keypair, lockdep_is_held(&keypairs->keypair_update_lock));
+	next_keypair = rcu_dereference_protected(keypairs->next_keypair, lockdep_is_held(&keypairs->keypair_update_lock));
+	current_keypair = rcu_dereference_protected(keypairs->current_keypair, lockdep_is_held(&keypairs->keypair_update_lock));
+	if (new_keypair->i_am_the_initiator) {
+		if (next_keypair) {
+			rcu_assign_pointer(keypairs->next_keypair, NULL);
+			rcu_assign_pointer(keypairs->previous_keypair, next_keypair);
+			noise_keypair_put(current_keypair);
+		} else
+			rcu_assign_pointer(keypairs->previous_keypair, current_keypair);
+		noise_keypair_put(previous_keypair);
+		rcu_assign_pointer(keypairs->current_keypair, new_keypair);
+	} else {
+		rcu_assign_pointer(keypairs->next_keypair, new_keypair);
+		noise_keypair_put(next_keypair);
+		rcu_assign_pointer(keypairs->previous_keypair, NULL);
+		noise_keypair_put(previous_keypair);
+	}
+	mutex_unlock(&keypairs->keypair_update_lock);
+}
+
+bool noise_received_with_keypair(struct noise_keypairs *keypairs, struct noise_keypair *received_keypair)
+{
+	bool ret = false;
+	struct noise_keypair *old_keypair;
+
+	/* TODO: probably this needs the actual mutex, but we're in atomic context,
+	 * so we can't take it here. Instead we just rely on RCU for the lookups.
*/
+	rcu_read_lock();
+	/* A packet arrived under the parked "next" keypair: promote it to
+	 * current, demote current to previous and drop the old previous. */
+	if (unlikely(received_keypair == rcu_dereference(keypairs->next_keypair))) {
+		ret = true;
+		old_keypair = rcu_dereference(keypairs->previous_keypair);
+		rcu_assign_pointer(keypairs->previous_keypair, rcu_dereference(keypairs->current_keypair));
+		noise_keypair_put(old_keypair);
+		rcu_assign_pointer(keypairs->current_keypair, received_keypair);
+		rcu_assign_pointer(keypairs->next_keypair, NULL);
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
+/* Sets the device's private key and derives the matching public key, or,
+ * when @private_key is NULL, zeroes both and marks the identity absent. */
+void noise_set_static_identity_private_key(struct noise_static_identity *static_identity, const u8 private_key[NOISE_PUBLIC_KEY_LEN])
+{
+	down_write(&static_identity->lock);
+	if (private_key) {
+		memcpy(static_identity->static_private, private_key, NOISE_PUBLIC_KEY_LEN);
+		curve25519_generate_public(static_identity->static_public, private_key);
+		static_identity->has_identity = true;
+	} else {
+		memset(static_identity->static_private, 0, NOISE_PUBLIC_KEY_LEN);
+		memset(static_identity->static_public, 0, NOISE_PUBLIC_KEY_LEN);
+		static_identity->has_identity = false;
+	}
+	up_write(&static_identity->lock);
+}
+
+/* Sets the pre-shared key, or, when @preshared_key is NULL, zeroes it and
+ * clears has_psk. */
+void noise_set_static_identity_preshared_key(struct noise_static_identity *static_identity, const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN])
+{
+	down_write(&static_identity->lock);
+	if (preshared_key) {
+		memcpy(static_identity->preshared_key, preshared_key, NOISE_SYMMETRIC_KEY_LEN);
+		static_identity->has_psk = true;
+	} else {
+		memset(static_identity->preshared_key, 0, NOISE_SYMMETRIC_KEY_LEN);
+		static_identity->has_psk = false;
+	}
+	up_write(&static_identity->lock);
+}
+
+/* This is Hugo Krawczyk's HKDF:
+ * - https://eprint.iacr.org/2010/264.pdf
+ * - https://tools.ietf.org/html/rfc5869
+ *
+ * Derives two outputs of up to BLAKE2S_OUTBYTES each from @data, keyed by
+ * @chaining_key. All of first_dst is written before second_dst, and
+ * chaining_key is only read during the extract step, so first_dst may alias
+ * chaining_key (mix_key() relies on this).
+ */
+static void kdf(u8 *first_dst, u8 *second_dst, const u8 *data,
+		size_t first_len, size_t second_len, size_t data_len,
+		const u8 chaining_key[NOISE_HASH_LEN])
+{
+	u8 secret[BLAKE2S_OUTBYTES];
+	u8 output[BLAKE2S_OUTBYTES + 1];
+	BUG_ON(first_len > BLAKE2S_OUTBYTES || second_len > BLAKE2S_OUTBYTES);
+
+	/* Extract entropy from data into secret */
+	blake2s_hmac(secret, data, chaining_key, BLAKE2S_OUTBYTES, data_len, NOISE_HASH_LEN);
+
+	/* Expand first key: key = secret, data = 0x1 */
+	output[0] = 1;
+	blake2s_hmac(output, output, secret, BLAKE2S_OUTBYTES, 1, BLAKE2S_OUTBYTES);
+	memcpy(first_dst, output, first_len);
+
+	/* Expand second key: key = secret, data = first-key || 0x2 */
+	output[BLAKE2S_OUTBYTES] = 2;
+	blake2s_hmac(output, output, secret, BLAKE2S_OUTBYTES, BLAKE2S_OUTBYTES + 1, BLAKE2S_OUTBYTES);
+	memcpy(second_dst, output, second_len);
+
+	/* Clear sensitive data from stack */
+	memzero_explicit(secret, BLAKE2S_OUTBYTES);
+	memzero_explicit(output, BLAKE2S_OUTBYTES + 1);
+}
+
+/* Resets the counters of a freshly derived transport key, stamps its
+ * birthdate and marks it valid. */
+static void symmetric_key_init(struct noise_symmetric_key *key)
+{
+	spin_lock_init(&key->counter.receive.lock);
+	atomic64_set(&key->counter.counter, 0);
+	key->counter.receive.backtrack = 0;
+	key->birthdate = get_jiffies_64();
+	key->is_valid = true;
+}
+
+/* Splits the final chaining key into two transport keys and initialises
+ * both. */
+static void derive_keys(struct noise_symmetric_key *first_dst, struct noise_symmetric_key *second_dst, const u8 chaining_key[NOISE_HASH_LEN])
+{
+	kdf(first_dst->key, second_dst->key, NULL, NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, chaining_key);
+	symmetric_key_init(first_dst);
+	symmetric_key_init(second_dst);
+}
+
+/* Mixes @src into the chaining key (updated in place) and derives a new
+ * handshake encryption key into @key. */
+static void mix_key(u8 key[NOISE_SYMMETRIC_KEY_LEN], u8 chaining_key[NOISE_HASH_LEN], const u8 *src, size_t src_len)
+{
+	kdf(chaining_key, key, src, NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, src_len, chaining_key);
+}
+
+/* Computes curve25519(private, public), mixes the result with mix_key() and
+ * wipes the shared secret from the stack. */
+static void mix_dh(u8 key[NOISE_SYMMETRIC_KEY_LEN], u8 chaining_key[NOISE_HASH_LEN],
+		   const u8 private[NOISE_PUBLIC_KEY_LEN], const u8 public[NOISE_PUBLIC_KEY_LEN])
+{
+	u8 dh_calculation[NOISE_PUBLIC_KEY_LEN];
+	curve25519(dh_calculation, private, public);
+	mix_key(key, chaining_key, dh_calculation, NOISE_PUBLIC_KEY_LEN);
+	memzero_explicit(dh_calculation, NOISE_PUBLIC_KEY_LEN);
+}
+
+/* hash = BLAKE2s(hash || src), updated in place. */
+static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len)
+{
+	struct blake2s_state blake;
+	blake2s_init(&blake, NOISE_HASH_LEN);
+	blake2s_update(&blake, hash, NOISE_HASH_LEN);
+	blake2s_update(&blake, src, src_len);
+	blake2s_final(&blake, hash, NOISE_HASH_LEN);
+}
+
+/* Seeds key, chaining key and transcript hash for a new handshake. @psk may
+ * be NULL, which selects the non-PSK protocol name. @remote_static is the
+ * responder's static public key; both call sites pass the key of the side
+ * that receives the initiation. */
+static void handshake_init(u8 key[NOISE_SYMMETRIC_KEY_LEN], u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN],
+			   const u8 remote_static[NOISE_PUBLIC_KEY_LEN], const u8 psk[NOISE_SYMMETRIC_KEY_LEN])
+{
+	memset(key, 0, NOISE_SYMMETRIC_KEY_LEN);
+	memcpy(hash, psk ? handshake_psk_name_hash : handshake_name_hash, NOISE_HASH_LEN);
+	mix_hash(hash, identifier_name, sizeof(identifier_name));
+	if (psk) {
+		u8 temp_hash[NOISE_HASH_LEN];
+		kdf(chaining_key, temp_hash, psk, NOISE_HASH_LEN, NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, handshake_psk_name_hash);
+		mix_hash(hash, temp_hash, NOISE_HASH_LEN);
+		memzero_explicit(temp_hash, NOISE_HASH_LEN);
+	} else
+		memcpy(chaining_key, handshake_name_hash, NOISE_HASH_LEN);
+	mix_hash(hash, remote_static, NOISE_PUBLIC_KEY_LEN);
+}
+
+/* Encrypts with the transcript hash as associated data, then mixes the
+ * ciphertext into the hash. Returns false if encryption fails, in which
+ * case the hash is left unchanged. */
+static bool handshake_encrypt(u8 *dst_ciphertext, const u8 *src_plaintext, size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN], u8 hash[NOISE_HASH_LEN])
+{
+	if (!chacha20poly1305_encrypt(dst_ciphertext, src_plaintext, src_len, hash, NOISE_HASH_LEN, 0 /* Always zero for Noise_IK */, key))
+		return false;
+	mix_hash(hash, dst_ciphertext, noise_encrypted_len(src_len));
+	return true;
+}
+
+/* Counterpart of handshake_encrypt(): @src_len is the ciphertext length.
+ * Returns false if decryption fails, in which case the hash is left
+ * unchanged. */
+static bool handshake_decrypt(u8 *dst_plaintext, const u8 *src_ciphertext, size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN], u8 hash[NOISE_HASH_LEN])
+{
+	if (!chacha20poly1305_decrypt(dst_plaintext, src_ciphertext, src_len, hash, NOISE_HASH_LEN, 0 /* Always zero for Noise_IK */, key))
+		return false;
+	mix_hash(hash, src_ciphertext, src_len);
+	return true;
+}
+
+/* Copies a cleartext field and mixes it into the transcript hash. */
+static void handshake_nocrypt(u8 *dst, const u8 *src, size_t src_len, u8 hash[NOISE_HASH_LEN])
+{
+	memcpy(dst, src, src_len);
+	mix_hash(hash, src, src_len);
+}
+
+/* Writes the current wall-clock time as a big-endian 64-bit seconds field
+ * followed by a big-endian 32-bit nanoseconds field.
+ * NOTE(review): the stores go through casts of a u8 pointer, so this
+ * assumes @output is suitably aligned on strict-alignment targets -
+ * confirm. */
+static void tai64n_now(u8 output[NOISE_TIMESTAMP_LEN])
+{
+	struct timeval now;
+	do_gettimeofday(&now);
+	/* http://cr.yp.to/libtai/tai64.html */
+	*(__be64 *)output = cpu_to_be64(now.tv_sec);
+	*(__be32 *)(output + sizeof(__be64)) = cpu_to_be32(1000 * now.tv_usec + 500);
+}
+
+/* Builds a handshake initiation message in @dst and advances @handshake to
+ * HANDSHAKE_CREATED_INITIATION. Returns false, leaving the state field
+ * untouched, when no static identity is configured or an encryption step
+ * fails. Lock order: static identity (read), then handshake (write). */
+bool noise_handshake_create_initiation(struct message_handshake_initiation *dst, struct noise_handshake *handshake)
+{
+	u8 timestamp[NOISE_TIMESTAMP_LEN];
+	bool ret = false;
+
+	down_read(&handshake->static_identity->lock);
+	down_write(&handshake->lock);
+
+	if (unlikely(!handshake->static_identity->has_identity))
+		goto out;
+
+	dst->header.type = MESSAGE_HANDSHAKE_INITIATION;
+
+	handshake_init(handshake->key, handshake->chaining_key, handshake->hash, handshake->remote_static,
+		       handshake->static_identity->has_psk ? handshake->static_identity->preshared_key : NULL);
+
+	/* e */
+	curve25519_generate_secret(handshake->ephemeral_private);
+	curve25519_generate_public(handshake->ephemeral_public, handshake->ephemeral_private);
+	handshake_nocrypt(dst->unencrypted_ephemeral, handshake->ephemeral_public, NOISE_PUBLIC_KEY_LEN, handshake->hash);
+	if (handshake->static_identity->has_psk)
+		mix_key(handshake->key, handshake->chaining_key, handshake->ephemeral_public, NOISE_PUBLIC_KEY_LEN);
+
+	/* dhes */
+	mix_dh(handshake->key, handshake->chaining_key, handshake->ephemeral_private, handshake->remote_static);
+
+	/* s */
+	if (!handshake_encrypt(dst->encrypted_static, handshake->static_identity->static_public, NOISE_PUBLIC_KEY_LEN, handshake->key, handshake->hash))
+		goto out;
+
+	/* dhss */
+	mix_dh(handshake->key, handshake->chaining_key, handshake->static_identity->static_private, handshake->remote_static);
+
+	/* t */
+	tai64n_now(timestamp);
+	if (!handshake_encrypt(dst->encrypted_timestamp, timestamp, NOISE_TIMESTAMP_LEN, handshake->key, handshake->hash))
+		goto out;
+
+	/* Publish the handshake under a fresh random index so the response can
+	 * be matched back to it. */
+	dst->sender_index = index_hashtable_insert(&handshake->entry.peer->device->index_hashtable, &handshake->entry);
+
+	ret = true;
+	handshake->state = HANDSHAKE_CREATED_INITIATION;
+
+out:
+	up_write(&handshake->lock);
+	up_read(&handshake->static_identity->lock);
+	return ret;
+}
+
+/* Processes a received initiation. The transcript is computed on stack
+ * copies and only committed to the peer's handshake once every decryption
+ * has succeeded and the replay / flood checks have passed.
+ *
+ * Returns the sending peer with a reference held (taken by
+ * pubkey_hashtable_lookup()), or NULL if the message is rejected. */
+struct wireguard_peer *noise_handshake_consume_initiation(struct message_handshake_initiation *src, struct wireguard_device *wg)
+{
+	bool replay_attack, flood_attack;
+	u8 s[NOISE_PUBLIC_KEY_LEN];
+	u8 e[NOISE_PUBLIC_KEY_LEN];
+	u8 t[NOISE_TIMESTAMP_LEN];
+	struct noise_handshake *handshake;
+	struct wireguard_peer *wg_peer = NULL;
+	u8 key[NOISE_SYMMETRIC_KEY_LEN];
+	u8 hash[NOISE_HASH_LEN];
+	u8 chaining_key[NOISE_HASH_LEN];
+
+	down_read(&wg->static_identity.lock);
+	if (unlikely(!wg->static_identity.has_identity))
+		goto out;
+
+	handshake_init(key, chaining_key, hash, wg->static_identity.static_public,
+		       wg->static_identity.has_psk ? wg->static_identity.preshared_key : NULL);
+
+	/* e */
+	handshake_nocrypt(e, src->unencrypted_ephemeral, sizeof(src->unencrypted_ephemeral), hash);
+	if (wg->static_identity.has_psk)
+		mix_key(key, chaining_key, e, NOISE_PUBLIC_KEY_LEN);
+
+	/* dhes */
+	mix_dh(key, chaining_key, wg->static_identity.static_private, e);
+
+	/* s */
+	if (!handshake_decrypt(s, src->encrypted_static, sizeof(src->encrypted_static), key, hash))
+		goto out;
+
+	/* dhss */
+	mix_dh(key, chaining_key, wg->static_identity.static_private, s);
+
+	/* t */
+	if (!handshake_decrypt(t, src->encrypted_timestamp, sizeof(src->encrypted_timestamp), key, hash))
+		goto out;
+
+	/* Lookup which peer we're actually talking to */
+	wg_peer = pubkey_hashtable_lookup(&wg->peer_hashtable, s);
+	if (!wg_peer)
+		goto out;
+	handshake = &wg_peer->handshake;
+	down_read(&handshake->lock);
+	/* A timestamp that does not compare strictly greater than the last
+	 * accepted one is treated as a replay; initiations are also
+	 * rate-limited per peer. */
+	replay_attack = memcmp(t, handshake->latest_timestamp, NOISE_TIMESTAMP_LEN) <= 0;
+	flood_attack = !time_is_before_jiffies64(handshake->last_initiation_consumption + INITIATIONS_PER_SECOND);
+	up_read(&handshake->lock);
+	if (replay_attack || flood_attack) {
+		peer_put(wg_peer);
+		wg_peer = NULL;
+		goto out;
+	}
+
+	/* Success!
Copy everything to peer */
+	down_write(&handshake->lock);
+	memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN);
+	memcpy(handshake->latest_timestamp, t, NOISE_TIMESTAMP_LEN);
+	memcpy(handshake->key, key, NOISE_SYMMETRIC_KEY_LEN);
+	memcpy(handshake->hash, hash, NOISE_HASH_LEN);
+	memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN);
+	handshake->remote_index = src->sender_index;
+	handshake->last_initiation_consumption = get_jiffies_64();
+	handshake->state = HANDSHAKE_CONSUMED_INITIATION;
+	up_write(&handshake->lock);
+
+out:
+	memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
+	memzero_explicit(hash, NOISE_HASH_LEN);
+	memzero_explicit(chaining_key, NOISE_HASH_LEN);
+	up_read(&wg->static_identity.lock);
+	return wg_peer;
+}
+
+/* Builds the handshake response in @dst for a handshake that is in
+ * HANDSHAKE_CONSUMED_INITIATION, and advances it to
+ * HANDSHAKE_CREATED_RESPONSE. Returns false when the handshake is in any
+ * other state or the encryption step fails. Lock order: static identity
+ * (read), then handshake (write). */
+bool noise_handshake_create_response(struct message_handshake_response *dst, struct noise_handshake *handshake)
+{
+	bool ret = false;
+	down_read(&handshake->static_identity->lock);
+	down_write(&handshake->lock);
+
+	if (handshake->state != HANDSHAKE_CONSUMED_INITIATION)
+		goto out;
+
+	dst->header.type = MESSAGE_HANDSHAKE_RESPONSE;
+	dst->receiver_index = handshake->remote_index;
+
+	/* e */
+	curve25519_generate_secret(handshake->ephemeral_private);
+	curve25519_generate_public(handshake->ephemeral_public, handshake->ephemeral_private);
+	handshake_nocrypt(dst->unencrypted_ephemeral, handshake->ephemeral_public, NOISE_PUBLIC_KEY_LEN, handshake->hash);
+	if (handshake->static_identity->has_psk)
+		mix_key(handshake->key, handshake->chaining_key, handshake->ephemeral_public, NOISE_PUBLIC_KEY_LEN);
+
+	/* dhee */
+	mix_dh(handshake->key, handshake->chaining_key, handshake->ephemeral_private, handshake->remote_ephemeral);
+
+	/* dhes */
+	mix_dh(handshake->key, handshake->chaining_key, handshake->ephemeral_private, handshake->remote_static);
+
+	if (!handshake_encrypt(dst->encrypted_nothing, NULL, 0, handshake->key, handshake->hash))
+		goto out;
+
+	/* Publish the handshake under a fresh random index for the initiator
+	 * to address us with. */
+	dst->sender_index = index_hashtable_insert(&handshake->entry.peer->device->index_hashtable, &handshake->entry);
+
+	handshake->state = HANDSHAKE_CREATED_RESPONSE;
+	ret = true;
+
+out:
+	up_write(&handshake->lock);
+	up_read(&handshake->static_identity->lock);
+	return ret;
+}
+
+/* Processes a received handshake response. The handshake is found through
+ * src->receiver_index; its transcript is continued on stack copies and only
+ * committed once the response authenticates.
+ *
+ * Returns the peer with the reference taken by index_hashtable_lookup()
+ * still held, or NULL (reference released) when the response is rejected. */
+struct wireguard_peer *noise_handshake_consume_response(struct message_handshake_response *src, struct wireguard_device *wg)
+{
+	struct noise_handshake *handshake;
+	struct wireguard_peer *ret_peer = NULL;
+	u8 key[NOISE_SYMMETRIC_KEY_LEN];
+	u8 hash[NOISE_HASH_LEN];
+	u8 chaining_key[NOISE_HASH_LEN];
+	u8 e[NOISE_PUBLIC_KEY_LEN];
+	u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN];
+	enum noise_handshake_state state = HANDSHAKE_ZEROED;
+
+	down_read(&wg->static_identity.lock);
+
+	if (unlikely(!wg->static_identity.has_identity))
+		goto out;
+
+	/* NOTE(review): the cast assumes the index_hashtable_entry is the
+	 * first member of struct noise_handshake - confirm against noise.h. */
+	handshake = (struct noise_handshake *)index_hashtable_lookup(&wg->index_hashtable, INDEX_HASHTABLE_HANDSHAKE, src->receiver_index);
+	if (unlikely(!handshake))
+		goto out;
+
+	/* Snapshot the handshake so the expensive DH work below runs without
+	 * holding its lock. */
+	down_read(&handshake->lock);
+	state = handshake->state;
+	memcpy(key, handshake->key, NOISE_SYMMETRIC_KEY_LEN);
+	memcpy(hash, handshake->hash, NOISE_HASH_LEN);
+	memcpy(chaining_key, handshake->chaining_key, NOISE_HASH_LEN);
+	memcpy(ephemeral_private, handshake->ephemeral_private, NOISE_PUBLIC_KEY_LEN);
+	up_read(&handshake->lock);
+
+	if (state != HANDSHAKE_CREATED_INITIATION)
+		goto fail;
+
+	/* e */
+	handshake_nocrypt(e, src->unencrypted_ephemeral, sizeof(src->unencrypted_ephemeral), hash);
+	if (wg->static_identity.has_psk)
+		mix_key(key, chaining_key, e, NOISE_PUBLIC_KEY_LEN);
+
+	/* dhee */
+	mix_dh(key, chaining_key, ephemeral_private, e);
+
+	/* dhes */
+	mix_dh(key, chaining_key, wg->static_identity.static_private, e);
+
+	/* decrypt nothing */
+	if (!handshake_decrypt(NULL, src->encrypted_nothing, sizeof(src->encrypted_nothing), key, hash))
+		goto fail;
+
+	/* Success! Copy everything to peer */
+	down_write(&handshake->lock);
+	/* The state was sampled under the read lock, which has since been
+	 * dropped. Re-check it now that we hold the lock exclusively, so a
+	 * handshake that was cleared or advanced in the meantime is not
+	 * overwritten with stale material. */
+	if (unlikely(handshake->state != state)) {
+		up_write(&handshake->lock);
+		goto fail;
+	}
+	memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN);
+	memcpy(handshake->key, key, NOISE_SYMMETRIC_KEY_LEN);
+	memcpy(handshake->hash, hash, NOISE_HASH_LEN);
+	memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN);
+	handshake->remote_index = src->sender_index;
+	handshake->state = HANDSHAKE_CONSUMED_RESPONSE;
+	up_write(&handshake->lock);
+	ret_peer = handshake->entry.peer;
+	goto out;
+
+fail:
+	/* Release the reference index_hashtable_lookup() took for us. */
+	peer_put(handshake->entry.peer);
+out:
+	memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
+	memzero_explicit(hash, NOISE_HASH_LEN);
+	memzero_explicit(chaining_key, NOISE_HASH_LEN);
+	memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN);
+	up_read(&wg->static_identity.lock);
+	return ret_peer;
+}
+
+/* Turns a completed handshake (HANDSHAKE_CREATED_RESPONSE or
+ * HANDSHAKE_CONSUMED_RESPONSE) into a transport keypair: derives the send
+ * and receive keys from the chaining key, installs the keypair in
+ * @keypairs, hands the handshake's index-table slot over to it and clears
+ * the handshake. Returns false if the handshake is in any other state or
+ * the keypair cannot be allocated. */
+bool noise_handshake_begin_session(struct noise_handshake *handshake, struct noise_keypairs *keypairs, bool i_am_the_initiator)
+{
+	struct noise_keypair *new_keypair;
+
+	down_read(&handshake->lock);
+	if (handshake->state != HANDSHAKE_CREATED_RESPONSE && handshake->state != HANDSHAKE_CONSUMED_RESPONSE)
+		goto fail;
+
+	new_keypair = keypair_create(handshake->entry.peer);
+	if (!new_keypair)
+		goto fail;
+	new_keypair->i_am_the_initiator = i_am_the_initiator;
+	new_keypair->remote_index = handshake->remote_index;
+
+	/* Both sides derive the same two keys in the same order; the role
+	 * decides which of them is used for sending. */
+	if (i_am_the_initiator)
+		derive_keys(&new_keypair->sending, &new_keypair->receiving, handshake->chaining_key);
+	else
+		derive_keys(&new_keypair->receiving, &new_keypair->sending, handshake->chaining_key);
+	up_read(&handshake->lock);
+
+	add_new_keypair(keypairs, new_keypair);
+	index_hashtable_replace(&handshake->entry.peer->device->index_hashtable, &handshake->entry, &new_keypair->entry);
+	noise_handshake_clear(handshake);
+	net_dbg_ratelimited("Keypair %Lu created for peer %Lu\n", new_keypair->internal_id, new_keypair->entry.peer->internal_id);
+
+	return true;
+
+fail:
+	up_read(&handshake->lock);
+	return false;
+}
diff --git a/src/noise.h b/src/noise.h new file mode 100644 index 0000000..65ca9d8 --- /dev/null +++ b/src/noise.h @@ -0,0 +1,153 @@ +/* + * Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. + * + * See doc/protocol.md and https://github.com/trevp/noise/blob/master/noise.md for more info + */ + +#ifndef NOISE_H +#define NOISE_H + +#include "crypto/curve25519.h" +#include "crypto/chacha20poly1305.h" +#include "crypto/blake2s.h" +#include +#include +#include +#include +#include +#include + +enum index_hashtable_type { + INDEX_HASHTABLE_HANDSHAKE = (1 << 0), + INDEX_HASHTABLE_KEYPAIR = (1 << 1) +}; + +struct index_hashtable_entry { + struct wireguard_peer *peer; + struct hlist_node index_hash; + enum index_hashtable_type type; + __le32 index; +}; + +enum noise_lengths { + NOISE_PUBLIC_KEY_LEN = CURVE25519_POINT_SIZE, + NOISE_SYMMETRIC_KEY_LEN = CHACHA20POLY1305_KEYLEN, + NOISE_TIMESTAMP_LEN = sizeof(u64) + sizeof(u32), + NOISE_AUTHTAG_LEN = CHACHA20POLY1305_AUTHTAGLEN, + NOISE_HASH_LEN = BLAKE2S_OUTBYTES +}; + +enum wireguard_limits { + REKEY_AFTER_MESSAGES = U64_MAX - 0xffff, + REJECT_AFTER_MESSAGES = U64_MAX - 0xf, /* It's important that this value is always at *least* one less than U64_MAX. 
*/ + REKEY_TIMEOUT = 5 * HZ, + REKEY_AFTER_TIME = 120 * HZ, + REJECT_AFTER_TIME = 180 * HZ, + INITIATIONS_PER_SECOND = HZ / 50, + MAX_PEERS_PER_DEVICE = U16_MAX +}; + +union noise_counter { + struct { + u64 counter; + unsigned long backtrack; + spinlock_t lock; + } receive; + atomic64_t counter; +}; + +struct noise_symmetric_key { + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + union noise_counter counter; + uint64_t birthdate; + bool is_valid; +}; + +struct noise_keypair { + struct index_hashtable_entry entry; + struct noise_symmetric_key sending; + struct noise_symmetric_key receiving; + __le32 remote_index; + bool i_am_the_initiator; + struct kref refcount; + struct rcu_head rcu; + uint64_t internal_id; +}; + +struct noise_keypairs { + struct noise_keypair __rcu *current_keypair; + struct noise_keypair __rcu *previous_keypair; + struct noise_keypair __rcu *next_keypair; + struct mutex keypair_update_lock; +}; + +struct noise_static_identity { + bool has_identity, has_psk; + u8 static_public[NOISE_PUBLIC_KEY_LEN]; + u8 static_private[NOISE_PUBLIC_KEY_LEN]; + u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]; + struct rw_semaphore lock; +}; + +enum noise_handshake_state { + HANDSHAKE_ZEROED, + HANDSHAKE_CREATED_INITIATION, + HANDSHAKE_CONSUMED_INITIATION, + HANDSHAKE_CREATED_RESPONSE, + HANDSHAKE_CONSUMED_RESPONSE +}; + +struct noise_handshake { + struct index_hashtable_entry entry; + + enum noise_handshake_state state; + uint64_t last_initiation_consumption; + + struct noise_static_identity *static_identity; + + u8 ephemeral_public[NOISE_PUBLIC_KEY_LEN]; + u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN]; + + u8 remote_static[NOISE_PUBLIC_KEY_LEN]; + u8 remote_ephemeral[NOISE_PUBLIC_KEY_LEN]; + + u8 key[NOISE_SYMMETRIC_KEY_LEN]; + u8 hash[NOISE_HASH_LEN]; + u8 chaining_key[NOISE_HASH_LEN]; + + u8 latest_timestamp[NOISE_TIMESTAMP_LEN]; + __le32 remote_index; + + /* Protects all members except the immutable (after noise_peer_init): remote_static, static_identity */ + struct rw_semaphore 
lock;
+};
+
+/*
+ * Length of the encrypted form of a plain_len-byte plaintext: the plaintext
+ * length plus NOISE_AUTHTAG_LEN. The argument is parenthesized so that an
+ * expression argument (for example a conditional) expands with the intended
+ * precedence.
+ */
+#define noise_encrypted_len(plain_len) ((plain_len) + NOISE_AUTHTAG_LEN)
+
+struct wireguard_peer;
+struct wireguard_device;
+struct message_header;
+struct message_handshake_initiation;
+struct message_handshake_response;
+struct message_data;
+struct message_handshake_cookie;
+
+void noise_init(void);
+void noise_handshake_init(struct noise_handshake *handshake, struct noise_static_identity *static_identity, const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], struct wireguard_peer *peer);
+void noise_handshake_clear(struct noise_handshake *handshake);
+void noise_keypair_put(struct noise_keypair *keypair);
+void noise_keypairs_clear(struct noise_keypairs *keypairs);
+bool noise_received_with_keypair(struct noise_keypairs *keypairs, struct noise_keypair *received_keypair);
+
+void noise_set_static_identity_private_key(struct noise_static_identity *static_identity, const u8 private_key[NOISE_PUBLIC_KEY_LEN]);
+void noise_set_static_identity_preshared_key(struct noise_static_identity *static_identity, const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]);
+
+bool noise_handshake_create_initiation(struct message_handshake_initiation *dst, struct noise_handshake *handshake);
+struct wireguard_peer *noise_handshake_consume_initiation(struct message_handshake_initiation *src, struct wireguard_device *wg);
+
+bool noise_handshake_create_response(struct message_handshake_response *dst, struct noise_handshake *peer);
+struct wireguard_peer *noise_handshake_consume_response(struct message_handshake_response *src, struct wireguard_device *wg);
+
+bool noise_handshake_begin_session(struct noise_handshake *handshake, struct noise_keypairs *keypairs, bool i_am_the_initiator);
+
+#endif
diff --git a/src/packets.h b/src/packets.h
new file mode 100644
index 0000000..a34acb9
--- /dev/null
+++ b/src/packets.h
@@ -0,0 +1,61 @@
+/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved.
*/ + +#ifndef PACKETS_H +#define PACKETS_H + +#include "noise.h" +#include "messages.h" +#include "socket.h" + +#include + +enum { + MAX_QUEUED_HANDSHAKES = 4096, + MAX_BURST_HANDSHAKES = 16 +}; + +/* AF41, plus 00 ECN */ +#define HANDSHAKE_DSCP 0b10001000 + +struct wireguard_device; +struct wireguard_peer; +struct sk_buff; + +/* receive.c */ +void packet_receive(struct wireguard_device *wg, struct sk_buff *skb); + +/* send.c */ +int packet_send_queue(struct wireguard_peer *peer); +void packet_send_keepalive(struct wireguard_peer *peer); +void packet_send_handshake_initiation(struct wireguard_peer *peer); +void packet_send_handshake_response(struct wireguard_peer *peer); +void packet_send_handshake_cookie(struct wireguard_device *wg, struct sk_buff *initiating_skb, void *data, size_t data_len, __le32 sender_index); + +void packet_queue_send_handshake_initiation(struct wireguard_peer *peer); +void packet_process_queued_handshake_packets(struct work_struct *work); +void packet_send_queued_handshakes(struct work_struct *work); + + +/* data.c */ +struct packet_data_encryption_ctx { + struct padata_priv padata; + struct sk_buff *skb; + void (*callback)(struct sk_buff *, struct wireguard_peer *); + struct wireguard_peer *peer; + size_t plaintext_len, trailer_len; + unsigned int num_frags; + struct sk_buff *trailer; + struct noise_keypair *keypair; + uint64_t nonce; +}; + +int packet_create_data(struct sk_buff *skb, struct wireguard_peer *peer, void(*callback)(struct sk_buff *, struct wireguard_peer *), bool parallel); +void packet_consume_data(struct sk_buff *skb, size_t offset, struct wireguard_device *wg, void(*callback)(struct sk_buff *, struct wireguard_peer *, struct sockaddr_storage *, bool used_new_key, int err)); + +#define DATA_PACKET_HEAD_ROOM ALIGN(sizeof(struct message_data) + max(sizeof(struct packet_data_encryption_ctx), SKB_HEADER_LEN), 4) + +#ifdef DEBUG +void packet_counter_selftest(void); +#endif + +#endif diff --git a/src/peer.c b/src/peer.c new file 
mode 100644 index 0000000..ad48a4e --- /dev/null +++ b/src/peer.c @@ -0,0 +1,144 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#include "wireguard.h" +#include "peer.h" +#include "packets.h" +#include "timers.h" +#include "hashtables.h" +#include "noise.h" +#include +#include +#include +#include + +static atomic64_t peer_counter = ATOMIC64_INIT(0); + +struct wireguard_peer *peer_create(struct wireguard_device *wg, const u8 public_key[NOISE_PUBLIC_KEY_LEN]) +{ + struct wireguard_peer *peer; + lockdep_assert_held(&wg->device_update_lock); + + if (peer_total_count(wg) >= MAX_PEERS_PER_DEVICE) + return NULL; + + peer = kzalloc(sizeof(struct wireguard_peer), GFP_KERNEL); + if (!peer) + return NULL; + + peer->internal_id = atomic64_inc_return(&peer_counter); + peer->device = wg; + cookie_init(&peer->latest_cookie); + noise_handshake_init(&peer->handshake, &wg->static_identity, public_key, peer); + mutex_init(&peer->keypairs.keypair_update_lock); + INIT_WORK(&peer->transmit_handshake_work, packet_send_queued_handshakes); + rwlock_init(&peer->endpoint_lock); + skb_queue_head_init(&peer->tx_packet_queue); + kref_init(&peer->refcount); + pubkey_hashtable_add(&wg->peer_hashtable, peer); + list_add_tail(&peer->peer_list, &wg->peer_list); + pr_debug("Peer %Lu created\n", peer->internal_id); + return peer; +} + +struct wireguard_peer *peer_get(struct wireguard_peer *peer) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0) + RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "Calling peer_get without holding the RCU read lock."); +#else + rcu_lockdep_assert(rcu_read_lock_held(), "Calling peer_get without holding the RCU read lock."); +#endif + if (!peer) + return NULL; + if (!kref_get_unless_zero(&peer->refcount)) + return NULL; + return peer; +} + +void peer_remove(struct wireguard_peer *peer) +{ + if (!peer) + return; + lockdep_assert_held(&peer->device->device_update_lock); + + list_del(&peer->peer_list); + noise_handshake_clear(&peer->handshake); + 
noise_keypairs_clear(&peer->keypairs); + routing_table_remove_by_peer(&peer->device->peer_routing_table, peer); + pubkey_hashtable_remove(&peer->device->peer_hashtable, peer); + if (peer->device->workqueue) + flush_workqueue(peer->device->workqueue); + skb_queue_purge(&peer->tx_packet_queue); + peer_put(peer); +} + +static void rcu_release(struct rcu_head *rcu) +{ + struct wireguard_peer *peer = container_of(rcu, struct wireguard_peer, rcu); + pr_debug("Peer %Lu (%pISpfsc) destroyed\n", peer->internal_id, &peer->endpoint_addr); + timers_uninit_peer(peer); + skb_queue_purge(&peer->tx_packet_queue); + if (peer->endpoint_dst) + dst_release(peer->endpoint_dst); + memzero_explicit(peer, sizeof(struct wireguard_peer)); + kfree(peer); +} + +static void kref_release(struct kref *refcount) +{ + struct wireguard_peer *peer = container_of(refcount, struct wireguard_peer, refcount); + call_rcu(&peer->rcu, rcu_release); +} + +void peer_put(struct wireguard_peer *peer) +{ + if (!peer) + return; + kref_put(&peer->refcount, kref_release); +} + +int peer_for_each_unlocked(struct wireguard_device *wg, int (*fn)(struct wireguard_peer *peer, void *ctx), void *data) +{ + struct wireguard_peer *peer, *temp; + int ret = 0; + + lockdep_assert_held(&wg->device_update_lock); + list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) { + rcu_read_lock(); + peer = peer_get(peer); + rcu_read_unlock(); + if (unlikely(!peer)) + continue; + ret = fn(peer, data); + peer_put(peer); + if (ret < 0) + break; + } + return ret; +} + +int peer_for_each(struct wireguard_device *wg, int (*fn)(struct wireguard_peer *peer, void *ctx), void *data) +{ + int ret; + mutex_lock(&wg->device_update_lock); + ret = peer_for_each_unlocked(wg, fn, data); + mutex_unlock(&wg->device_update_lock); + return ret; +} + +void peer_remove_all(struct wireguard_device *wg) +{ + struct wireguard_peer *peer, *temp; + lockdep_assert_held(&wg->device_update_lock); + list_for_each_entry_safe(peer, temp, &wg->peer_list, 
peer_list) + peer_remove(peer); +} + +unsigned int peer_total_count(struct wireguard_device *wg) +{ + unsigned int i = 0; + struct wireguard_peer *peer; + lockdep_assert_held(&wg->device_update_lock); + list_for_each_entry(peer, &wg->peer_list, peer_list) + ++i; + return i; +} diff --git a/src/peer.h b/src/peer.h new file mode 100644 index 0000000..05ee7bd --- /dev/null +++ b/src/peer.h @@ -0,0 +1,55 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#ifndef PEER_H +#define PEER_H + +#include "wireguard.h" +#include "noise.h" +#include "cookie.h" +#include +#include +#include +#include + +struct wireguard_peer { + struct wireguard_device *device; + struct sockaddr_storage endpoint_addr; + struct dst_entry *endpoint_dst; + union { + struct flowi4 fl4; + struct flowi6 fl6; + } endpoint_flow; + rwlock_t endpoint_lock; + struct noise_handshake handshake; + struct noise_keypairs keypairs; + uint64_t last_sent_handshake; + struct work_struct transmit_handshake_work, clear_peer_work; + struct cookie latest_cookie; + struct hlist_node pubkey_hash; + uint64_t rx_bytes, tx_bytes; + struct timer_list timer_retransmit_handshake, timer_send_keepalive, timer_new_handshake, timer_kill_ephemerals; + unsigned int timer_handshake_attempts; + bool timer_need_another_keepalive; + struct timeval walltime_last_handshake; + struct sk_buff_head tx_packet_queue; + struct kref refcount; + struct rcu_head rcu; + struct list_head peer_list; + uint64_t internal_id; +}; + +struct wireguard_peer *peer_create(struct wireguard_device *wg, const u8 public_key[NOISE_PUBLIC_KEY_LEN]); + +struct wireguard_peer *peer_get(struct wireguard_peer *peer); +void peer_put(struct wireguard_peer *peer); +void peer_remove(struct wireguard_peer *peer); +void peer_remove_all(struct wireguard_device *wg); + +struct wireguard_peer *peer_lookup_by_index(struct wireguard_device *wg, u32 index); + +int peer_for_each_unlocked(struct wireguard_device *wg, int (*fn)(struct wireguard_peer *peer, 
void *ctx), void *data);
+int peer_for_each(struct wireguard_device *wg, int (*fn)(struct wireguard_peer *peer, void *ctx), void *data);
+
+unsigned int peer_total_count(struct wireguard_device *wg);
+
+#endif
diff --git a/src/ratelimiter.c b/src/ratelimiter.c
new file mode 100644
index 0000000..6bf85b0
--- /dev/null
+++ b/src/ratelimiter.c
@@ -0,0 +1,119 @@
+/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */
+
+#include "wireguard.h"
+#include "ratelimiter.h"
+#include
+#include
+#include
+
+enum {
+	RATELIMITER_PACKETS_PER_SECOND = 75,
+	RATELIMITER_PACKETS_BURSTABLE = 5
+};
+
+/* Fills in a zeroed hashlimit configuration for the given NFPROTO_* family. */
+static inline void cfg_init(struct hashlimit_cfg1 *cfg, int family)
+{
+	memset(cfg, 0, sizeof(struct hashlimit_cfg1));
+	if (family == NFPROTO_IPV4)
+		cfg->srcmask = 32;
+	else if (family == NFPROTO_IPV6)
+		cfg->srcmask = 96;
+	cfg->mode = XT_HASHLIMIT_HASH_SIP; /* source IP only -- we could also do source port by ORing this with XT_HASHLIMIT_HASH_SPT */
+	cfg->avg = XT_HASHLIMIT_SCALE / RATELIMITER_PACKETS_PER_SECOND; /* RATELIMITER_PACKETS_PER_SECOND per second per IP */
+	cfg->burst = RATELIMITER_PACKETS_BURSTABLE; /* Allow bursts of RATELIMITER_PACKETS_BURSTABLE at a time */
+	cfg->gc_interval = 1000; /* same as expiration date */
+	cfg->expire = 1000; /* Units of avg (seconds = 1) times 1000 */
+	/* cfg->size and cfg->max are computed based on the memory size if left to zero */
+}
+
+/*
+ * Sets up the IPv4 and IPv6 "hashlimit" matches for this device, both named
+ * after the net device.
+ *
+ * Returns 0 on success or a negative error code. On failure everything
+ * acquired so far is released again: a match whose checkentry succeeded is
+ * destroyed, and every module reference that was taken is dropped.
+ */
+int ratelimiter_init(struct ratelimiter *ratelimiter, struct wireguard_device *wg)
+{
+	struct net_device *dev = netdev_pub(wg);
+	struct xt_mtchk_param chk = { .net = wg->creating_net };
+	struct xt_mtdtor_param dtor_v4 = { .net = wg->creating_net, .family = NFPROTO_IPV4 };
+	int ret;
+
+	memset(ratelimiter, 0, sizeof(struct ratelimiter));
+
+	cfg_init(&ratelimiter->v4_info.cfg, NFPROTO_IPV4);
+	cfg_init(&ratelimiter->v6_info.cfg, NFPROTO_IPV6);
+	memcpy(ratelimiter->v4_info.name, dev->name, IFNAMSIZ);
+	memcpy(ratelimiter->v6_info.name, dev->name, IFNAMSIZ);
+
+	ratelimiter->v4_match = xt_request_find_match(NFPROTO_IPV4, "hashlimit", 1);
+	if (IS_ERR(ratelimiter->v4_match)) {
+		pr_err("The xt_hashlimit module is required");
+		return PTR_ERR(ratelimiter->v4_match);
+	}
+
+	chk.matchinfo = &ratelimiter->v4_info;
+	chk.match = ratelimiter->v4_match;
+	chk.family = NFPROTO_IPV4;
+	ret = ratelimiter->v4_match->checkentry(&chk);
+	if (ret < 0)
+		goto err_put_v4;
+
+	ratelimiter->v6_match = xt_request_find_match(NFPROTO_IPV6, "hashlimit", 1);
+	if (IS_ERR(ratelimiter->v6_match)) {
+		pr_err("The xt_hashlimit module is required");
+		ret = PTR_ERR(ratelimiter->v6_match);
+		/* The IPv4 checkentry already succeeded, so it must be undone too. */
+		goto err_destroy_v4;
+	}
+
+	chk.matchinfo = &ratelimiter->v6_info;
+	chk.match = ratelimiter->v6_match;
+	chk.family = NFPROTO_IPV6;
+	ret = ratelimiter->v6_match->checkentry(&chk);
+	if (ret < 0)
+		goto err_put_v6;
+
+	ratelimiter->net = wg->creating_net;
+	return 0;
+
+err_put_v6:
+	module_put(ratelimiter->v6_match->me);
+err_destroy_v4:
+	dtor_v4.match = ratelimiter->v4_match;
+	dtor_v4.matchinfo = &ratelimiter->v4_info;
+	ratelimiter->v4_match->destroy(&dtor_v4);
+err_put_v4:
+	module_put(ratelimiter->v4_match->me);
+	return ret;
+}
+
+void ratelimiter_uninit(struct ratelimiter *ratelimiter)
+{
+	struct xt_mtdtor_param dtor = { .net = ratelimiter->net };
+
+	dtor.match = ratelimiter->v4_match;
+	dtor.matchinfo = &ratelimiter->v4_info;
+	dtor.family = NFPROTO_IPV4;
+	ratelimiter->v4_match->destroy(&dtor);
+	module_put(ratelimiter->v4_match->me);
+
+	dtor.match = ratelimiter->v6_match;
+	dtor.matchinfo = &ratelimiter->v6_info;
+	dtor.family = NFPROTO_IPV6;
+	ratelimiter->v6_match->destroy(&dtor);
+	module_put(ratelimiter->v6_match->me);
+}
+
+bool ratelimiter_allow(struct ratelimiter *ratelimiter, struct sk_buff *skb)
+{
+	struct xt_action_param action = { { NULL } };
+	if (unlikely(skb->len < sizeof(struct iphdr)))
+		return false;
+	if (ip_hdr(skb)->version == 4) {
+		action.match = ratelimiter->v4_match;
+		action.matchinfo = &ratelimiter->v4_info;
+		action.thoff = ip_hdrlen(skb);
+		action.family = NFPROTO_IPV4;
+	} else if
(ip_hdr(skb)->version == 6) { + action.match = ratelimiter->v6_match; + action.matchinfo = &ratelimiter->v6_info; + action.family = NFPROTO_IPV6; + } else + return false; + return action.match->match(skb, &action); +} diff --git a/src/ratelimiter.h b/src/ratelimiter.h new file mode 100644 index 0000000..dac7752 --- /dev/null +++ b/src/ratelimiter.h @@ -0,0 +1,20 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#ifndef RATELIMITER_H +#define RATELIMITER_H + +#include +struct wireguard_device; +struct sk_buff; + +struct ratelimiter { + struct net *net; + struct xt_match *v4_match, *v6_match; + struct xt_hashlimit_mtinfo1 v4_info, v6_info; +}; + +int ratelimiter_init(struct ratelimiter *ratelimiter, struct wireguard_device *wg); +void ratelimiter_uninit(struct ratelimiter *ratelimiter); +bool ratelimiter_allow(struct ratelimiter *ratelimiter, struct sk_buff *skb); + +#endif diff --git a/src/receive.c b/src/receive.c new file mode 100644 index 0000000..293b764 --- /dev/null +++ b/src/receive.c @@ -0,0 +1,301 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. 
*/ + +#include "wireguard.h" +#include "packets.h" +#include "device.h" +#include "timers.h" +#include "messages.h" +#include "cookie.h" +#include +#include +#include + +static inline void rx_stats(struct wireguard_peer *peer, size_t len) +{ + struct pcpu_sw_netstats *tstats = get_cpu_ptr(netdev_pub(peer->device)->tstats); + u64_stats_update_begin(&tstats->syncp); + tstats->rx_bytes += len; + ++tstats->rx_packets; + u64_stats_update_end(&tstats->syncp); + put_cpu_ptr(tstats); + peer->rx_bytes += len; +} + +static inline void update_latest_addr(struct wireguard_peer *peer, struct sk_buff *skb) +{ + struct sockaddr_storage addr = { 0 }; + if (!socket_addr_from_skb(&addr, skb)) + socket_set_peer_addr(peer, &addr); +} + +static inline int skb_data_offset(struct sk_buff *skb, size_t *data_offset, size_t *data_len) +{ + struct udphdr *udp; +#ifdef DEBUG + struct sockaddr_storage addr = { 0 }; + socket_addr_from_skb(&addr, skb); +#else + static const u8 addr; +#endif + + if (unlikely(skb->len < sizeof(struct iphdr))) + return -EINVAL; + if (unlikely(ip_hdr(skb)->version != 4 && ip_hdr(skb)->version != 6)) + return -EINVAL; + if (unlikely(ip_hdr(skb)->version == 6 && skb->len < sizeof(struct ipv6hdr))) + return -EINVAL; + + udp = udp_hdr(skb); + *data_offset = (u8 *)udp - skb->data; + if (*data_offset + sizeof(struct udphdr) > skb->len) { + net_dbg_ratelimited("Packet isn't big enough to have UDP fields from %pISpfsc\n", &addr); + return -EINVAL; + } + *data_len = ntohs(udp->len); + if (*data_len < sizeof(struct udphdr)) { + net_dbg_ratelimited("UDP packet is reporting too small of a size from %pISpfsc\n", &addr); + return -EINVAL; + } + if (*data_len > skb->len - *data_offset) { + net_dbg_ratelimited("UDP packet is lying about its size from %pISpfsc\n", &addr); + return -EINVAL; + } + *data_len -= sizeof(struct udphdr); + *data_offset = (u8 *)udp + sizeof(struct udphdr) - skb->data; + if (!pskb_may_pull(skb, *data_offset + sizeof(struct message_header))) { + 
net_dbg_ratelimited("Could not pull header into data section from %pISpfsc\n", &addr); + return -EINVAL; + } + + return 0; +} + +static void receive_handshake_packet(struct wireguard_device *wg, void *data, size_t len, struct sk_buff *skb) +{ + struct wireguard_peer *peer = NULL; + enum message_type message_type; + bool under_load; + enum cookie_mac_state mac_state; + bool packet_needs_cookie; + +#ifdef DEBUG + struct sockaddr_storage addr = { 0 }; + socket_addr_from_skb(&addr, skb); +#else + static const u8 addr; +#endif + + message_type = message_determine_type(data, len); + + if (message_type == MESSAGE_HANDSHAKE_COOKIE) { + net_dbg_ratelimited("Receiving cookie response from %pISpfsc\n", &addr); + cookie_message_consume(data, wg); + return; + } + + under_load = skb_queue_len(&wg->incoming_handshakes) >= MAX_QUEUED_HANDSHAKES / 2; + mac_state = cookie_validate_packet(&wg->cookie_checker, skb, data, len, under_load); + if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) || (!under_load && mac_state == VALID_MAC_BUT_NO_COOKIE)) + packet_needs_cookie = false; + else if (under_load && mac_state == VALID_MAC_BUT_NO_COOKIE) + packet_needs_cookie = true; + else { + net_dbg_ratelimited("Invalid MAC of handshake, dropping packet from %pISpfsc\n", &addr); + return; + } + + switch (message_type) { + case MESSAGE_HANDSHAKE_INITIATION: + if (packet_needs_cookie) { + struct message_handshake_initiation *message = data; + packet_send_handshake_cookie(wg, skb, message, sizeof(*message), message->sender_index); + return; + } + peer = noise_handshake_consume_initiation(data, wg); + if (unlikely(!peer)) { + net_dbg_ratelimited("Invalid handshake initiation from %pISpfsc\n", &addr); + return; + } + net_dbg_ratelimited("Receiving handshake initiation from peer %Lu (%pISpfsc)\n", peer->internal_id, &addr); + update_latest_addr(peer, skb); + packet_send_handshake_response(peer); + break; + case MESSAGE_HANDSHAKE_RESPONSE: + if (packet_needs_cookie) { + struct 
message_handshake_response *message = data; + packet_send_handshake_cookie(wg, skb, message, sizeof(*message), message->sender_index); + return; + } + peer = noise_handshake_consume_response(data, wg); + if (unlikely(!peer)) { + net_dbg_ratelimited("Invalid handshake response from %pISpfsc\n", &addr); + return; + } + net_dbg_ratelimited("Receiving handshake response from peer %Lu (%pISpfsc)\n", peer->internal_id, &addr); + if (noise_handshake_begin_session(&peer->handshake, &peer->keypairs, true)) { + timers_ephemeral_key_created(peer); + timers_handshake_complete(peer); + packet_send_queue(peer); + } + break; + default: + net_err_ratelimited("Somehow a wrong type of packet wound up in the handshake queue from %pISpfsc!\n", &addr); + BUG(); + return; + } + + BUG_ON(!peer); + + rx_stats(peer, len); + timers_any_authorized_packet_received(peer); + update_latest_addr(peer, skb); + peer_put(peer); +} + +void packet_process_queued_handshake_packets(struct work_struct *work) +{ + struct wireguard_device *wg = container_of(work, struct wireguard_device, incoming_handshakes_work); + struct sk_buff *skb; + size_t len, offset; + size_t num_processed = 0; + + while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) { + if (!skb_data_offset(skb, &offset, &len)) + receive_handshake_packet(wg, skb->data + offset, len, skb); + dev_kfree_skb(skb); + if (++num_processed == MAX_BURST_HANDSHAKES) { + queue_work(wg->workqueue, &wg->incoming_handshakes_work); + return; + } + } +} + +static void receive_data_packet(struct sk_buff *skb, struct wireguard_peer *peer, struct sockaddr_storage *addr, bool used_new_key, int err) +{ + struct net_device *dev; + struct wireguard_peer *routed_peer; + struct wireguard_device *wg; + + if (unlikely(err < 0 || !peer || !addr)) { + dev_kfree_skb(skb); + return; + } + + wg = peer->device; + dev = netdev_pub(wg); + + if (unlikely(used_new_key)) + packet_send_queue(peer); + + /* A packet with length 0 is a keep alive packet */ + if 
(unlikely(!skb->len)) { + net_dbg_ratelimited("Receiving keepalive packet from peer %Lu (%pISpfsc)\n", peer->internal_id, addr); + goto packet_processed; + } + + if (unlikely(skb->len < sizeof(struct iphdr))) { + ++dev->stats.rx_errors; + ++dev->stats.rx_length_errors; + net_dbg_ratelimited("Packet missing ip header from peer %Lu (%pISpfsc)\n", peer->internal_id, addr); + goto packet_processed; + } + + if (!pskb_may_pull(skb, 1 /* For checking the ip version below */)) { + ++dev->stats.rx_errors; + ++dev->stats.rx_length_errors; + net_dbg_ratelimited("Packet missing IP version from peer %Lu (%pISpfsc)\n", peer->internal_id, addr); + goto packet_processed; + } + + skb->dev = dev; + skb->ip_summed = CHECKSUM_UNNECESSARY; + if (ip_hdr(skb)->version == 4) + skb->protocol = htons(ETH_P_IP); + else if (ip_hdr(skb)->version == 6) { + if (unlikely(skb->len < sizeof(struct ipv6hdr))) { + ++dev->stats.rx_errors; + ++dev->stats.rx_length_errors; + net_dbg_ratelimited("Packet missing ipv6 header from peer %Lu (%pISpfsc)\n", peer->internal_id, addr); + goto packet_processed; + } + skb->protocol = htons(ETH_P_IPV6); + } else { + ++dev->stats.rx_errors; + ++dev->stats.rx_length_errors; + net_dbg_ratelimited("Packet neither ipv4 nor ipv6 from peer %Lu (%pISpfsc)\n", peer->internal_id, addr); + goto packet_processed; + } + + timers_data_received(peer); + + routed_peer = routing_table_lookup_src(&wg->peer_routing_table, skb); + peer_put(routed_peer); /* We don't need the extra reference. 
*/
+
+	if (unlikely(routed_peer != peer)) {
+#ifdef DEBUG
+		struct sockaddr_storage unencrypted_addr = { 0 };
+		socket_addr_from_skb(&unencrypted_addr, skb);
+		net_dbg_ratelimited("Packet has unallowed src IP (%pISc) from peer %Lu (%pISpfsc)\n", &unencrypted_addr, peer->internal_id, addr);
+#endif
+		++dev->stats.rx_errors;
+		++dev->stats.rx_frame_errors;
+		goto packet_processed;
+	}
+
+	dev->last_rx = jiffies;
+	{
+		/*
+		 * netif_rx() takes ownership of the skb, so it must not be
+		 * dereferenced afterwards; read the length before handing it off.
+		 */
+		unsigned int rx_len = skb->len;
+
+		if (netif_rx(skb) == NET_RX_SUCCESS)
+			rx_stats(peer, rx_len);
+		else {
+			++dev->stats.rx_dropped;
+			net_dbg_ratelimited("Failed to give packet to userspace from peer %Lu (%pISpfsc)\n", peer->internal_id, addr);
+		}
+	}
+	goto continue_processing;
+
+packet_processed:
+	dev_kfree_skb(skb);
+continue_processing:
+	timers_any_authorized_packet_received(peer);
+	socket_set_peer_addr(peer, addr);
+	peer_put(peer);
+}
+
+void packet_receive(struct wireguard_device *wg, struct sk_buff *skb)
+{
+	size_t len, offset;
+#ifdef DEBUG
+	struct sockaddr_storage addr = { 0 };
+	socket_addr_from_skb(&addr, skb);
+#else
+	static const u8 addr;
+#endif
+
+	if (skb_data_offset(skb, &offset, &len) < 0)
+		goto err;
+	switch (message_determine_type(skb->data + offset, len)) {
+	case MESSAGE_HANDSHAKE_INITIATION:
+	case MESSAGE_HANDSHAKE_RESPONSE:
+	case MESSAGE_HANDSHAKE_COOKIE:
+		if (skb_queue_len(&wg->incoming_handshakes) > MAX_QUEUED_HANDSHAKES) {
+			net_dbg_ratelimited("Too many handshakes queued, dropping packet from %pISpfsc\n", &addr);
+			goto err;
+		}
+		if (skb_linearize(skb) < 0) {
+			net_dbg_ratelimited("Unable to linearize handshake skb from %pISpfsc\n", &addr);
+			goto err;
+		}
+		skb_queue_tail(&wg->incoming_handshakes, skb);
+		/* Queues up a call to packet_process_queued_handshake_packets(skb): */
+		queue_work(wg->workqueue, &wg->incoming_handshakes_work);
+		break;
+	case MESSAGE_DATA:
+		packet_consume_data(skb, offset, wg, receive_data_packet);
+		break;
+	default:
+		net_dbg_ratelimited("Invalid packet from %pISpfsc\n", &addr);
+		goto err;
+	}
+	return;
+
+err:
+	
dev_kfree_skb(skb); +} diff --git a/src/routing-table.c b/src/routing-table.c new file mode 100644 index 0000000..ec98f1d --- /dev/null +++ b/src/routing-table.c @@ -0,0 +1,633 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#include "wireguard.h" +#include "routing-table.h" + +struct routing_table_node { + struct routing_table_node __rcu *bit[2]; + struct rcu_head rcu; + struct wireguard_peer *peer; + uint8_t cidr; + uint8_t bit_at_a, bit_at_b; + bool incidental; + uint8_t bits[]; +}; + +static inline uint8_t bit_at(const uint8_t *key, uint8_t a, uint8_t b) +{ + return (key[a] >> b) & 1; +} +static inline void assign_cidr(struct routing_table_node *node, uint8_t cidr) +{ + node->cidr = cidr; + node->bit_at_a = cidr / 8; + node->bit_at_b = 7 - (cidr % 8); +} + +/* Non-recursive RCU expansion of: + * + * free_node(node) + * { + * if (!node) + * return; + * free_node(node->bit[0]); + * free_node(node->bit[1]); + * kfree_rcu(node); + * } + */ +#define ref(p) rcu_access_pointer(p) +#define push(p) do { BUG_ON(len >= 128); stack[len++] = rcu_dereference_protected(p, lockdep_is_held(lock)); } while (0) +static void free_node(struct routing_table_node *top, struct mutex *lock) +{ + struct routing_table_node *stack[128]; + struct routing_table_node *node = NULL; + struct routing_table_node *prev = NULL; + unsigned int len = 0; + + if (!top) + return; + + stack[len++] = top; + while (len > 0) { + node = stack[len - 1]; + if (!prev || ref(prev->bit[0]) == node || ref(prev->bit[1]) == node) { + if (ref(node->bit[0])) + push(node->bit[0]); + else if (ref(node->bit[1])) + push(node->bit[1]); + } else if (ref(node->bit[0]) == prev) { + if (ref(node->bit[1])) + push(node->bit[1]); + } else { + kfree_rcu(node, rcu); + --len; + } + prev = node; + } +} +#undef push +#define push(p) do { BUG_ON(len >= 128); stack[len++] = p; } while (0) +static bool walk_remove_by_peer(struct routing_table_node __rcu **top, struct wireguard_peer *peer, struct mutex *lock) 
+{ + struct routing_table_node __rcu **stack[128]; + struct routing_table_node __rcu **nptr; + struct routing_table_node *node = NULL; + struct routing_table_node *prev = NULL; + unsigned int len = 0; + bool ret = false; + + stack[len++] = top; + while (len > 0) { + nptr = stack[len - 1]; + node = rcu_dereference_protected(*nptr, lockdep_is_held(lock)); + if (!node) { + --len; + continue; + } + if (!prev || ref(prev->bit[0]) == node || ref(prev->bit[1]) == node) { + if (ref(node->bit[0])) + push(&node->bit[0]); + else if (ref(node->bit[1])) + push(&node->bit[1]); + } else if (ref(node->bit[0]) == prev) { + if (ref(node->bit[1])) + push(&node->bit[1]); + } else { + if (node->peer == peer) { + ret = true; + node->peer = NULL; + node->incidental = true; + if (!node->bit[0] || !node->bit[1]) { + /* collapse (even if both are null) */ + rcu_assign_pointer(*nptr, rcu_dereference_protected(node->bit[!node->bit[0]], lockdep_is_held(lock))); + rcu_assign_pointer(node->bit[0], NULL); + rcu_assign_pointer(node->bit[1], NULL); + free_node(node, lock); + } + } + --len; + } + prev = node; + } + + return ret; +} +#undef ref +#undef push + +static inline bool match(const struct routing_table_node *node, const uint8_t *key, uint8_t match_len) +{ + uint8_t full_blocks_to_match = match_len / 8; + uint8_t bits_leftover = match_len % 8; + uint8_t mask; + const uint8_t *a = node->bits, *b = key; + if (memcmp(a, b, full_blocks_to_match)) + return false; + if (!bits_leftover) + return true; + mask = ~(0xff >> bits_leftover); + return (a[full_blocks_to_match] & mask) == (b[full_blocks_to_match] & mask); +} + +static inline uint8_t common_bits(const struct routing_table_node *node, const uint8_t *key, uint8_t match_len) +{ + uint8_t max = (((match_len > node->cidr) ? 
match_len : node->cidr) + 7) / 8; + uint8_t bits = 0; + uint8_t i, mask; + const uint8_t *a = node->bits, *b = key; + for (i = 0; i < max; ++i, bits += 8) { + if (a[i] != b[i]) + break; + } + if (i == max) + return bits; + for (mask = 128; mask > 0; mask /= 2, ++bits) { + if ((a[i] & mask) != (b[i] & mask)) + return bits; + } + BUG(); + return bits; +} + +/* Removes the entry for exactly key/cidr. The matching node loses its peer and + * becomes incidental; if it has fewer than two children it is also unlinked + * (its only child, if any, takes its place) and freed. The caller must hold + * lock. Returns 0 when an exact match was found and -ENOENT otherwise. */ +static int remove(struct routing_table_node __rcu **trie, const uint8_t *key, uint8_t cidr, struct mutex *lock) +{ + struct routing_table_node *parent = NULL, *node; + node = rcu_dereference_protected(*trie, lockdep_is_held(lock)); + while (node && node->cidr <= cidr && match(node, key, node->cidr)) { + if (node->cidr == cidr) { + /* exact match */ + node->incidental = true; + node->peer = NULL; + if (!node->bit[0] || !node->bit[1]) { + /* collapse (even if both are null); when node is the root there is + * no parent, so the root pointer itself has to be redirected, + * otherwise it would keep pointing at the node freed below */ + if (parent) + rcu_assign_pointer(parent->bit[bit_at(key, parent->bit_at_a, parent->bit_at_b)], + rcu_dereference_protected(node->bit[(!node->bit[0]) ? 1 : 0], lockdep_is_held(lock))); + else + rcu_assign_pointer(*trie, + rcu_dereference_protected(node->bit[(!node->bit[0]) ? 1 : 0], lockdep_is_held(lock))); + rcu_assign_pointer(node->bit[0], NULL); + rcu_assign_pointer(node->bit[1], NULL); + free_node(node, lock); + } + return 0; + } + parent = node; + node = rcu_dereference_protected(parent->bit[bit_at(key, parent->bit_at_a, parent->bit_at_b)], lockdep_is_held(lock)); + } + return -ENOENT; +} + +static inline struct routing_table_node *find_node(struct routing_table_node *trie, uint8_t bits, const uint8_t *key) +{ + struct routing_table_node *node = trie, *found = NULL; + while (node && match(node, key, node->cidr)) { + if (!node->incidental) + found = node; + if (node->cidr == bits) + break; + node = rcu_dereference(node->bit[bit_at(key, node->bit_at_a, node->bit_at_b)]); + } + return found; +} + +static inline bool node_placement(struct routing_table_node __rcu *trie, const uint8_t *key, uint8_t cidr, struct routing_table_node **rnode, struct mutex *lock) +{ + bool exact = false; + struct routing_table_node *parent = NULL, *node = 
rcu_dereference_protected(trie, lockdep_is_held(lock)); + while (node && node->cidr <= cidr && match(node, key, node->cidr)) { + parent = node; + if (parent->cidr == cidr) { + exact = true; + break; + } + node = rcu_dereference_protected(parent->bit[bit_at(key, parent->bit_at_a, parent->bit_at_b)], lockdep_is_held(lock)); + } + if (rnode) + *rnode = parent; + return exact; +} + +static int add(struct routing_table_node __rcu **trie, uint8_t bits, const uint8_t *key, uint8_t cidr, struct wireguard_peer *peer, struct mutex *lock) +{ + struct routing_table_node *node, *parent, *down, *newnode; + int bits_in_common; + + if (!rcu_access_pointer(*trie)) { + node = kzalloc(sizeof(*node) + (bits + 7) / 8, GFP_KERNEL); + if (!node) + return -ENOMEM; + node->peer = peer; + memcpy(node->bits, key, (bits + 7) / 8); + assign_cidr(node, cidr); + rcu_assign_pointer(*trie, node); + return 0; + } + if (node_placement(*trie, key, cidr, &node, lock)) { + /* exact match */ + node->incidental = false; + node->peer = peer; + return 0; + } + + newnode = kzalloc(sizeof(*node) + (bits + 7) / 8, GFP_KERNEL); + if (!newnode) + return -ENOMEM; + newnode->peer = peer; + memcpy(newnode->bits, key, (bits + 7) / 8); + assign_cidr(newnode, cidr); + + if (!node) + down = rcu_dereference_protected(*trie, lockdep_is_held(lock)); + else + down = rcu_dereference_protected(node->bit[bit_at(key, node->bit_at_a, node->bit_at_b)], lockdep_is_held(lock)); + if (!down) { + rcu_assign_pointer(node->bit[bit_at(key, node->bit_at_a, node->bit_at_b)], newnode); + return 0; + } + /* here we must be inserting between node and down */ + bits_in_common = common_bits(down, key, cidr); + parent = node; + if (bits_in_common > cidr) + bits_in_common = cidr; + + /* we either need to make a new branch above down and newnode + * or newnode can be the branch. 
newnode can be the branch if + * its cidr == bits_in_common */ + if (newnode->cidr == bits_in_common) { + /* newnode can be the branch */ + rcu_assign_pointer(newnode->bit[bit_at(down->bits, newnode->bit_at_a, newnode->bit_at_b)], down); + if (!parent) + rcu_assign_pointer(*trie, newnode); + else + rcu_assign_pointer(parent->bit[bit_at(newnode->bits, parent->bit_at_a, parent->bit_at_b)], newnode); + } else { + /* reparent */ + node = kzalloc(sizeof(*node) + (bits + 7) / 8, GFP_KERNEL); + if (!node) { + kfree(newnode); + return -ENOMEM; + } + assign_cidr(node, bits_in_common); + node->incidental = true; + memcpy(node->bits, newnode->bits, (bits + 7) / 8); + rcu_assign_pointer(node->bit[bit_at(down->bits, node->bit_at_a, node->bit_at_b)], down); + rcu_assign_pointer(node->bit[bit_at(newnode->bits, node->bit_at_a, node->bit_at_b)], newnode); + if (!parent) + rcu_assign_pointer(*trie, node); + else + rcu_assign_pointer(parent->bit[bit_at(node->bits, parent->bit_at_a, parent->bit_at_b)], node); + } + return 0; +} + +#define push(p) do { \ + struct routing_table_node *next = (maybe_lock ? rcu_dereference_protected(p, lockdep_is_held(maybe_lock)) : rcu_dereference(p)); \ + if (next) { \ + BUG_ON(len >= 128); \ + stack[len++] = next; \ + } \ +} while (0) +static int walk_ips(struct routing_table_node *top, int family, void *ctx, int (*func)(void *ctx, struct wireguard_peer *peer, union nf_inet_addr ip, uint8_t cidr, int family), struct mutex *maybe_lock) +{ + int ret; + union nf_inet_addr ip = { .all = { 0 } }; + struct routing_table_node *stack[128]; + struct routing_table_node *node; + unsigned int len = 0; + struct wireguard_peer *peer; + + if (!top) + return 0; + + stack[len++] = top; + while (len > 0) { + node = stack[--len]; + + peer = peer_get(node->peer); + if (peer) { + memcpy(ip.all, node->bits, family == AF_INET6 ? 
16 : 4); + ret = func(ctx, peer, ip, node->cidr, family); + peer_put(peer); + if (ret) + return ret; + } + + push(node->bit[0]); + push(node->bit[1]); + } + return 0; +} +static int walk_ips_by_peer(struct routing_table_node *top, int family, void *ctx, struct wireguard_peer *peer, int (*func)(void *ctx, union nf_inet_addr ip, uint8_t cidr, int family), struct mutex *maybe_lock) +{ + int ret; + union nf_inet_addr ip = { .all = { 0 } }; + struct routing_table_node *stack[128]; + struct routing_table_node *node; + unsigned int len = 0; + + if (!top) + return 0; + + stack[len++] = top; + while (len > 0) { + node = stack[--len]; + + if (node->peer == peer) { + memcpy(ip.all, node->bits, family == AF_INET6 ? 16 : 4); + ret = func(ctx, ip, node->cidr, family); + if (ret) + return ret; + } + + push(node->bit[0]); + push(node->bit[1]); + } + return 0; +} +#undef push + +void routing_table_init(struct routing_table *table) +{ + memset(table, 0, sizeof(struct routing_table)); + mutex_init(&table->table_update_lock); +} + +void routing_table_free(struct routing_table *table) +{ + mutex_lock(&table->table_update_lock); + free_node(rcu_dereference_protected(table->root4, lockdep_is_held(&table->table_update_lock)), &table->table_update_lock); + rcu_assign_pointer(table->root4, NULL); + free_node(rcu_dereference_protected(table->root6, lockdep_is_held(&table->table_update_lock)), &table->table_update_lock); + rcu_assign_pointer(table->root6, NULL); + mutex_unlock(&table->table_update_lock); +} + +int routing_table_insert_v4(struct routing_table *table, const struct in_addr *ip, uint8_t cidr, struct wireguard_peer *peer) +{ + int ret; + if (cidr > 32) + return -EINVAL; + mutex_lock(&table->table_update_lock); + ret = add(&table->root4, 32, (const uint8_t *)ip, cidr, peer, &table->table_update_lock); + mutex_unlock(&table->table_update_lock); + return ret; +} + +int routing_table_insert_v6(struct routing_table *table, const struct in6_addr *ip, uint8_t cidr, struct wireguard_peer 
*peer) +{ + int ret; + if (cidr > 128) + return -EINVAL; + mutex_lock(&table->table_update_lock); + ret = add(&table->root6, 128, (const uint8_t *)ip, cidr, peer, &table->table_update_lock); + mutex_unlock(&table->table_update_lock); + return ret; +} + +/* Returns a strong reference to a peer */ +inline struct wireguard_peer *routing_table_lookup_v4(struct routing_table *table, const struct in_addr *ip) +{ + struct wireguard_peer *peer = NULL; + struct routing_table_node *node; + + rcu_read_lock(); + node = find_node(rcu_dereference(table->root4), 32, (const uint8_t *)ip); + if (node) + peer = peer_get(node->peer); + rcu_read_unlock(); + return peer; +} + +/* Returns a strong reference to a peer */ +inline struct wireguard_peer *routing_table_lookup_v6(struct routing_table *table, const struct in6_addr *ip) +{ + struct wireguard_peer *peer = NULL; + struct routing_table_node *node; + + rcu_read_lock(); + node = find_node(rcu_dereference(table->root6), 128, (const uint8_t *)ip); + if (node) + peer = peer_get(node->peer); + rcu_read_unlock(); + return peer; +} + +int routing_table_remove_v4(struct routing_table *table, const struct in_addr *ip, uint8_t cidr) +{ + int ret; + mutex_lock(&table->table_update_lock); + ret = remove(&table->root4, (const uint8_t *)ip, cidr, &table->table_update_lock); + mutex_unlock(&table->table_update_lock); + return ret; +} + +int routing_table_remove_v6(struct routing_table *table, const struct in6_addr *ip, uint8_t cidr) +{ + int ret; + mutex_lock(&table->table_update_lock); + ret = remove(&table->root6, (const uint8_t *)ip, cidr, &table->table_update_lock); + mutex_unlock(&table->table_update_lock); + return ret; +} + +int routing_table_remove_by_peer(struct routing_table *table, struct wireguard_peer *peer) +{ + bool found; + mutex_lock(&table->table_update_lock); + found = walk_remove_by_peer(&table->root4, peer, &table->table_update_lock) | walk_remove_by_peer(&table->root6, peer, &table->table_update_lock); + 
mutex_unlock(&table->table_update_lock); + return found ? 0 : -EINVAL; +} + +/* Calls func with a strong reference to each peer, and puts that reference once func has returned. + * It's thus up to func to take its own reference with peer_get if the peer is going to be used or stored after func returns. */ +int routing_table_walk_ips(struct routing_table *table, void *ctx, int (*func)(void *ctx, struct wireguard_peer *peer, union nf_inet_addr ip, uint8_t cidr, int family)) +{ + int ret; + rcu_read_lock(); + ret = walk_ips(rcu_dereference(table->root4), AF_INET, ctx, func, NULL); + rcu_read_unlock(); + if (ret) + return ret; + rcu_read_lock(); + ret = walk_ips(rcu_dereference(table->root6), AF_INET6, ctx, func, NULL); + rcu_read_unlock(); + return ret; +} + +int routing_table_walk_ips_by_peer(struct routing_table *table, void *ctx, struct wireguard_peer *peer, int (*func)(void *ctx, union nf_inet_addr ip, uint8_t cidr, int family)) +{ + int ret; + rcu_read_lock(); + ret = walk_ips_by_peer(rcu_dereference(table->root4), AF_INET, ctx, peer, func, NULL); + rcu_read_unlock(); + if (ret) + return ret; + rcu_read_lock(); + ret = walk_ips_by_peer(rcu_dereference(table->root6), AF_INET6, ctx, peer, func, NULL); + rcu_read_unlock(); + return ret; +} + +int routing_table_walk_ips_by_peer_sleepable(struct routing_table *table, void *ctx, struct wireguard_peer *peer, int (*func)(void *ctx, union nf_inet_addr ip, uint8_t cidr, int family)) +{ + int ret; + mutex_lock(&table->table_update_lock); + ret = walk_ips_by_peer(rcu_dereference_protected(table->root4, lockdep_is_held(&table->table_update_lock)), AF_INET, ctx, peer, func, &table->table_update_lock); + mutex_unlock(&table->table_update_lock); + if (ret) + return ret; + mutex_lock(&table->table_update_lock); + ret = walk_ips_by_peer(rcu_dereference_protected(table->root6, lockdep_is_held(&table->table_update_lock)), AF_INET6, ctx, peer, func, &table->table_update_lock); + mutex_unlock(&table->table_update_lock); + return ret; +} + +static 
inline bool has_valid_ip_header(struct sk_buff *skb) +{ + if (unlikely(skb->len < sizeof(struct iphdr))) + return false; + else if (unlikely(skb->len < sizeof(struct ipv6hdr) && ip_hdr(skb)->version == 6)) + return false; + else if (unlikely(ip_hdr(skb)->version != 4 && ip_hdr(skb)->version != 6)) + return false; + return true; +} + +/* Returns a strong reference to a peer */ +struct wireguard_peer *routing_table_lookup_dst(struct routing_table *table, struct sk_buff *skb) +{ + if (unlikely(!has_valid_ip_header(skb))) + return NULL; + if (ip_hdr(skb)->version == 4) + return routing_table_lookup_v4(table, (struct in_addr *)&ip_hdr(skb)->daddr); + else if (ip_hdr(skb)->version == 6) + return routing_table_lookup_v6(table, &ipv6_hdr(skb)->daddr); + return NULL; +} + +/* Returns a strong reference to a peer */ +struct wireguard_peer *routing_table_lookup_src(struct routing_table *table, struct sk_buff *skb) +{ + if (unlikely(!has_valid_ip_header(skb))) + return NULL; + if (ip_hdr(skb)->version == 4) + return routing_table_lookup_v4(table, (struct in_addr *)&ip_hdr(skb)->saddr); + else if (ip_hdr(skb)->version == 6) + return routing_table_lookup_v6(table, &ipv6_hdr(skb)->saddr); + return NULL; +} +#ifdef DEBUG +static inline struct in_addr *ip4(uint8_t a, uint8_t b, uint8_t c, uint8_t d) +{ + static struct in_addr ip; + uint8_t *split = (uint8_t *)&ip; + split[0] = a; + split[1] = b; + split[2] = c; + split[3] = d; + return &ip; +} +static inline struct in6_addr *ip6(uint32_t a, uint32_t b, uint32_t c, uint32_t d) +{ + static struct in6_addr ip; + __be32 *split = (__be32 *)&ip; + split[0] = cpu_to_be32(a); + split[1] = cpu_to_be32(b); + split[2] = cpu_to_be32(c); + split[3] = cpu_to_be32(d); + return &ip; +} + +void routing_table_selftest(void) +{ + struct routing_table t; + struct wireguard_peer *a = NULL, *b = NULL, *c = NULL, *d = NULL, *e = NULL, *f = NULL, *g = NULL, *h = NULL; + size_t i = 0; + bool success = true; + struct in6_addr ip; + __be64 part; + + 
routing_table_init(&t); +#define init_peer(name) do { name = kzalloc(sizeof(struct wireguard_peer), GFP_KERNEL); if (!name) goto free; kref_init(&name->refcount); } while (0) + init_peer(a); + init_peer(b); + init_peer(c); + init_peer(d); + init_peer(e); + init_peer(f); + init_peer(g); + init_peer(h); +#undef init_peer + +#define insert(version, mem, ipa, ipb, ipc, ipd, cidr) routing_table_insert_v##version(&t, ip##version(ipa, ipb, ipc, ipd), cidr, mem) + insert(4, a, 192, 168, 4, 0, 24); + insert(4, b, 192, 168, 4, 4, 32); + insert(4, c, 192, 168, 0, 0, 16); + insert(4, d, 192, 95, 5, 64, 27); + insert(4, c, 192, 95, 5, 65, 27); /* replaces previous entry, and maskself is required */ + insert(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128); + insert(6, c, 0x26075300, 0x60006b00, 0, 0, 64); + insert(4, e, 0, 0, 0, 0, 0); + insert(6, e, 0, 0, 0, 0, 0); + insert(6, f, 0, 0, 0, 0, 0); /* replaces previous entry */ + insert(6, g, 0x24046800, 0, 0, 0, 32); + insert(6, h, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 64); /* maskself is required */ + insert(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 128); + insert(4, g, 64, 15, 112, 0, 20); + insert(4, h, 64, 15, 123, 211, 25); /* maskself is required */ +#undef insert + +#define test(version, mem, ipa, ipb, ipc, ipd) do { \ + bool _s = routing_table_lookup_v##version(&t, ip##version(ipa, ipb, ipc, ipd)) == mem; \ + ++i; \ + if (!_s) { \ + pr_info("routing table self-test %zu: FAIL\n", i); \ + success = false; \ + } \ +} while (0) + test(4, a, 192, 168, 4, 20); + test(4, a, 192, 168, 4, 0); + test(4, b, 192, 168, 4, 4); + test(4, c, 192, 168, 200, 182); + test(4, c, 192, 95, 5, 68); + test(4, e, 192, 95, 5, 96); + test(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543); + test(6, c, 0x26075300, 0x60006b00, 0, 0xc02e01ee); + test(6, f, 0x26075300, 0x60006b01, 0, 0); + test(6, g, 0x24046800, 0x40040806, 0, 0x1006); + test(6, g, 0x24046800, 0x40040806, 0x1234, 0x5678); + test(6, f, 0x240467ff, 0x40040806, 0x1234, 
0x5678); + test(6, f, 0x24046801, 0x40040806, 0x1234, 0x5678); + test(6, h, 0x24046800, 0x40040800, 0x1234, 0x5678); + test(6, h, 0x24046800, 0x40040800, 0, 0); + test(6, h, 0x24046800, 0x40040800, 0x10101010, 0x10101010); + test(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef); + test(4, g, 64, 15, 116, 26); + test(4, g, 64, 15, 127, 3); + test(4, g, 64, 15, 123, 1); + test(4, h, 64, 15, 123, 128); + test(4, h, 64, 15, 123, 129); +#undef test + + /* These will hit the BUG_ON(len >= 128) in free_node if something goes wrong. */ + for (i = 0; i < 128; ++i) { + part = cpu_to_be64(~(1LLU << (i % 64))); + memset(&ip, 0xff, 16); + memcpy((uint8_t *)&ip + (i < 64) * 8, &part, 8); + routing_table_insert_v6(&t, &ip, 128, a); + } + + if (success) + pr_info("routing table self-tests: pass\n"); + +free: + routing_table_free(&t); + kfree(a); + kfree(b); + kfree(c); + kfree(d); + kfree(e); + kfree(f); + kfree(g); + kfree(h); +} +#endif diff --git a/src/routing-table.h b/src/routing-table.h new file mode 100644 index 0000000..2e3a0ba --- /dev/null +++ b/src/routing-table.h @@ -0,0 +1,41 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. 
*/ + +#ifndef ROUTINGTABLE_H +#define ROUTINGTABLE_H + +#include "wireguard.h" +#include +#include +#include + +struct wireguard_peer; +struct routing_table_node; + +struct routing_table { + struct routing_table_node __rcu *root4; + struct routing_table_node __rcu *root6; + struct mutex table_update_lock; +}; + +void routing_table_init(struct routing_table *table); +void routing_table_free(struct routing_table *table); +int routing_table_insert_v4(struct routing_table *table, const struct in_addr *ip, uint8_t cidr, struct wireguard_peer *peer); +int routing_table_insert_v6(struct routing_table *table, const struct in6_addr *ip, uint8_t cidr, struct wireguard_peer *peer); +int routing_table_remove_v4(struct routing_table *table, const struct in_addr *ip, uint8_t cidr); +int routing_table_remove_v6(struct routing_table *table, const struct in6_addr *ip, uint8_t cidr); +int routing_table_remove_by_peer(struct routing_table *table, struct wireguard_peer *peer); +int routing_table_walk_ips(struct routing_table *table, void *ctx, int (*func)(void *ctx, struct wireguard_peer *peer, union nf_inet_addr ip, uint8_t cidr, int family)); +int routing_table_walk_ips_by_peer(struct routing_table *table, void *ctx, struct wireguard_peer *peer, int (*func)(void *ctx, union nf_inet_addr ip, uint8_t cidr, int family)); +int routing_table_walk_ips_by_peer_sleepable(struct routing_table *table, void *ctx, struct wireguard_peer *peer, int (*func)(void *ctx, union nf_inet_addr ip, uint8_t cidr, int family)); + +/* These return a strong reference to a peer: */ +struct wireguard_peer *routing_table_lookup_v4(struct routing_table *table, const struct in_addr *ip); +struct wireguard_peer *routing_table_lookup_v6(struct routing_table *table, const struct in6_addr *ip); +struct wireguard_peer *routing_table_lookup_dst(struct routing_table *table, struct sk_buff *skb); +struct wireguard_peer *routing_table_lookup_src(struct routing_table *table, struct sk_buff *skb); + +#ifdef DEBUG +void 
routing_table_selftest(void); +#endif + +#endif diff --git a/src/send.c b/src/send.c new file mode 100644 index 0000000..f43e4a3 --- /dev/null +++ b/src/send.c @@ -0,0 +1,266 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#include "wireguard.h" +#include "packets.h" +#include "timers.h" +#include "device.h" +#include "socket.h" +#include "messages.h" +#include "cookie.h" +#include +#include +#include +#include +#include +#include + +void packet_send_handshake_initiation(struct wireguard_peer *peer) +{ + struct message_handshake_initiation packet; + + net_dbg_ratelimited("Sending handshake initiation to peer %Lu (%pISpfsc)\n", peer->internal_id, &peer->endpoint_addr); + peer->last_sent_handshake = get_jiffies_64(); + + if (noise_handshake_create_initiation(&packet, &peer->handshake)) { + cookie_add_mac_to_packet(&packet, sizeof(packet), peer); + socket_send_buffer_to_peer(peer, &packet, sizeof(struct message_handshake_initiation), HANDSHAKE_DSCP); + timers_handshake_initiated(peer); + } +} + +void packet_send_handshake_response(struct wireguard_peer *peer) +{ + struct message_handshake_response packet; + + net_dbg_ratelimited("Sending handshake response to peer %Lu (%pISpfsc)\n", peer->internal_id, &peer->endpoint_addr); + peer->last_sent_handshake = get_jiffies_64(); + + if (noise_handshake_create_response(&packet, &peer->handshake)) { + cookie_add_mac_to_packet(&packet, sizeof(packet), peer); + if (noise_handshake_begin_session(&peer->handshake, &peer->keypairs, false)) { + timers_ephemeral_key_created(peer); + socket_send_buffer_to_peer(peer, &packet, sizeof(struct message_handshake_response), HANDSHAKE_DSCP); + } + } +} + +void packet_send_queued_handshakes(struct work_struct *work) +{ + struct wireguard_peer *peer = container_of(work, struct wireguard_peer, transmit_handshake_work); + peer->last_sent_handshake = get_jiffies_64(); + packet_send_handshake_initiation(peer); + peer_put(peer); +} + +void 
packet_queue_send_handshake_initiation(struct wireguard_peer *peer) +{ + rcu_read_lock(); + peer = peer_get(peer); + rcu_read_unlock(); + if (!peer) + return; + /* Queues up calling packet_send_queued_handshakes(peer), where we do a peer_put(peer) after: */ + if (!queue_work(peer->device->workqueue, &peer->transmit_handshake_work)) + peer_put(peer); /* If the work was already queued, we want to drop the extra reference */ +} + +static inline void ratelimit_packet_send_handshake_initiation(struct wireguard_peer *peer) +{ + if (time_is_before_jiffies64(peer->last_sent_handshake + REKEY_TIMEOUT)) + packet_queue_send_handshake_initiation(peer); +} + +void packet_send_handshake_cookie(struct wireguard_device *wg, struct sk_buff *initiating_skb, void *data, size_t data_len, __le32 sender_index) +{ + struct message_handshake_cookie packet; + +#ifdef DEBUG + struct sockaddr_storage addr = { 0 }; + if (initiating_skb) + socket_addr_from_skb(&addr, initiating_skb); + net_dbg_ratelimited("Sending cookie response for denied handshake message for %pISpfsc\n", &addr); +#endif + cookie_message_create(&packet, initiating_skb, data, data_len, sender_index, &wg->cookie_checker); + socket_send_buffer_as_reply_to_skb(initiating_skb, &packet, sizeof(packet), wg); +} + +static inline void keep_key_fresh(struct wireguard_peer *peer) +{ + struct noise_keypair *keypair; + unsigned long rekey_after_time = REKEY_AFTER_TIME; + + rcu_read_lock(); + keypair = rcu_dereference(peer->keypairs.current_keypair); + if (unlikely(!keypair || !keypair->sending.is_valid)) { + rcu_read_unlock(); + return; + } + + /* We don't want both peers initiating a new handshake at the same time */ + if (!keypair->i_am_the_initiator) + rekey_after_time += REKEY_TIMEOUT * 2; + + if (atomic64_read(&keypair->sending.counter.counter) > REKEY_AFTER_MESSAGES || + time_is_before_eq_jiffies64(keypair->sending.birthdate + rekey_after_time)) { + rcu_read_unlock(); + ratelimit_packet_send_handshake_initiation(peer); + } else + 
rcu_read_unlock(); +} + +void packet_send_keepalive(struct wireguard_peer *peer) +{ + struct sk_buff *skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH, GFP_ATOMIC); + if (unlikely(!skb)) + return; + skb_reserve(skb, DATA_PACKET_HEAD_ROOM); + skb->dev = netdev_pub(peer->device); + skb_queue_tail(&peer->tx_packet_queue, skb); + packet_send_queue(peer); +} + +struct packet_bundle { + atomic_t count; + struct sk_buff *first; +}; + +static inline void send_off_bundle(struct packet_bundle *bundle, struct wireguard_peer *peer) +{ + struct sk_buff *skb, *next; + for (skb = bundle->first; skb; skb = next) { + /* We store the next pointer locally because socket_send_skb_to_peer + * consumes the packet before the top of the loop comes again. */ + next = skb->next; + if (likely(!socket_send_skb_to_peer(peer, skb, 0 /* TODO: Should we copy the DSCP value from the enclosed packet? */))) + timers_data_sent(peer); + } +} + +static void message_create_data_done(struct sk_buff *skb, struct wireguard_peer *peer) +{ + struct packet_bundle *bundle = *((struct packet_bundle **)skb->cb); + /* A packet completed successfully, so we decrement the counter of packets + * remaining, and if we hit zero we can send it off. */ + if (atomic_dec_and_test(&bundle->count)) + send_off_bundle(bundle, peer); + keep_key_fresh(peer); +} + +int packet_send_queue(struct wireguard_peer *peer) +{ + struct packet_bundle *bundle; + struct sk_buff_head local_queue; + struct sk_buff *skb, *next, *first; + unsigned long flags; + bool parallel = true; + + /* Steal the current queue into our local one. */ + skb_queue_head_init(&local_queue); + spin_lock_irqsave(&peer->tx_packet_queue.lock, flags); + skb_queue_splice_init(&peer->tx_packet_queue, &local_queue); + spin_unlock_irqrestore(&peer->tx_packet_queue.lock, flags); + + first = skb_peek(&local_queue); + if (unlikely(!first)) + goto out; + + /* Remove the circularity from the queue, so that we can iterate + * on the skbs themselves. 
*/ + local_queue.prev->next = local_queue.next->prev = NULL; + + /* The first pointer of the control block is a pointer to the bundle + * and after that, in the first packet only, is where we actually store + * the bundle data. This saves us a call to kmalloc. */ + bundle = (struct packet_bundle *)(first->cb + sizeof(void *)); + atomic_set(&bundle->count, skb_queue_len(&local_queue)); + bundle->first = first; + + /* Non-parallel path for the case of only one packet that's small */ + if (skb_queue_len(&local_queue) == 1 && first->len <= 256) + parallel = false; + + for (skb = first; skb; skb = next) { + /* We store the next pointer locally because we might free skb + * before the top of the loop comes again. */ + next = skb->next; + + /* We set the first pointer in cb to point to the bundle data. */ + *(struct packet_bundle **)skb->cb = bundle; + + /* We submit it for encryption and sending. */ + switch (packet_create_data(skb, peer, message_create_data_done, parallel)) { + case 0: + /* If all goes well, we can simply decrement the queue counter. Even + * though skb_dequeue() would do this for us, we don't want to break the + * links between packets, so we just traverse the list normally and + * decrement the counter manually each time a packet is consumed. */ + --local_queue.qlen; + break; + case -ENOKEY: + /* ENOKEY means that we don't have a valid session for the peer, which + * means we should initiate a session, and then requeue everything. */ + ratelimit_packet_send_handshake_initiation(peer); + /* Fall through */ + case -EBUSY: + /* EBUSY happens when the parallel workers are all filled up, in which + * case we should requeue everything. */ + if (skb->prev) { + /* Since we're requeuing skb and everything after skb, we make + * sure that the previously successfully sent packets don't link + * to the requeued packets, which will be sent independently the + * next time this function is called. 
*/ + skb->prev->next = NULL; + skb->prev = NULL; + } + if (atomic_sub_and_test(local_queue.qlen, &bundle->count)) { + /* We remove the requeued packets from the count of total packets + * that were successfully submitted, which means we then must see + * if we were the ones to get it to zero. If we are at zero, we + * only send the previous successful packets if there actually were + * packets that succeeded before skb. */ + if (skb != first) + send_off_bundle(bundle, peer); + } + /* We stick the remaining skbs from local_queue at the top of the peer's + * queue again, setting the top of local_queue to be the skb that begins + * the requeueing. */ + local_queue.next = skb; + spin_lock_irqsave(&peer->tx_packet_queue.lock, flags); + skb_queue_splice(&local_queue, &peer->tx_packet_queue); + spin_unlock_irqrestore(&peer->tx_packet_queue.lock, flags); + goto out; + default: + /* If we failed for any other reason, we want to just free the packet and + * forget about it, so we first decrement the queue counter as in the + * successful case above. */ + --local_queue.qlen; + if (skb == first && next) { + /* If it's the first one that failed, we need to move the bundle data + * to the next packet. Then, all subsequent assignments of the bundle + * pointer will be to the moved data. */ + *(struct packet_bundle *)(next->cb + sizeof(void *)) = *bundle; + bundle = (struct packet_bundle *)(next->cb + sizeof(void *)); + bundle->first = next; + } + /* We remove the skb from the list and free it. */ + if (skb->prev) + skb->prev->next = skb->next; + if (skb->next) + skb->next->prev = skb->prev; + kfree_skb(skb); + if (atomic_dec_and_test(&bundle->count)) { + /* As above, if this failed packet pushes the count to zero, we have to + * be the ones to send it off only in the case that there's something to + * send. */ + if (skb != first) + send_off_bundle(bundle, peer); + } + /* Only at the bottom do we update our local `first` variable, because we need it + * in the check above. 
But it's important that bundle->first is updated earlier when + * actually moving the bundle. */ + first = bundle->first; + } + } +out: + return NETDEV_TX_OK; +} diff --git a/src/socket.c b/src/socket.c new file mode 100644 index 0000000..ac19a47 --- /dev/null +++ b/src/socket.c @@ -0,0 +1,479 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#include "wireguard.h" +#include "socket.h" +#include "packets.h" +#include "messages.h" + +#include +#include +#include +#include +#include + +int socket_addr_from_skb(struct sockaddr_storage *sockaddr, struct sk_buff *skb) +{ + struct iphdr *ip4; + struct ipv6hdr *ip6; + struct udphdr *udp; + struct sockaddr_in *addr4; + struct sockaddr_in6 *addr6; + + addr4 = (struct sockaddr_in *)sockaddr; + addr6 = (struct sockaddr_in6 *)sockaddr; + ip4 = ip_hdr(skb); + ip6 = ipv6_hdr(skb); + udp = udp_hdr(skb); + if (ip4->version == 4) { + addr4->sin_family = AF_INET; + addr4->sin_port = udp->source; + addr4->sin_addr.s_addr = ip4->saddr; + } else if (ip4->version == 6) { + addr6->sin6_family = AF_INET6; + addr6->sin6_port = udp->source; + addr6->sin6_addr = ip6->saddr; + addr6->sin6_scope_id = ipv6_iface_scope_id(&ip6->saddr, skb->skb_iif); + /* TODO: addr6->sin6_flowinfo */ + } else + return -EINVAL; + return 0; +} + +static inline struct dst_entry *route(struct wireguard_device *wg, struct flowi4 *fl4, struct flowi6 *fl6, struct sockaddr_storage *addr, struct sock *sock4, struct sock *sock6) +{ + struct dst_entry *dst = ERR_PTR(-EAFNOSUPPORT); + + if (addr->ss_family == AF_INET) { + struct rtable *rt; + struct sockaddr_in *sin4 = (struct sockaddr_in *)addr; + + if (unlikely(!sock4)) + return ERR_PTR(-ENONET); + + memset(fl4, 0, sizeof(struct flowi4)); + fl4->daddr = sin4->sin_addr.s_addr; + fl4->fl4_dport = sin4->sin_port; + fl4->fl4_sport = htons(wg->incoming_port); + fl4->flowi4_proto = IPPROTO_UDP; + + security_sk_classify_flow(sock4, flowi4_to_flowi(fl4)); + rt = ip_route_output_flow(sock_net(sock4), fl4, 
sock4);
+		/* rt is an ERR_PTR on failure: hand that error back to the caller
+		 * instead of falling through and deriving &rt->dst from it. */
+		if (unlikely(IS_ERR(rt)))
+			return ERR_CAST(rt);
+		dst = &rt->dst;
+	} else if (addr->ss_family == AF_INET6) {
+		int ret;
+		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
+
+		if (unlikely(!sock6))
+			return ERR_PTR(-ENONET);
+
+		memset(fl6, 0, sizeof(struct flowi6));
+		fl6->daddr = sin6->sin6_addr;
+		fl6->fl6_dport = sin6->sin6_port;
+		fl6->fl6_sport = htons(wg->incoming_port);
+		fl6->flowi6_oif = sin6->sin6_scope_id;
+		fl6->flowi6_proto = IPPROTO_UDP;
+		/* TODO: addr6->sin6_flowinfo */
+
+		security_sk_classify_flow(sock6, flowi6_to_flowi(fl6));
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
+		ret = ipv6_stub->ipv6_dst_lookup(sock_net(sock6), sock6, &dst, fl6);
+#else
+		ret = ipv6_stub->ipv6_dst_lookup(sock6, &dst, fl6);
+#endif
+		if (unlikely(ret))
+			dst = ERR_PTR(ret);
+	}
+	return dst;
+}
+
+/* Transmits skb to addr through the UDP tunnel socket matching the address
+ * family (sock4 for AF_INET, sock6 for AF_INET6), using the flow that route()
+ * filled in and the already-resolved dst.
+ *
+ * Returns 0 on success or a negative errno: -EAFNOSUPPORT for an unknown
+ * family, -ENONET when the needed socket is NULL, -ECOMM when the pre-4.5
+ * tunnel helpers report a failed transmit. On the err path skb is freed and
+ * the dst reference is released here. */
+static inline int send(struct net_device *dev, struct sk_buff *skb, struct dst_entry *dst, struct flowi4 *fl4, struct flowi6 *fl6, struct sockaddr_storage *addr, struct sock *sock4, struct sock *sock6, u8 dscp)
+{
+	int ret = -EAFNOSUPPORT;
+
+	skb->next = skb->prev = NULL;
+	skb->dev = dev;
+
+	if (addr->ss_family == AF_INET) {
+		if (unlikely(!sock4)) {
+			ret = -ENONET;
+			goto err;
+		}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0)
+		ret = udp_tunnel_xmit_skb((struct rtable *)dst, sock4, skb,
+					  fl4->saddr, fl4->daddr,
+					  dscp, ip4_dst_hoplimit(dst), 0,
+					  fl4->fl4_sport, fl4->fl4_dport,
+					  false, false);
+		iptunnel_xmit_stats(ret, &dev->stats, dev->tstats);
+		return ret > 0 ?
0 : -ECOMM; +#else + udp_tunnel_xmit_skb((struct rtable *)dst, sock4, skb, + fl4->saddr, fl4->daddr, + dscp, ip4_dst_hoplimit(dst), 0, + fl4->fl4_sport, fl4->fl4_dport, + false, false); + return 0; +#endif + } else if (addr->ss_family == AF_INET6) { + if (unlikely(!sock6)) { + ret = -ENONET; + goto err; + } + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) + return udp_tunnel6_xmit_skb(dst, sock6, skb, dev, + &fl6->saddr, &fl6->daddr, + dscp, ip6_dst_hoplimit(dst), + fl6->fl6_sport, fl6->fl6_dport, + false) == 0 ? 0 : -ECOMM; +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) + udp_tunnel6_xmit_skb(dst, sock6, skb, dev, + &fl6->saddr, &fl6->daddr, + dscp, ip6_dst_hoplimit(dst), + fl6->fl6_sport, fl6->fl6_dport, + false); + return 0; +#else + udp_tunnel6_xmit_skb(dst, sock6, skb, dev, + &fl6->saddr, &fl6->daddr, + dscp, ip6_dst_hoplimit(dst), 0, + fl6->fl6_sport, fl6->fl6_dport, + false); + return 0; +#endif + } + +err: + kfree_skb(skb); + dst_release(dst); + return ret; +} + +static inline void __socket_set_peer_dst(struct wireguard_peer *peer) +{ + struct dst_entry *old_dst, *new_dst; + lockdep_assert_held(&peer->endpoint_lock); + + old_dst = peer->endpoint_dst; + peer->endpoint_dst = NULL; + wmb(); + if (old_dst) + dst_release(old_dst); + + rcu_read_lock(); + new_dst = route(peer->device, &peer->endpoint_flow.fl4, &peer->endpoint_flow.fl6, &peer->endpoint_addr, rcu_dereference(peer->device->sock4), rcu_dereference(peer->device->sock6)); + rcu_read_unlock(); + + if (likely(!IS_ERR(new_dst))) { + peer->endpoint_dst = new_dst; + wmb(); + } +} + +void socket_set_peer_dst(struct wireguard_peer *peer) +{ + write_lock_bh(&peer->endpoint_lock); + __socket_set_peer_dst(peer); + write_unlock_bh(&peer->endpoint_lock); +} + +void socket_set_peer_addr(struct wireguard_peer *peer, struct sockaddr_storage *sockaddr) +{ + if (sockaddr->ss_family == AF_INET) { + read_lock_bh(&peer->endpoint_lock); + if (!memcmp(sockaddr, &peer->endpoint_addr, sizeof(struct sockaddr_in))) + 
goto out;
+		read_unlock_bh(&peer->endpoint_lock);
+		write_lock_bh(&peer->endpoint_lock);
+		memcpy(&peer->endpoint_addr, sockaddr, sizeof(struct sockaddr_in));
+	} else if (sockaddr->ss_family == AF_INET6) {
+		read_lock_bh(&peer->endpoint_lock);
+		if (!memcmp(sockaddr, &peer->endpoint_addr, sizeof(struct sockaddr_in6)))
+			goto out;
+		read_unlock_bh(&peer->endpoint_lock);
+		write_lock_bh(&peer->endpoint_lock);
+		memcpy(&peer->endpoint_addr, sockaddr, sizeof(struct sockaddr_in6));
+	} else
+		return;
+	__socket_set_peer_dst(peer);
+	write_unlock_bh(&peer->endpoint_lock);
+	return;
+out:
+	read_unlock_bh(&peer->endpoint_lock);
+}
+
+/* Returns the peer's cached endpoint dst with an extra reference taken, or
+ * NULL when none can be obtained. If there is no cached dst, or it is marked
+ * obsolete and its ops->check() rejects it, the route is looked up again via
+ * socket_set_peer_dst() (with the read lock dropped around that call).
+ * The caller owns the returned reference. */
+static inline struct dst_entry *peer_dst_get(struct wireguard_peer *peer)
+{
+	struct dst_entry *dst = NULL;
+	read_lock_bh(&peer->endpoint_lock);
+
+	if (!peer->endpoint_dst || (peer->endpoint_dst->obsolete && !peer->endpoint_dst->ops->check(peer->endpoint_dst, 0))) {
+		read_unlock_bh(&peer->endpoint_lock);
+		socket_set_peer_dst(peer);
+		read_lock_bh(&peer->endpoint_lock);
+		if (!peer->endpoint_dst)
+			goto out;
+	}
+
+	if (!atomic_inc_not_zero(&peer->endpoint_dst->__refcnt))
+		goto out;
+	dst = peer->endpoint_dst;
+
+out:
+	read_unlock_bh(&peer->endpoint_lock);
+	return dst;
+}
+
+
+/* Sends skb to the peer's current endpoint with the given DSCP value.
+ *
+ * Returns -EHOSTUNREACH when no route is available, -ELOOP when the route
+ * points back at the wireguard device itself, otherwise the result of send().
+ * On success the original skb length is added to peer->tx_bytes. The skb is
+ * freed on both early-error paths. */
+int socket_send_skb_to_peer(struct wireguard_peer *peer, struct sk_buff *skb, u8 dscp)
+{
+	struct net_device *dev = netdev_pub(peer->device);
+	struct dst_entry *dst;
+	size_t skb_len = skb->len;
+	int ret = 0;
+
+	dst = peer_dst_get(peer);
+	if (unlikely(!dst)) {
+		net_dbg_ratelimited("No route to %pISpfsc for peer %Lu\n", &peer->endpoint_addr, peer->internal_id);
+		kfree_skb(skb);
+		return -EHOSTUNREACH;
+	} else if (unlikely(dst->dev == dev)) {
+		net_dbg_ratelimited("Avoiding routing loop to %pISpfsc for peer %Lu\n", &peer->endpoint_addr, peer->internal_id);
+		/* Drop the reference peer_dst_get() took; send() is never
+		 * reached on this path, so nothing else would release it. */
+		dst_release(dst);
+		kfree_skb(skb);
+		return -ELOOP;
+	}
+
+	rcu_read_lock();
+	read_lock_bh(&peer->endpoint_lock);
+
+	ret = send(dev, skb, dst, &peer->endpoint_flow.fl4, &peer->endpoint_flow.fl6, &peer->endpoint_addr,
rcu_dereference(peer->device->sock4), rcu_dereference(peer->device->sock6), dscp); + if (!ret) + peer->tx_bytes += skb_len; + + read_unlock_bh(&peer->endpoint_lock); + rcu_read_unlock(); + + return ret; +} + +int socket_send_buffer_to_peer(struct wireguard_peer *peer, void *buffer, size_t len, u8 dscp) +{ + struct sk_buff *skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + skb_reserve(skb, SKB_HEADER_LEN); + memcpy(skb_put(skb, len), buffer, len); + return socket_send_skb_to_peer(peer, skb, dscp); +} + +static int send_to_sockaddr(struct sk_buff *skb, struct wireguard_device *wg, struct sockaddr_storage *addr, struct sock *sock4, struct sock *sock6) +{ + struct dst_entry *dst; + struct net_device *dev = netdev_pub(wg); + union { + struct flowi4 fl4; + struct flowi6 fl6; + } fl; + + dst = route(wg, &fl.fl4, &fl.fl6, addr, sock4, sock6); + if (IS_ERR(dst)) { + net_dbg_ratelimited("No route to %pISpfsc\n", addr); + kfree_skb(skb); + return PTR_ERR(dst); + } else if (unlikely(dst->dev == netdev_pub(wg))) { + net_dbg_ratelimited("Avoiding routing loop to %pISpfsc\n", addr); + dst_release(dst); + kfree_skb(skb); + return -ELOOP; + } + + return send(dev, skb, dst, &fl.fl4, &fl.fl6, addr, sock4, sock6, 0); +} + +int socket_send_buffer_as_reply_to_skb(struct sk_buff *in_skb, void *out_buffer, size_t len, struct wireguard_device *wg) +{ + int ret = 0; + struct sk_buff *skb; + struct sockaddr_storage addr = { 0 }; + + if (unlikely(!in_skb)) + return -EINVAL; + ret = socket_addr_from_skb(&addr, in_skb); + if (ret < 0) + return ret; + + skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + skb_reserve(skb, SKB_HEADER_LEN); + memcpy(skb_put(skb, len), out_buffer, len); + + rcu_read_lock(); + ret = send_to_sockaddr(skb, wg, &addr, rcu_dereference(wg->sock4), rcu_dereference(wg->sock6)); + rcu_read_unlock(); + + return ret; +} + +static int receive(struct sock *sk, struct sk_buff *skb) +{ + struct wireguard_device *wg; + 
+
+	if (unlikely(!sk))
+		goto err;
+	wg = sk->sk_user_data;
+	if (unlikely(!wg))
+		goto err;
+	packet_receive(wg, skb);
+	return 0;
+
+err:
+	kfree_skb(skb);
+	return 0;
+}
+
+/* Generates a default port from the interface name: 51820 plus the decimal
+ * number formed by the name's trailing digits, truncated to 16 bits. A result
+ * of 0 is bumped to 1. Names without trailing digits, or whose digits
+ * kstrtoul() cannot parse, give the base port.
+ *   wg0     --> 51820
+ *   wg1     --> 51821
+ *   wg2     --> 51822
+ *   wg100   --> 51920
+ *   wg60000 --> 46284   ((51820 + 60000) mod 65536)
+ *   blahbla --> 51820
+ *   50      --> 51870
+ */
+static uint16_t generate_default_incoming_port(struct wireguard_device *wg)
+{
+	uint16_t port = 51820;
+	unsigned long parsed;
+	char *name, *digit_begin;
+	size_t len;
+
+	ASSERT_RTNL();
+
+	name = netdev_pub(wg)->name;
+	len = strlen(name);
+	if (!len)
+		return port;
+	/* Walk back from the terminating NUL over the trailing digits, never
+	 * moving digit_begin below the start of the name (an all-digit name
+	 * used to step the pointer to name - 1). */
+	digit_begin = name + len;
+	while (digit_begin > name && digit_begin[-1] >= '0' && digit_begin[-1] <= '9')
+		--digit_begin;
+	if (!*digit_begin)
+		return port;
+	if (!kstrtoul(digit_begin, 10, &parsed))
+		port += parsed;
+	if (!port)
+		++port;
+	return port;
+}
+
+static inline void sock_free(struct sock *sock)
+{
+	if (unlikely(!sock))
+		return;
+	sk_clear_memalloc(sock);
+	udp_tunnel_sock_release(sock->sk_socket);
+}
+
+static inline void set_sock_opts(struct socket *sock)
+{
+	sock->sk->sk_allocation = GFP_ATOMIC;
+	sock->sk->sk_sndbuf = INT_MAX;
+	sk_set_memalloc(sock->sk);
+}
+
+int socket_init(struct wireguard_device *wg)
+{
+	struct udp_port_cfg port4 = {
+		.family = AF_INET,
+		.local_ip.s_addr = htonl(INADDR_ANY),
+		.use_udp_checksums = true
+	};
+	struct udp_port_cfg port6 = {
+		.family = AF_INET6,
+		.local_ip6 = IN6ADDR_ANY_INIT,
+		.use_udp6_tx_checksums = true,
+		.use_udp6_rx_checksums = true,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
+		.ipv6_v6only = true
+#endif
+	};
+	struct udp_tunnel_sock_cfg cfg = {
+		.sk_user_data = wg,
+		.encap_type = 1,
+		.encap_rcv = receive
+	};
+
+	int ret = 0;
+	struct socket *new4 = NULL, *new6 = NULL;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
+	int old_bindv6only;
+#endif
+
+	mutex_lock(&wg->socket_update_lock);
+
+	if
(rcu_dereference_protected(wg->sock4, lockdep_is_held(&wg->socket_update_lock)) || + rcu_dereference_protected(wg->sock6, lockdep_is_held(&wg->socket_update_lock))) { + ret = -EADDRINUSE; + goto out; + } + + if (!wg->incoming_port) + wg->incoming_port = generate_default_incoming_port(wg); + port4.local_udp_port = port6.local_udp_port = htons(wg->incoming_port); + + ret = udp_sock_create(wg->creating_net, &port4, &new4); + if (ret < 0) { + pr_err("Could not create outgoing IPv4 socket\n"); + goto out; + } + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) + /* Since udp_port_cfg only learned of ipv6_v6only in 4.3, we do this horrible + * hack here and set the sysctl variable temporarily to something that will + * set the right option for us in sock_create. It's super racey! */ + old_bindv6only = wg->creating_net->ipv6.sysctl.bindv6only; + wg->creating_net->ipv6.sysctl.bindv6only = 1; +#endif + ret = udp_sock_create(wg->creating_net, &port6, &new6); +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) + wg->creating_net->ipv6.sysctl.bindv6only = old_bindv6only; +#endif + if (ret < 0) { + pr_err("Could not create outgoing IPv6 socket\n"); + udp_tunnel_sock_release(new4); + goto out; + } + + set_sock_opts(new4); + set_sock_opts(new6); + setup_udp_tunnel_sock(wg->creating_net, new4, &cfg); + setup_udp_tunnel_sock(wg->creating_net, new6, &cfg); + rcu_assign_pointer(wg->sock4, new4->sk); + rcu_assign_pointer(wg->sock6, new6->sk); + +out: + mutex_unlock(&wg->socket_update_lock); + return ret; +} + +void socket_uninit(struct wireguard_device *wg) +{ + struct sock *old4, *old6; + mutex_lock(&wg->socket_update_lock); + old4 = rcu_dereference_protected(wg->sock4, lockdep_is_held(&wg->socket_update_lock)); + old6 = rcu_dereference_protected(wg->sock6, lockdep_is_held(&wg->socket_update_lock)); + rcu_assign_pointer(wg->sock4, NULL); + rcu_assign_pointer(wg->sock6, NULL); + mutex_unlock(&wg->socket_update_lock); + synchronize_rcu(); + sock_free(old4); + sock_free(old6); +} diff 
--git a/src/socket.h b/src/socket.h new file mode 100644 index 0000000..327246f --- /dev/null +++ b/src/socket.h @@ -0,0 +1,25 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#ifndef WGSOCKET_H +#define WGSOCKET_H + +#include +#include +#include +#include + +struct wireguard_device; + +#define SKB_HEADER_LEN (max(sizeof(struct iphdr), sizeof(struct ipv6hdr)) + sizeof(struct udphdr) + ETH_HLEN + VLAN_HLEN + 16) + +int socket_init(struct wireguard_device *wg); +void socket_uninit(struct wireguard_device *wg); +int socket_send_buffer_to_peer(struct wireguard_peer *peer, void *data, size_t len, u8 dscp); +int socket_send_skb_to_peer(struct wireguard_peer *peer, struct sk_buff *skb, u8 dscp); +int socket_send_buffer_as_reply_to_skb(struct sk_buff *in_skb, void *out_buffer, size_t len, struct wireguard_device *wg); + +int socket_addr_from_skb(struct sockaddr_storage *sockaddr, struct sk_buff *skb); +void socket_set_peer_addr(struct wireguard_peer *peer, struct sockaddr_storage *sockaddr); +void socket_set_peer_dst(struct wireguard_peer *peer); + +#endif diff --git a/src/timers.c b/src/timers.c new file mode 100644 index 0000000..47d7854 --- /dev/null +++ b/src/timers.c @@ -0,0 +1,168 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. 
*/ + +#include "wireguard.h" +#include "timers.h" +#include "packets.h" +#include "device.h" + +enum { + KEEPALIVE = 10 * HZ, + MAX_TIMER_HANDSHAKES = (90 * HZ) / REKEY_TIMEOUT +}; + +/* + * Timer for retransmitting the handshake if we don't hear back after `REKEY_TIMEOUT` ms + * Timer for sending empty packet if we have received a packet but after have not sent one for `KEEPALIVE` ms + * Timer for initiating new handshake if we have sent a packet but after have not received one (even empty) for `(KEEPALIVE + REKEY_TIMEOUT)` ms + * Timer for zeroing out all ephemeral keys after `(REJECT_AFTER_TIME * 3)` ms if no new keys have been received + */ + +static void expired_retransmit_handshake(unsigned long ptr) +{ + struct wireguard_peer *peer = (struct wireguard_peer *)ptr; + + pr_debug("Handshake for peer %Lu (%pISpfsc) did not complete after %d seconds, retrying\n", peer->internal_id, &peer->endpoint_addr, REKEY_TIMEOUT / HZ); + if (peer->timer_handshake_attempts > MAX_TIMER_HANDSHAKES) { + del_timer(&peer->timer_send_keepalive); + /* We remove all existing packets and don't try again, + * if we try unsuccessfully for too long to make a handshake. 
*/ + skb_queue_purge(&peer->tx_packet_queue); + return; + } + packet_queue_send_handshake_initiation(peer); + ++peer->timer_handshake_attempts; +} + +static void expired_send_keepalive(unsigned long ptr) +{ + struct wireguard_peer *peer = (struct wireguard_peer *)ptr; + + pr_debug("Sending keep alive packet to peer %Lu (%pISpfsc), since we received data, but haven't sent any for %d seconds\n", peer->internal_id, &peer->endpoint_addr, KEEPALIVE / HZ); + packet_send_keepalive(peer); + if (peer->timer_need_another_keepalive) { + peer->timer_need_another_keepalive = false; + mod_timer(&peer->timer_send_keepalive, jiffies + KEEPALIVE); + } +} + +static void expired_new_handshake(unsigned long ptr) +{ + struct wireguard_peer *peer = (struct wireguard_peer *)ptr; + + pr_debug("Retrying handshake with peer %Lu (%pISpfsc) because we stopped hearing back after %d seconds\n", peer->internal_id, &peer->endpoint_addr, (KEEPALIVE + REKEY_TIMEOUT) / HZ); + packet_queue_send_handshake_initiation(peer); +} + +static void expired_kill_ephemerals(unsigned long ptr) +{ + struct wireguard_peer *peer = (struct wireguard_peer *)ptr; + + rcu_read_lock(); + peer = peer_get(peer); + rcu_read_unlock(); + if (!peer) + return; + + if (!queue_work(peer->device->workqueue, &peer->clear_peer_work)) + peer_put(peer); /* If the work was already on the queue, we want to drop the extra reference */ +} +static void queued_expired_kill_ephemerals(struct work_struct *work) +{ + struct wireguard_peer *peer = container_of(work, struct wireguard_peer, clear_peer_work); + + pr_debug("Zeroing out all keys for peer %Lu (%pISpfsc), since we haven't received a new one in %d seconds\n", peer->internal_id, &peer->endpoint_addr, (REJECT_AFTER_TIME * 3) / HZ); + noise_handshake_clear(&peer->handshake); + noise_keypairs_clear(&peer->keypairs); + peer_put(peer); +} + +void timers_data_sent(struct wireguard_peer *peer) +{ + if (likely(peer->timer_send_keepalive.data)) + del_timer(&peer->timer_send_keepalive); + + if 
(likely(peer->timer_new_handshake.data) && !timer_pending(&peer->timer_new_handshake)) + mod_timer(&peer->timer_new_handshake, jiffies + KEEPALIVE + REKEY_TIMEOUT); +} + +void timers_data_received(struct wireguard_peer *peer) +{ + if (likely(peer->timer_send_keepalive.data) && !timer_pending(&peer->timer_send_keepalive)) + mod_timer(&peer->timer_send_keepalive, jiffies + KEEPALIVE); + else + peer->timer_need_another_keepalive = true; +} + +void timers_any_authorized_packet_received(struct wireguard_peer *peer) +{ + if (likely(peer->timer_new_handshake.data)) + del_timer(&peer->timer_new_handshake); +} + +void timers_handshake_initiated(struct wireguard_peer *peer) +{ + if (likely(peer->timer_send_keepalive.data)) + del_timer(&peer->timer_send_keepalive); + if (likely(peer->timer_retransmit_handshake.data)) + mod_timer(&peer->timer_retransmit_handshake, jiffies + REKEY_TIMEOUT); +} + +void timers_handshake_complete(struct wireguard_peer *peer) +{ + if (likely(peer->timer_retransmit_handshake.data)) + del_timer(&peer->timer_retransmit_handshake); + peer->timer_handshake_attempts = 0; +} + +void timers_ephemeral_key_created(struct wireguard_peer *peer) +{ + if (likely(peer->timer_kill_ephemerals.data)) + mod_timer(&peer->timer_kill_ephemerals, jiffies + (REJECT_AFTER_TIME * 3)); + do_gettimeofday(&peer->walltime_last_handshake); +} + +void timers_init_peer(struct wireguard_peer *peer) +{ + init_timer(&peer->timer_retransmit_handshake); + peer->timer_retransmit_handshake.function = expired_retransmit_handshake; + peer->timer_retransmit_handshake.data = (unsigned long)peer; + + init_timer(&peer->timer_send_keepalive); + peer->timer_send_keepalive.function = expired_send_keepalive; + peer->timer_send_keepalive.data = (unsigned long)peer; + + init_timer(&peer->timer_new_handshake); + peer->timer_new_handshake.function = expired_new_handshake; + peer->timer_new_handshake.data = (unsigned long)peer; + + init_timer(&peer->timer_kill_ephemerals); + 
peer->timer_kill_ephemerals.function = expired_kill_ephemerals; + peer->timer_kill_ephemerals.data = (unsigned long)peer; + + INIT_WORK(&peer->clear_peer_work, queued_expired_kill_ephemerals); +} + +void timers_uninit_peer(struct wireguard_peer *peer) +{ + if (peer->timer_retransmit_handshake.data) { + del_timer(&peer->timer_retransmit_handshake); + peer->timer_retransmit_handshake.data = 0; + } + if (peer->timer_send_keepalive.data) { + del_timer(&peer->timer_send_keepalive); + peer->timer_send_keepalive.data = 0; + } + if (peer->timer_new_handshake.data) { + del_timer(&peer->timer_new_handshake); + peer->timer_new_handshake.data = 0; + } + if (peer->timer_kill_ephemerals.data) { + del_timer(&peer->timer_kill_ephemerals); + peer->timer_kill_ephemerals.data = 0; + } +} +void timers_uninit_peer_wait(struct wireguard_peer *peer) +{ + timers_uninit_peer(peer); + flush_work(&peer->clear_peer_work); +} diff --git a/src/timers.h b/src/timers.h new file mode 100644 index 0000000..c5e9678 --- /dev/null +++ b/src/timers.h @@ -0,0 +1,19 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. 
*/ + +#ifndef WGTIMERS_H +#define WGTIMERS_H + +struct wireguard_peer; + +void timers_init_peer(struct wireguard_peer *peer); +void timers_uninit_peer(struct wireguard_peer *peer); +void timers_uninit_peer_wait(struct wireguard_peer *peer); + +void timers_data_sent(struct wireguard_peer *peer); +void timers_data_received(struct wireguard_peer *peer); +void timers_any_authorized_packet_received(struct wireguard_peer *peer); +void timers_handshake_initiated(struct wireguard_peer *peer); +void timers_handshake_complete(struct wireguard_peer *peer); +void timers_ephemeral_key_created(struct wireguard_peer *peer); + +#endif diff --git a/src/tools/.gitignore b/src/tools/.gitignore new file mode 100644 index 0000000..359f8b5 --- /dev/null +++ b/src/tools/.gitignore @@ -0,0 +1,3 @@ +*.d +*.o +wg diff --git a/src/tools/Makefile b/src/tools/Makefile new file mode 100644 index 0000000..4eddd25 --- /dev/null +++ b/src/tools/Makefile @@ -0,0 +1,26 @@ +PREFIX ?= /usr +DESTDIR ?= +BINDIR ?= $(PREFIX)/bin +LIBDIR ?= $(PREFIX)/lib +MANDIR ?= $(PREFIX)/share/man + +CFLAGS += -std=gnu11 +CFLAGS += -pedantic -Wall -Wextra +CFLAGS += -MMD +LDLIBS += -lresolv -lmnl + +wg: $(patsubst %.c,%.o,$(wildcard *.c)) + +clean: + rm -f wg *.o *.d + +install: wg + install -v -d "$(DESTDIR)$(BINDIR)" && install -s -m 0755 -v wg "$(DESTDIR)$(BINDIR)/wg" + install -v -d "$(DESTDIR)$(MANDIR)/man8" && install -m 0644 -v wg.8 "$(DESTDIR)$(MANDIR)/man8/wg.8" + +check: clean + CFLAGS=-g scan-build --view --keep-going $(MAKE) wg + +.PHONY: clean install check + +-include *.d diff --git a/src/tools/base64.c b/src/tools/base64.c new file mode 100644 index 0000000..cf37464 --- /dev/null +++ b/src/tools/base64.c @@ -0,0 +1,220 @@ +/* + * Copyright (c) 1996, 1998 by Internet Software Consortium. 
+ * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS + * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE + * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL + * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR + * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Portions Copyright (c) 1995 by International Business Machines, Inc. + * + * International Business Machines, Inc. (hereinafter called IBM) grants + * permission under its copyrights to use, copy, modify, and distribute this + * Software with or without fee, provided that the above copyright notice and + * all paragraphs of this notice appear in all copies, and that the name of IBM + * not be used in connection with the marketing of any product incorporating + * the Software or modifications thereof, without specific, written prior + * permission. + * + * To the extent it has a right to do so, IBM grants an immunity from suit + * under its patents, if any, for the use, sale or manufacture of products to + * the extent that such products are used for performing Domain Name System + * dynamic updates in TCP/IP networks by means of the Software. No immunity is + * granted for any product per se or for any other function of any product. + * + * THE SOFTWARE IS PROVIDED "AS IS", AND IBM DISCLAIMS ALL WARRANTIES, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE. 
IN NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL, + * DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE, EVEN + * IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES. + */ + +#include "base64.h" +#include +#include +#include +#include + +#if defined(NEED_B64_NTOP) || defined(NEED_B64_PTON) +static const char base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +static const char pad64 = '='; +#endif + +#ifdef NEED_B64_NTOP +int b64_ntop(unsigned char const *src, size_t srclength, char *target, size_t targsize) +{ + size_t datalength = 0; + uint8_t input[3]; + uint8_t output[4]; + size_t i; + + while (2 < srclength) { + input[0] = *src++; + input[1] = *src++; + input[2] = *src++; + srclength -= 3; + + output[0] = input[0] >> 2; + output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4); + output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6); + output[3] = input[2] & 0x3f; + assert(output[0] < 64); + assert(output[1] < 64); + assert(output[2] < 64); + assert(output[3] < 64); + + if (datalength + 4 > targsize) + return -1; + target[datalength++] = base64[output[0]]; + target[datalength++] = base64[output[1]]; + target[datalength++] = base64[output[2]]; + target[datalength++] = base64[output[3]]; + } + if (0 != srclength) { + input[0] = input[1] = input[2] = '\0'; + for (i = 0; i < srclength; i++) + input[i] = *src++; + output[0] = input[0] >> 2; + output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4); + output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6); + assert(output[0] < 64); + assert(output[1] < 64); + assert(output[2] < 64); + + if (datalength + 4 > targsize) + return -1; + target[datalength++] = base64[output[0]]; + target[datalength++] = base64[output[1]]; + if (srclength == 1) + target[datalength++] = pad64; + else + target[datalength++] = base64[output[2]]; + target[datalength++] = pad64; + } + if (datalength >= targsize) + 
return (-1);
+	target[datalength] = '\0';
+	return datalength;
+}
+#endif
+
+#ifdef NEED_B64_PTON
+/* Decodes the NUL-terminated base64 text in src into target, writing at most
+ * targsize bytes. Whitespace in the input is skipped. Decoding stops at the
+ * terminating NUL or at the first '=' pad character, after which only the
+ * expected padding and whitespace may follow.
+ *
+ * Returns the number of bytes decoded, or -1 on an invalid character,
+ * malformed padding, or insufficient space. Note that states 1 and 2 also
+ * write target[tarindex + 1], so they require one byte of room beyond the
+ * byte being completed. */
+int b64_pton(char const *src, uint8_t *target, size_t targsize)
+{
+	static int b64rmap_initialized = 0;
+	static uint8_t b64rmap[256];
+	static const uint8_t b64rmap_special = 0xf0;
+	static const uint8_t b64rmap_end = 0xfd;
+	static const uint8_t b64rmap_space = 0xfe;
+	static const uint8_t b64rmap_invalid = 0xff;
+	int tarindex, state, ch;
+	uint8_t ofs;
+
+	if (!b64rmap_initialized) {
+		int i;
+		b64rmap[0] = b64rmap_end;
+		for (i = 1; i < 256; ++i) {
+			/* i is in unsigned char range, which is what the
+			 * <ctype.h> functions require; a plain char could
+			 * be negative here. */
+			if (isspace(i))
+				b64rmap[i] = b64rmap_space;
+			else if (i == pad64)
+				b64rmap[i] = b64rmap_end;
+			else
+				b64rmap[i] = b64rmap_invalid;
+		}
+		for (i = 0; base64[i] != '\0'; ++i)
+			b64rmap[(uint8_t)base64[i]] = i;
+		b64rmap_initialized = 1;
+	}
+
+	state = 0;
+	tarindex = 0;
+
+	for (;;) {
+		/* Read the byte as unsigned: with a signed char, input bytes
+		 * >= 0x80 would otherwise index b64rmap[] negatively. */
+		ch = (unsigned char)*src++;
+		ofs = b64rmap[ch];
+
+		if (ofs >= b64rmap_special) {
+			if (ofs == b64rmap_space)
+				continue;
+			if (ofs == b64rmap_end)
+				break;
+			return -1;
+		}
+
+		switch (state) {
+		case 0:
+			if ((size_t)tarindex >= targsize)
+				return -1;
+			target[tarindex] = ofs << 2;
+			state = 1;
+			break;
+		case 1:
+			if ((size_t)tarindex + 1 >= targsize)
+				return -1;
+			target[tarindex] |= ofs >> 4;
+			target[tarindex+1] = (ofs & 0x0f) << 4;
+			tarindex++;
+			state = 2;
+			break;
+		case 2:
+			if ((size_t)tarindex + 1 >= targsize)
+				return -1;
+			target[tarindex] |= ofs >> 2;
+			target[tarindex+1] = (ofs & 0x03) << 6;
+			tarindex++;
+			state = 3;
+			break;
+		case 3:
+			if ((size_t)tarindex >= targsize)
+				return -1;
+			target[tarindex] |= ofs;
+			tarindex++;
+			state = 0;
+			break;
+		default:
+			abort();
+		}
+	}
+
+	if (ch == pad64) {
+		ch = (unsigned char)*src++;
+		switch (state) {
+		case 0:
+		case 1:
+			return -1;
+
+		case 2:
+			/* Two pad characters are needed: skip whitespace up
+			 * to the second one. */
+			for (; ch; ch = (unsigned char)*src++) {
+				if (b64rmap[ch] != b64rmap_space)
+					break;
+			}
+			if (ch != pad64)
+				return -1;
+			ch = (unsigned char)*src++;
+			/* FALLTHROUGH */
+		case 3:
+			/* Only whitespace may follow the padding. */
+			for (; ch; ch = (unsigned char)*src++) {
+				if (b64rmap[ch] != b64rmap_space)
+					return -1;
+			}
+			/* The bits spilled into the partial last byte must
+			 * be zero. */
+			if (target[tarindex] != 0)
+				return -1;
+		}
+	} else {
+		if (state != 0)
+			return -1;
+	}
+
+	return tarindex;
+}
+#endif
diff --git a/src/tools/base64.h b/src/tools/base64.h
new file mode 100644
index 0000000..5cc94e1
--- /dev/null
+++ b/src/tools/base64.h
@@ -0,0 +1,20 @@
+/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */
+
+#ifndef BASE64_H
+#define BASE64_H
+
+#include
+
+#define b64_len(len) ((((len) + 2) / 3) * 4 + 1)
+
+#ifndef b64_ntop
+int b64_ntop(unsigned char const *, size_t, char *, size_t);
+#define NEED_B64_NTOP
+#endif
+
+#ifndef b64_pton
+int b64_pton(char const *, unsigned char *, size_t);
+#define NEED_B64_PTON
+#endif
+
+#endif
diff --git a/src/tools/config.c b/src/tools/config.c
new file mode 100644
index 0000000..0cec30e
--- /dev/null
+++ b/src/tools/config.c
@@ -0,0 +1,518 @@
+/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "config.h"
+#include "kernel.h"
+#include "base64.h"
+
+#define COMMENT_CHAR '#'
+
+#define max(a, b) (a > b ?
a : b) + +static inline struct wgpeer *peer_from_offset(struct wgdevice *dev, size_t offset) +{ + return (struct wgpeer *)((uint8_t *)dev + sizeof(struct wgdevice) + offset); +} + +static int use_space(struct inflatable_device *buf, size_t space) +{ + size_t expand_to; + uint8_t *new_dev; + + if (buf->len - buf->pos < space) { + expand_to = max(buf->len * 2, buf->len + space); + new_dev = realloc(buf->dev, expand_to + sizeof(struct wgdevice)); + if (!new_dev) + return -errno; + memset(&new_dev[buf->len + sizeof(struct wgdevice)], 0, expand_to - buf->len); + buf->dev = (struct wgdevice *)new_dev; + buf->len = expand_to; + } + buf->pos += space; + return 0; +} + +static const char *get_value(const char *line, const char *key) +{ + size_t linelen = strlen(line); + size_t keylen = strlen(key); + + if (keylen >= linelen) + return NULL; + + if (strncasecmp(line, key, keylen)) + return NULL; + + return line + keylen; +} + +static inline uint16_t parse_port(const char *value) +{ + int ret; + uint16_t port = 0; + struct addrinfo *resolved; + struct addrinfo hints = { + .ai_family = AF_UNSPEC, + .ai_socktype = SOCK_DGRAM, + .ai_protocol = IPPROTO_UDP, + .ai_flags = AI_ADDRCONFIG | AI_PASSIVE + }; + + if (!strlen(value)) { + fprintf(stderr, "Unable to parse empty port\n"); + return 0; + } + + ret = getaddrinfo(NULL, value, &hints, &resolved); + if (ret != 0) { + fprintf(stderr, "%s: `%s`\n", gai_strerror(ret), value); + return 0; + } + + if (resolved->ai_family == AF_INET && resolved->ai_addrlen == sizeof(struct sockaddr_in)) + port = ntohs(((struct sockaddr_in *)resolved->ai_addr)->sin_port); + else if (resolved->ai_family == AF_INET6 && resolved->ai_addrlen == sizeof(struct sockaddr_in6)) + port = ntohs(((struct sockaddr_in6 *)resolved->ai_addr)->sin6_port); + else + fprintf(stderr, "Neither IPv4 nor IPv6 address found: `%s`\n", value); + + freeaddrinfo(resolved); + return port; +} + +static inline bool parse_key(uint8_t key[WG_KEY_LEN], const char *value) +{ + uint8_t 
tmp[WG_KEY_LEN + 1];
+	if (strlen(value) != b64_len(WG_KEY_LEN) - 1) {
+		fprintf(stderr, "Key is not the correct length: `%s`\n", value);
+		return false;
+	}
+	/* The text length alone does not pin the decoded size: unpadded or
+	 * doubly padded input of the same length decodes to one byte more or
+	 * fewer. Require exactly WG_KEY_LEN bytes rather than just "no error",
+	 * so such input is rejected instead of being truncated or zero-extended
+	 * into a key. */
+	if (b64_pton(value, tmp, WG_KEY_LEN + 1) != WG_KEY_LEN) {
+		fprintf(stderr, "Could not parse base64 key: `%s`\n", value);
+		return false;
+	}
+	memcpy(key, tmp, WG_KEY_LEN);
+	return true;
+}
+
+static inline bool parse_ip(struct wgipmask *ipmask, const char *value)
+{
+	ipmask->family = AF_UNSPEC;
+	if (strchr(value, ':')) {
+		if (inet_pton(AF_INET6, value, &ipmask->ip6) == 1)
+			ipmask->family = AF_INET6;
+	} else {
+		if (inet_pton(AF_INET, value, &ipmask->ip4) == 1)
+			ipmask->family = AF_INET;
+	}
+	if (ipmask->family == AF_UNSPEC) {
+		fprintf(stderr, "Unable to parse IP address: `%s`\n", value);
+		return false;
+	}
+	return true;
+}
+
+/* Parses "host:port" or "[host]:port" (brackets allow a host containing
+ * colons) and resolves it with getaddrinfo() into *endpoint.
+ * Returns true on success; on failure prints a message to stderr and
+ * returns false. */
+static inline bool parse_endpoint(struct sockaddr_storage *endpoint, const char *value)
+{
+	char *mutable = strdup(value);
+	char *begin, *end;
+	int ret;
+	struct addrinfo *resolved;
+	struct addrinfo hints = {
+		.ai_family = AF_UNSPEC,
+		.ai_socktype = SOCK_DGRAM,
+		.ai_protocol = IPPROTO_UDP,
+		.ai_flags = AI_ADDRCONFIG
+	};
+	/* strdup() can fail; the copy is dereferenced below. */
+	if (!mutable) {
+		perror("strdup");
+		return false;
+	}
+	if (!strlen(value)) {
+		free(mutable);
+		fprintf(stderr, "Unable to parse empty endpoint\n");
+		return false;
+	}
+	if (mutable[0] == '[') {
+		begin = &mutable[1];
+		end = strchr(mutable, ']');
+		if (!end) {
+			free(mutable);
+			fprintf(stderr, "Unable to find matching brace of endpoint: `%s`\n", value);
+			return false;
+		}
+		*end = '\0';
+		++end;
+		if (*end != ':' || !*(end + 1)) {
+			free(mutable);
+			fprintf(stderr, "Unable to find port of endpoint: `%s`\n", value);
+			return false;
+		}
+		++end;
+	} else {
+		begin = mutable;
+		end = strrchr(mutable, ':');
+		if (!end || !*(end + 1)) {
+			free(mutable);
+			fprintf(stderr, "Unable to find port of endpoint: `%s`\n", value);
+			return false;
+		}
+		*end = '\0';
+		++end;
+	}
+	ret = getaddrinfo(begin, end, &hints, &resolved);
+	if (ret != 0) {
+		free(mutable);
+		fprintf(stderr, "%s: `%s`\n", gai_strerror(ret), value);
+		return false;
+	}
+	if ((resolved->ai_family == AF_INET && resolved->ai_addrlen == sizeof(struct sockaddr_in)) ||
+	    (resolved->ai_family == AF_INET6 && resolved->ai_addrlen == sizeof(struct sockaddr_in6)))
+		memcpy(endpoint, resolved->ai_addr, resolved->ai_addrlen);
+	else {
+		freeaddrinfo(resolved);
+		free(mutable);
+		fprintf(stderr, "Neither IPv4 nor IPv6 address found: `%s`\n", value);
+		return false;
+	}
+	freeaddrinfo(resolved);
+	free(mutable);
+	return true;
+}
+
+/* Parses a comma-separated list of "ip" or "ip/cidr" entries and appends one
+ * struct wgipmask per entry after the peer located at peer_offset, growing
+ * buf through use_space() as needed. The peer's num_ipmasks is reset to 0
+ * and replace_ipmasks set before parsing; an empty value therefore leaves
+ * the peer with no masks.
+ *
+ * A missing, empty, non-numeric or out-of-range prefix length falls back to
+ * the full host mask (32 for IPv4, 128 for IPv6).
+ * Returns true on success, false on allocation failure or an unparsable IP. */
+static inline bool parse_ipmasks(struct inflatable_device *buf, size_t peer_offset, const char *value)
+{
+	struct wgpeer *peer;
+	struct wgipmask *ipmask;
+	char *mask, *mutable = strdup(value), *sep;
+	if (!mutable) {
+		perror("strdup");
+		return false;
+	}
+	peer = peer_from_offset(buf->dev, peer_offset);
+	peer->num_ipmasks = 0;
+	peer->replace_ipmasks = true;
+	if (!strlen(value)) {
+		free(mutable);
+		return true;
+	}
+	sep = mutable;
+	while ((mask = strsep(&sep, ","))) {
+		unsigned long cidr;
+		char *end, *ip = strsep(&mask, "/");
+		if (use_space(buf, sizeof(struct wgipmask)) < 0) {
+			perror("use_space");
+			free(mutable);
+			return false;
+		}
+		/* use_space() may have moved the buffer: recompute the peer. */
+		peer = peer_from_offset(buf->dev, peer_offset);
+		ipmask = (struct wgipmask *)((uint8_t *)peer + sizeof(struct wgpeer) + (sizeof(struct wgipmask) * peer->num_ipmasks));
+
+		if (!parse_ip(ipmask, ip)) {
+			free(mutable);
+			return false;
+		}
+		if (ipmask->family == AF_INET) {
+			if (mask) {
+				cidr = strtoul(mask, &end, 10);
+				/* end == mask: no digits at all (e.g. "1.2.3.4/"),
+				 * which strtoul() reports as 0, i.e. a /0 mask. */
+				if (end == mask || *end)
+					mask = NULL;
+				if (cidr > 32)
+					mask = NULL;
+			}
+			if (!mask)
+				cidr = 32;
+		} else if (ipmask->family == AF_INET6) {
+			if (mask) {
+				cidr = strtoul(mask, &end, 10);
+				if (end == mask || *end)
+					mask = NULL;
+				if (cidr > 128)
+					mask = NULL;
+			}
+			if (!mask)
+				cidr = 128;
+		} else
+			continue;
+		ipmask->cidr = cidr;
+		++peer->num_ipmasks;
+	}
+	free(mutable);
+	return true;
+}
+
+static bool process_line(struct config_ctx *ctx, const char *line)
+{
+	const char *value;
+	bool ret = true;
+
+	if (!strcasecmp(line, "[Interface]")) {
ctx->is_peer_section = false;
+		ctx->is_device_section = true;
+		return true;
+	}
+	if (!strcasecmp(line, "[Peer]")) {
+		/* The new peer record starts at the current write position. */
+		ctx->peer_offset = ctx->buf.pos;
+		if (use_space(&ctx->buf, sizeof(struct wgpeer)) < 0) {
+			perror("use_space");
+			return false;
+		}
+		++ctx->buf.dev->num_peers;
+		ctx->is_peer_section = true;
+		ctx->is_device_section = false;
+		peer_from_offset(ctx->buf.dev, ctx->peer_offset)->replace_ipmasks = true;
+		return true;
+	}
+
+/* Evaluates to the text after `key=` (and stores it in value) when the line
+ * starts with that key; otherwise evaluates to whatever get_value() returns
+ * for a mismatch (NULL, presumably — confirm against get_value()). */
+#define key_match(key) (value = get_value(line, key "="))
+
+	if (ctx->is_device_section) {
+		if (key_match("ListenPort"))
+			/* A port of 0 from parse_port() is treated as failure. */
+			ret = !!(ctx->buf.dev->port = parse_port(value));
+		else if (key_match("PrivateKey")) {
+			ret = parse_key(ctx->buf.dev->private_key, value);
+			if (!ret)
+				memset(ctx->buf.dev->private_key, 0, WG_KEY_LEN);
+		} else if (key_match("PresharedKey")) {
+			ret = parse_key(ctx->buf.dev->preshared_key, value);
+			if (!ret)
+				memset(ctx->buf.dev->preshared_key, 0, WG_KEY_LEN);
+		} else
+			goto error;
+	} else if (ctx->is_peer_section) {
+		if (key_match("Endpoint"))
+			ret = parse_endpoint(&peer_from_offset(ctx->buf.dev, ctx->peer_offset)->endpoint, value);
+		else if (key_match("PublicKey"))
+			ret = parse_key(peer_from_offset(ctx->buf.dev, ctx->peer_offset)->public_key, value);
+		else if (key_match("AllowedIPs"))
+			ret = parse_ipmasks(&ctx->buf, ctx->peer_offset, value);
+		else
+			goto error;
+	} else
+		/* Assignment before any section header. */
+		goto error;
+	return ret;
+
+#undef key_match
+
+error:
+	fprintf(stderr, "Line unrecognized: `%s'\n", line);
+	return false;
+}
+
+/* Feed one raw line of configuration text into @ctx.
+ *
+ * All whitespace is removed from @input first. Lines that are then empty, or
+ * that start with COMMENT_CHAR, are accepted and ignored. Everything else is
+ * passed to process_line().
+ *
+ * Returns false on allocation failure or when the line is rejected. */
+bool config_read_line(struct config_ctx *ctx, const char *input)
+{
+	size_t len = strlen(input), cleaned_len = 0;
+	char *line = calloc(len + 1, sizeof(char));
+	bool ret = true;
+	if (!line) {
+		perror("calloc");
+		return false;
+	}
+	if (!len)
+		goto out;
+	for (size_t i = 0; i < len; ++i) {
+		/* <ctype.h> functions require a value representable as
+		 * unsigned char; plain char may be negative for bytes >= 0x80. */
+		if (!isspace((unsigned char)input[i]))
+			line[cleaned_len++] = input[i];
+	}
+	if (!cleaned_len)
+		goto out;
+	if (line[0] == COMMENT_CHAR)
+		goto out;
+	ret = process_line(ctx, line);
+out:
+	free(line);
+	return 
ret;
+}
+
+/* Prepare @ctx for a sequence of config_read_line() calls.
+ *
+ * @device is where config_read_finish() stores the finished device; @append
+ * false marks the device so that its peer list replaces the existing one.
+ * Returns false if the initial device allocation fails. */
+bool config_read_init(struct config_ctx *ctx, struct wgdevice **device, bool append)
+{
+	memset(ctx, 0, sizeof(struct config_ctx));
+	ctx->device = device;
+	ctx->buf.dev = calloc(1, sizeof(struct wgdevice));
+	if (!ctx->buf.dev) {
+		perror("calloc");
+		return false;
+	}
+	ctx->buf.dev->replace_peer_list = !append;
+	return true;
+}
+
+/* A key counts as valid (i.e. set) when it is not all zero bytes. */
+static inline bool key_is_valid(uint8_t key[WG_KEY_LEN])
+{
+	static const uint8_t zero[WG_KEY_LEN] = { 0 };
+	return !!memcmp(key, zero, WG_KEY_LEN);
+}
+
+/* Validate the device accumulated in @ctx and hand it to the caller.
+ *
+ * When the peer list is being replaced, at least one peer and a private key
+ * are required; every peer must carry a public key. On success the device is
+ * stored through ctx->device and true is returned; on failure the device is
+ * freed, a diagnostic is printed and false is returned. */
+bool config_read_finish(struct config_ctx *ctx)
+{
+	size_t i;
+	struct wgpeer *peer;
+	if (ctx->buf.dev->replace_peer_list && !ctx->buf.dev->num_peers) {
+		fprintf(stderr, "No peers configured\n");
+		goto err;
+	}
+	if (ctx->buf.dev->replace_peer_list && !key_is_valid(ctx->buf.dev->private_key)) {
+		fprintf(stderr, "No private key configured\n");
+		goto err;
+	}
+	for_each_wgpeer(ctx->buf.dev, peer, i) {
+		if (!key_is_valid(peer->public_key)) {
+			fprintf(stderr, "A peer is missing a public key\n");
+			goto err;
+		}
+	}
+	*ctx->device = ctx->buf.dev;
+	return true;
+err:
+	free(ctx->buf.dev);
+	return false;
+}
+
+/* Read the first line of the file at @path into a newly allocated string.
+ *
+ * Returns 0 on success, with *dst owned by the caller (free() it).
+ * Returns 1 when @path is character device 1:3 (/dev/null on Linux); *dst is
+ * NULL in that case.
+ * Returns -1 on error, after printing a diagnostic; *dst is NULL.
+ *
+ * Every whitespace character after the first byte is replaced by NUL, which
+ * drops the trailing newline. */
+static int read_line(char **dst, const char *path)
+{
+	FILE *f;
+	size_t n = 0;
+	struct stat stat;
+
+	*dst = NULL;
+
+	f = fopen(path, "r");
+	if (!f) {
+		perror("fopen");
+		return -1;
+	}
+	if (fstat(fileno(f), &stat) < 0) {
+		perror("fstat");
+		fclose(f);
+		return -1;
+	}
+	if (S_ISCHR(stat.st_mode) && stat.st_rdev == makedev(1, 3)) {
+		fclose(f);
+		return 1;
+	}
+	if (getline(dst, &n, f) < 0) {
+		perror("getline");
+		fclose(f);
+		/* getline() may allocate the buffer even when it fails, and
+		 * callers only free *dst on success. */
+		free(*dst);
+		*dst = NULL;
+		return -1;
+	}
+	fclose(f);
+	/* Visit indices len-1 down to 1. Testing n > 1 before decrementing
+	 * avoids wrapping around when the line has length 0 (leading NUL). */
+	for (n = strlen(*dst); n > 1; ) {
+		--n;
+		if (isspace((unsigned char)(*dst)[n]))
+			(*dst)[n] = '\0';
+	}
+	return 0;
+}
+
+/* Return a newly allocated copy of @in with every whitespace character
+ * removed, or NULL (after printing a diagnostic) if allocation fails. The
+ * caller frees the result. */
+static char *strip_spaces(const char *in)
+{
+	char *out;
+	size_t t, l, i;
+
+	t = strlen(in);
+	/* calloc() zero-fills, so the result is always NUL-terminated. */
+	out = calloc(t + 1, sizeof(char));
+	if (!out) {
+		perror("calloc");
+		return NULL;
+	}
+	for (i = 0, l = 0; i < t; ++i) {
+		if (!isspace((unsigned char)in[i]))
+			out[l++] = in[i];
+	}
+	return out;
+}
+
+bool 
config_read_cmd(struct wgdevice **device, char *argv[], int argc)
+{
+	/* Build a device description from command-line style arguments.
+	 *
+	 * Recognized words: `listen-port <port>`, `private-key <file>` and
+	 * `preshared-key <file>` (only before the first `peer`), then
+	 * `peer <base64-key>`, each optionally followed by `remove`,
+	 * `endpoint <host:port>` and `allowed-ips <list>`, which apply to the
+	 * most recent peer.
+	 *
+	 * On success the device is stored in *device and true is returned;
+	 * otherwise the partially built device is freed and false is
+	 * returned. */
+	struct inflatable_device buf = { 0 };
+	size_t peer_offset = 0;
+	/* calloc(nmemb, size): same argument order as config_read_init(). */
+	buf.dev = calloc(1, sizeof(struct wgdevice));
+	if (!buf.dev) {
+		perror("calloc");
+		return false;
+	}
+	while (argc > 0) {
+		if (!strcmp(argv[0], "listen-port") && argc >= 2 && !buf.dev->num_peers) {
+			buf.dev->port = parse_port(argv[1]);
+			if (!buf.dev->port)
+				goto error;
+			argv += 2;
+			argc -= 2;
+		} else if (!strcmp(argv[0], "private-key") && argc >= 2 && !buf.dev->num_peers) {
+			char *line;
+			int ret = read_line(&line, argv[1]);
+			if (ret == 0) {
+				if (!parse_key(buf.dev->private_key, line)) {
+					free(line);
+					goto error;
+				}
+				free(line);
+			} else if (ret == 1)
+				/* read_line() returns 1 for its special device
+				 * case: request removal of the key. */
+				buf.dev->remove_private_key = true;
+			else
+				goto error;
+			argv += 2;
+			argc -= 2;
+		} else if (!strcmp(argv[0], "preshared-key") && argc >= 2 && !buf.dev->num_peers) {
+			char *line;
+			int ret = read_line(&line, argv[1]);
+			if (ret == 0) {
+				if (!parse_key(buf.dev->preshared_key, line)) {
+					free(line);
+					goto error;
+				}
+				free(line);
+			} else if (ret == 1)
+				buf.dev->remove_preshared_key = true;
+			else
+				goto error;
+			argv += 2;
+			argc -= 2;
+		} else if (!strcmp(argv[0], "peer") && argc >= 2) {
+			/* The new peer record starts at the current write position. */
+			peer_offset = buf.pos;
+			if (use_space(&buf, sizeof(struct wgpeer)) < 0) {
+				perror("use_space");
+				goto error;
+			}
+			++buf.dev->num_peers;
+			if (!parse_key(peer_from_offset(buf.dev, peer_offset)->public_key, argv[1]))
+				goto error;
+			argv += 2;
+			argc -= 2;
+		} else if (!strcmp(argv[0], "remove") && argc >= 1 && buf.dev->num_peers) {
+			peer_from_offset(buf.dev, peer_offset)->remove_me = true;
+			argv += 1;
+			argc -= 1;
+		} else if (!strcmp(argv[0], "endpoint") && argc >= 2 && buf.dev->num_peers) {
+			if (!parse_endpoint(&peer_from_offset(buf.dev, peer_offset)->endpoint, argv[1]))
+				goto error;
+			argv += 2;
+			argc -= 2;
+		} else if (!strcmp(argv[0], "allowed-ips") && argc >= 2 && buf.dev->num_peers) {
+			/* Whitespace is removed before the list is parsed. */
+			char *line = strip_spaces(argv[1]);
+			if (!line)
+				goto error;
+			if 
(!parse_ipmasks(&buf, peer_offset, line)) { + free(line); + goto error; + } + free(line); + argv += 2; + argc -= 2; + } else { + fprintf(stderr, "Invalid argument: %s\n", argv[0]); + goto error; + } + } + *device = buf.dev; + return true; +error: + free(buf.dev); + return false; +} diff --git a/src/tools/config.h b/src/tools/config.h new file mode 100644 index 0000000..268e3f6 --- /dev/null +++ b/src/tools/config.h @@ -0,0 +1,34 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#ifndef CONFIG_H +#define CONFIG_H + +#include +#include +#include +#include +#include +#include +#include +#include "../uapi.h" + +struct inflatable_device { + struct wgdevice *dev; + size_t len; + size_t pos; +}; + +struct config_ctx { + struct inflatable_device buf; + size_t peer_offset; + struct wgdevice **device; + bool is_peer_section; + bool is_device_section; +}; + +bool config_read_cmd(struct wgdevice **dev, char *argv[], int argc); +bool config_read_init(struct config_ctx *ctx, struct wgdevice **device, bool append); +bool config_read_line(struct config_ctx *ctx, const char *line); +bool config_read_finish(struct config_ctx *ctx); + +#endif diff --git a/src/tools/curve25519.c b/src/tools/curve25519.c new file mode 100644 index 0000000..6c26535 --- /dev/null +++ b/src/tools/curve25519.c @@ -0,0 +1,1258 @@ +/* Original author: Adam Langley + * + * Copyright 2008 Google Inc. All Rights Reserved. + * Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. + * + * Redistribution and use in source and binary forms of this file, with or + * without modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "curve25519.h" + +#include +#include + +#ifndef __always_inline +#define __always_inline __inline __attribute__((__always_inline__)) +#endif + +#ifdef __SIZEOF_INT128__ +typedef uint64_t limb; +typedef limb felem[5]; +typedef __uint128_t uint128_t; + +/* Sum two numbers: output += in */ +static __always_inline void fsum(limb *output, const limb *in) +{ + output[0] += in[0]; + output[1] += in[1]; + output[2] += in[2]; + output[3] += in[3]; + output[4] += in[4]; +} + +/* Find the difference of two numbers: output = in - output + * (note the order of the arguments!) 
+ * + * Assumes that out[i] < 2**52 + * On return, out[i] < 2**55 + */ +static __always_inline void fdifference_backwards(felem out, const felem in) +{ + /* 152 is 19 << 3 */ + static const limb two54m152 = (((limb)1) << 54) - 152; + static const limb two54m8 = (((limb)1) << 54) - 8; + + out[0] = in[0] + two54m152 - out[0]; + out[1] = in[1] + two54m8 - out[1]; + out[2] = in[2] + two54m8 - out[2]; + out[3] = in[3] + two54m8 - out[3]; + out[4] = in[4] + two54m8 - out[4]; +} + +/* Multiply a number by a scalar: output = in * scalar */ +static __always_inline void fscalar_product(felem output, const felem in, const limb scalar) +{ + uint128_t a; + + a = ((uint128_t) in[0]) * scalar; + output[0] = ((limb)a) & 0x7ffffffffffffUL; + + a = ((uint128_t) in[1]) * scalar + ((limb) (a >> 51)); + output[1] = ((limb)a) & 0x7ffffffffffffUL; + + a = ((uint128_t) in[2]) * scalar + ((limb) (a >> 51)); + output[2] = ((limb)a) & 0x7ffffffffffffUL; + + a = ((uint128_t) in[3]) * scalar + ((limb) (a >> 51)); + output[3] = ((limb)a) & 0x7ffffffffffffUL; + + a = ((uint128_t) in[4]) * scalar + ((limb) (a >> 51)); + output[4] = ((limb)a) & 0x7ffffffffffffUL; + + output[0] += (a >> 51) * 19; +} + +/* Multiply two numbers: output = in2 * in + * + * output must be distinct to both inputs. The inputs are reduced coefficient + * form, the output is not. + * + * Assumes that in[i] < 2**55 and likewise for in2. 
+ * On return, output[i] < 2**52 + */ +static __always_inline void fmul(felem output, const felem in2, const felem in) +{ + uint128_t t[5]; + limb r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c; + + r0 = in[0]; + r1 = in[1]; + r2 = in[2]; + r3 = in[3]; + r4 = in[4]; + + s0 = in2[0]; + s1 = in2[1]; + s2 = in2[2]; + s3 = in2[3]; + s4 = in2[4]; + + t[0] = ((uint128_t) r0) * s0; + t[1] = ((uint128_t) r0) * s1 + ((uint128_t) r1) * s0; + t[2] = ((uint128_t) r0) * s2 + ((uint128_t) r2) * s0 + ((uint128_t) r1) * s1; + t[3] = ((uint128_t) r0) * s3 + ((uint128_t) r3) * s0 + ((uint128_t) r1) * s2 + ((uint128_t) r2) * s1; + t[4] = ((uint128_t) r0) * s4 + ((uint128_t) r4) * s0 + ((uint128_t) r3) * s1 + ((uint128_t) r1) * s3 + ((uint128_t) r2) * s2; + + r4 *= 19; + r1 *= 19; + r2 *= 19; + r3 *= 19; + + t[0] += ((uint128_t) r4) * s1 + ((uint128_t) r1) * s4 + ((uint128_t) r2) * s3 + ((uint128_t) r3) * s2; + t[1] += ((uint128_t) r4) * s2 + ((uint128_t) r2) * s4 + ((uint128_t) r3) * s3; + t[2] += ((uint128_t) r4) * s3 + ((uint128_t) r3) * s4; + t[3] += ((uint128_t) r4) * s4; + + r0 = (limb)t[0] & 0x7ffffffffffffUL; c = (limb)(t[0] >> 51); + t[1] += c; r1 = (limb)t[1] & 0x7ffffffffffffUL; c = (limb)(t[1] >> 51); + t[2] += c; r2 = (limb)t[2] & 0x7ffffffffffffUL; c = (limb)(t[2] >> 51); + t[3] += c; r3 = (limb)t[3] & 0x7ffffffffffffUL; c = (limb)(t[3] >> 51); + t[4] += c; r4 = (limb)t[4] & 0x7ffffffffffffUL; c = (limb)(t[4] >> 51); + r0 += c * 19; c = r0 >> 51; r0 = r0 & 0x7ffffffffffffUL; + r1 += c; c = r1 >> 51; r1 = r1 & 0x7ffffffffffffUL; + r2 += c; + + output[0] = r0; + output[1] = r1; + output[2] = r2; + output[3] = r3; + output[4] = r4; +} + +static __always_inline void fsquare_times(felem output, const felem in, limb count) +{ + uint128_t t[5]; + limb r0,r1,r2,r3,r4,c; + limb d0,d1,d2,d4,d419; + + r0 = in[0]; + r1 = in[1]; + r2 = in[2]; + r3 = in[3]; + r4 = in[4]; + + do { + d0 = r0 * 2; + d1 = r1 * 2; + d2 = r2 * 2 * 19; + d419 = r4 * 19; + d4 = d419 * 2; + + t[0] = ((uint128_t) r0) * r0 + 
((uint128_t) d4) * r1 + (((uint128_t) d2) * (r3 )); + t[1] = ((uint128_t) d0) * r1 + ((uint128_t) d4) * r2 + (((uint128_t) r3) * (r3 * 19)); + t[2] = ((uint128_t) d0) * r2 + ((uint128_t) r1) * r1 + (((uint128_t) d4) * (r3 )); + t[3] = ((uint128_t) d0) * r3 + ((uint128_t) d1) * r2 + (((uint128_t) r4) * (d419 )); + t[4] = ((uint128_t) d0) * r4 + ((uint128_t) d1) * r3 + (((uint128_t) r2) * (r2 )); + + r0 = (limb)t[0] & 0x7ffffffffffffUL; c = (limb)(t[0] >> 51); + t[1] += c; r1 = (limb)t[1] & 0x7ffffffffffffUL; c = (limb)(t[1] >> 51); + t[2] += c; r2 = (limb)t[2] & 0x7ffffffffffffUL; c = (limb)(t[2] >> 51); + t[3] += c; r3 = (limb)t[3] & 0x7ffffffffffffUL; c = (limb)(t[3] >> 51); + t[4] += c; r4 = (limb)t[4] & 0x7ffffffffffffUL; c = (limb)(t[4] >> 51); + r0 += c * 19; c = r0 >> 51; r0 = r0 & 0x7ffffffffffffUL; + r1 += c; c = r1 >> 51; r1 = r1 & 0x7ffffffffffffUL; + r2 += c; + } while(--count); + + output[0] = r0; + output[1] = r1; + output[2] = r2; + output[3] = r3; + output[4] = r4; +} + +/* Load a little-endian 64-bit number */ +static limb load_limb(const uint8_t *in) +{ + return + ((limb)in[0]) | + (((limb)in[1]) << 8) | + (((limb)in[2]) << 16) | + (((limb)in[3]) << 24) | + (((limb)in[4]) << 32) | + (((limb)in[5]) << 40) | + (((limb)in[6]) << 48) | + (((limb)in[7]) << 56); +} + +static void store_limb(uint8_t *out, limb in) +{ + out[0] = in & 0xff; + out[1] = (in >> 8) & 0xff; + out[2] = (in >> 16) & 0xff; + out[3] = (in >> 24) & 0xff; + out[4] = (in >> 32) & 0xff; + out[5] = (in >> 40) & 0xff; + out[6] = (in >> 48) & 0xff; + out[7] = (in >> 56) & 0xff; +} + +/* Take a little-endian, 32-byte number and expand it into polynomial form */ +static void fexpand(limb *output, const uint8_t *in) +{ + output[0] = load_limb(in) & 0x7ffffffffffffUL; + output[1] = (load_limb(in+6) >> 3) & 0x7ffffffffffffUL; + output[2] = (load_limb(in+12) >> 6) & 0x7ffffffffffffUL; + output[3] = (load_limb(in+19) >> 1) & 0x7ffffffffffffUL; + output[4] = (load_limb(in+24) >> 12) & 
0x7ffffffffffffUL; +} + +/* Take a fully reduced polynomial form number and contract it into a + * little-endian, 32-byte array + */ +static void fcontract(uint8_t *output, const felem input) +{ + uint128_t t[5]; + + t[0] = input[0]; + t[1] = input[1]; + t[2] = input[2]; + t[3] = input[3]; + t[4] = input[4]; + + t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffffUL; + t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffffUL; + t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffffUL; + t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffffUL; + t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffffUL; + + t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffffUL; + t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffffUL; + t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffffUL; + t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffffUL; + t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffffUL; + + /* now t is between 0 and 2^255-1, properly carried. */ + /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */ + + t[0] += 19; + + t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffffUL; + t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffffUL; + t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffffUL; + t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffffUL; + t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffffUL; + + /* now between 19 and 2^255-1 in both cases, and offset by 19. */ + + t[0] += 0x8000000000000UL - 19; + t[1] += 0x8000000000000UL - 1; + t[2] += 0x8000000000000UL - 1; + t[3] += 0x8000000000000UL - 1; + t[4] += 0x8000000000000UL - 1; + + /* now between 2^255 and 2^256-20, and offset by 2^255. 
*/ + + t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffffUL; + t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffffUL; + t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffffUL; + t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffffUL; + t[4] &= 0x7ffffffffffffUL; + + store_limb(output, t[0] | (t[1] << 51)); + store_limb(output+8, (t[1] >> 13) | (t[2] << 38)); + store_limb(output+16, (t[2] >> 26) | (t[3] << 25)); + store_limb(output+24, (t[3] >> 39) | (t[4] << 12)); +} + +/* Input: Q, Q', Q-Q' + * Output: 2Q, Q+Q' + * + * x2 z3: long form + * x3 z3: long form + * x z: short form, destroyed + * xprime zprime: short form, destroyed + * qmqp: short form, preserved + */ +static void fmonty(limb *x2, limb *z2, /* output 2Q */ + limb *x3, limb *z3, /* output Q + Q' */ + limb *x, limb *z, /* input Q */ + limb *xprime, limb *zprime, /* input Q' */ + const limb *qmqp /* input Q - Q' */) +{ + limb origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5], zzprime[5], zzzprime[5]; + + memcpy(origx, x, 5 * sizeof(limb)); + fsum(x, z); + fdifference_backwards(z, origx); // does x - z + + memcpy(origxprime, xprime, sizeof(limb) * 5); + fsum(xprime, zprime); + fdifference_backwards(zprime, origxprime); + fmul(xxprime, xprime, z); + fmul(zzprime, x, zprime); + memcpy(origxprime, xxprime, sizeof(limb) * 5); + fsum(xxprime, zzprime); + fdifference_backwards(zzprime, origxprime); + fsquare_times(x3, xxprime, 1); + fsquare_times(zzzprime, zzprime, 1); + fmul(z3, zzzprime, qmqp); + + fsquare_times(xx, x, 1); + fsquare_times(zz, z, 1); + fmul(x2, xx, zz); + fdifference_backwards(zz, xx); // does zz = xx - zz + fscalar_product(zzz, zz, 121665); + fsum(zzz, xx); + fmul(z2, zz, zzz); +} + +/* Maybe swap the contents of two limb arrays (@a and @b), each @len elements + * long. Perform the swap iff @swap is non-zero. + * + * This function performs the swap without leaking any side-channel + * information. 
+ */ +static void swap_conditional(limb a[5], limb b[5], limb iswap) +{ + unsigned i; + const limb swap = -iswap; + + for (i = 0; i < 5; ++i) { + const limb x = swap & (a[i] ^ b[i]); + a[i] ^= x; + b[i] ^= x; + } +} + +/* Calculates nQ where Q is the x-coordinate of a point on the curve + * + * resultx/resultz: the x coordinate of the resulting curve point (short form) + * n: a little endian, 32-byte number + * q: a point of the curve (short form) + */ +static void cmult(limb *resultx, limb *resultz, const uint8_t *n, const limb *q) +{ + limb a[5] = {0}, b[5] = {1}, c[5] = {1}, d[5] = {0}; + limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t; + limb e[5] = {0}, f[5] = {1}, g[5] = {0}, h[5] = {1}; + limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h; + + unsigned i, j; + + memcpy(nqpqx, q, sizeof(limb) * 5); + + for (i = 0; i < 32; ++i) { + uint8_t byte = n[31 - i]; + for (j = 0; j < 8; ++j) { + const limb bit = byte >> 7; + + swap_conditional(nqx, nqpqx, bit); + swap_conditional(nqz, nqpqz, bit); + fmonty(nqx2, nqz2, + nqpqx2, nqpqz2, + nqx, nqz, + nqpqx, nqpqz, + q); + swap_conditional(nqx2, nqpqx2, bit); + swap_conditional(nqz2, nqpqz2, bit); + + t = nqx; + nqx = nqx2; + nqx2 = t; + t = nqz; + nqz = nqz2; + nqz2 = t; + t = nqpqx; + nqpqx = nqpqx2; + nqpqx2 = t; + t = nqpqz; + nqpqz = nqpqz2; + nqpqz2 = t; + + byte <<= 1; + } + } + + memcpy(resultx, nqx, sizeof(limb) * 5); + memcpy(resultz, nqz, sizeof(limb) * 5); +} + +static void crecip(felem out, const felem z) +{ + felem a,t0,b,c; + + /* 2 */ fsquare_times(a, z, 1); // a = 2 + /* 8 */ fsquare_times(t0, a, 2); + /* 9 */ fmul(b, t0, z); // b = 9 + /* 11 */ fmul(a, b, a); // a = 11 + /* 22 */ fsquare_times(t0, a, 1); + /* 2^5 - 2^0 = 31 */ fmul(b, t0, b); + /* 2^10 - 2^5 */ fsquare_times(t0, b, 5); + /* 2^10 - 2^0 */ fmul(b, t0, b); + /* 2^20 - 2^10 */ fsquare_times(t0, b, 10); + /* 2^20 - 2^0 */ fmul(c, t0, b); + /* 2^40 - 2^20 */ fsquare_times(t0, c, 20); + /* 2^40 - 2^0 */ fmul(t0, t0, c); + /* 2^50 - 2^10 
*/ fsquare_times(t0, t0, 10);
+	/* 2^50 - 2^0 */ fmul(b, t0, b);
+	/* 2^100 - 2^50 */ fsquare_times(t0, b, 50);
+	/* 2^100 - 2^0 */ fmul(c, t0, b);
+	/* 2^200 - 2^100 */ fsquare_times(t0, c, 100);
+	/* 2^200 - 2^0 */ fmul(t0, t0, c);
+	/* 2^250 - 2^50 */ fsquare_times(t0, t0, 50);
+	/* 2^250 - 2^0 */ fmul(t0, t0, b);
+	/* 2^255 - 2^5 */ fsquare_times(t0, t0, 5);
+	/* 2^255 - 21 */ fmul(out, t0, a);
+}
+
+/* Compute mypublic = secret * basepoint on Curve25519 (x-coordinate only).
+ *
+ * The secret is copied and passed through curve25519_normalize_secret()
+ * before use, so the caller's buffer is not modified. The projective result
+ * x/z from cmult() is turned into a single field element by multiplying x
+ * with the reciprocal of z, then serialized into the 32-byte output. */
+void curve25519(uint8_t mypublic[CURVE25519_POINT_SIZE], const uint8_t secret[CURVE25519_POINT_SIZE], const uint8_t basepoint[CURVE25519_POINT_SIZE])
+{
+	limb bp[5], x[5], z[5], zmone[5];
+	uint8_t e[32];
+
+	memcpy(e, secret, 32);
+	curve25519_normalize_secret(e);
+
+	fexpand(bp, basepoint);
+	cmult(x, z, e, bp);
+	crecip(zmone, z);
+	fmul(z, x, zmone);
+	fcontract(mypublic, z);
+}
+
+#else
+typedef int64_t limb;
+
+/* Field element representation:
+ *
+ * Field elements are written as an array of signed, 64-bit limbs, least
+ * significant first. The value of the field element is:
+ * x[0] + 2^26·x[1] + 2^51·x[2] + 2^77·x[3] + 2^102·x[4] + ...
+ *
+ * i.e. the limbs are 26, 25, 26, 25, ... bits wide. */
+
+/* Sum two numbers: output += in */
+static void fsum(limb *output, const limb *in)
+{
+	unsigned i;
+	for (i = 0; i < 10; i += 2) {
+		output[0 + i] = output[0 + i] + in[0 + i];
+		output[1 + i] = output[1 + i] + in[1 + i];
+	}
+}
+
+/* Find the difference of two numbers: output = in - output
+ * (note the order of the arguments!). */
+static void fdifference(limb *output, const limb *in)
+{
+	unsigned i;
+	for (i = 0; i < 10; ++i) {
+		output[i] = in[i] - output[i];
+	}
+}
+
+/* Multiply a number by a scalar: output = in * scalar */
+static void fscalar_product(limb *output, const limb *in, const limb scalar)
+{
+	unsigned i;
+	for (i = 0; i < 10; ++i) {
+		output[i] = in[i] * scalar;
+	}
+}
+
+/* Multiply two numbers: output = in2 * in
+ *
+ * output must be distinct to both inputs. The inputs are reduced coefficient
+ * form, the output is not.
+ * + * output[x] <= 14 * the largest product of the input limbs. */ +static void fproduct(limb *output, const limb *in2, const limb *in) +{ + output[0] = ((limb) ((int32_t) in2[0])) * ((int32_t) in[0]); + output[1] = ((limb) ((int32_t) in2[0])) * ((int32_t) in[1]) + + ((limb) ((int32_t) in2[1])) * ((int32_t) in[0]); + output[2] = 2 * ((limb) ((int32_t) in2[1])) * ((int32_t) in[1]) + + ((limb) ((int32_t) in2[0])) * ((int32_t) in[2]) + + ((limb) ((int32_t) in2[2])) * ((int32_t) in[0]); + output[3] = ((limb) ((int32_t) in2[1])) * ((int32_t) in[2]) + + ((limb) ((int32_t) in2[2])) * ((int32_t) in[1]) + + ((limb) ((int32_t) in2[0])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in2[3])) * ((int32_t) in[0]); + output[4] = ((limb) ((int32_t) in2[2])) * ((int32_t) in[2]) + + 2 * (((limb) ((int32_t) in2[1])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in2[3])) * ((int32_t) in[1])) + + ((limb) ((int32_t) in2[0])) * ((int32_t) in[4]) + + ((limb) ((int32_t) in2[4])) * ((int32_t) in[0]); + output[5] = ((limb) ((int32_t) in2[2])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in2[3])) * ((int32_t) in[2]) + + ((limb) ((int32_t) in2[1])) * ((int32_t) in[4]) + + ((limb) ((int32_t) in2[4])) * ((int32_t) in[1]) + + ((limb) ((int32_t) in2[0])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in2[5])) * ((int32_t) in[0]); + output[6] = 2 * (((limb) ((int32_t) in2[3])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in2[1])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in2[5])) * ((int32_t) in[1])) + + ((limb) ((int32_t) in2[2])) * ((int32_t) in[4]) + + ((limb) ((int32_t) in2[4])) * ((int32_t) in[2]) + + ((limb) ((int32_t) in2[0])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in2[6])) * ((int32_t) in[0]); + output[7] = ((limb) ((int32_t) in2[3])) * ((int32_t) in[4]) + + ((limb) ((int32_t) in2[4])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in2[2])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in2[5])) * ((int32_t) in[2]) + + ((limb) ((int32_t) in2[1])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in2[6])) * 
((int32_t) in[1]) + + ((limb) ((int32_t) in2[0])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in2[7])) * ((int32_t) in[0]); + output[8] = ((limb) ((int32_t) in2[4])) * ((int32_t) in[4]) + + 2 * (((limb) ((int32_t) in2[3])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in2[5])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in2[1])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in2[7])) * ((int32_t) in[1])) + + ((limb) ((int32_t) in2[2])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in2[6])) * ((int32_t) in[2]) + + ((limb) ((int32_t) in2[0])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in2[8])) * ((int32_t) in[0]); + output[9] = ((limb) ((int32_t) in2[4])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in2[5])) * ((int32_t) in[4]) + + ((limb) ((int32_t) in2[3])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in2[6])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in2[2])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in2[7])) * ((int32_t) in[2]) + + ((limb) ((int32_t) in2[1])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in2[8])) * ((int32_t) in[1]) + + ((limb) ((int32_t) in2[0])) * ((int32_t) in[9]) + + ((limb) ((int32_t) in2[9])) * ((int32_t) in[0]); + output[10] = 2 * (((limb) ((int32_t) in2[5])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in2[3])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in2[7])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in2[1])) * ((int32_t) in[9]) + + ((limb) ((int32_t) in2[9])) * ((int32_t) in[1])) + + ((limb) ((int32_t) in2[4])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in2[6])) * ((int32_t) in[4]) + + ((limb) ((int32_t) in2[2])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in2[8])) * ((int32_t) in[2]); + output[11] = ((limb) ((int32_t) in2[5])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in2[6])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in2[4])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in2[7])) * ((int32_t) in[4]) + + ((limb) ((int32_t) in2[3])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in2[8])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in2[2])) * ((int32_t) in[9]) 
+ + ((limb) ((int32_t) in2[9])) * ((int32_t) in[2]); + output[12] = ((limb) ((int32_t) in2[6])) * ((int32_t) in[6]) + + 2 * (((limb) ((int32_t) in2[5])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in2[7])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in2[3])) * ((int32_t) in[9]) + + ((limb) ((int32_t) in2[9])) * ((int32_t) in[3])) + + ((limb) ((int32_t) in2[4])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in2[8])) * ((int32_t) in[4]); + output[13] = ((limb) ((int32_t) in2[6])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in2[7])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in2[5])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in2[8])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in2[4])) * ((int32_t) in[9]) + + ((limb) ((int32_t) in2[9])) * ((int32_t) in[4]); + output[14] = 2 * (((limb) ((int32_t) in2[7])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in2[5])) * ((int32_t) in[9]) + + ((limb) ((int32_t) in2[9])) * ((int32_t) in[5])) + + ((limb) ((int32_t) in2[6])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in2[8])) * ((int32_t) in[6]); + output[15] = ((limb) ((int32_t) in2[7])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in2[8])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in2[6])) * ((int32_t) in[9]) + + ((limb) ((int32_t) in2[9])) * ((int32_t) in[6]); + output[16] = ((limb) ((int32_t) in2[8])) * ((int32_t) in[8]) + + 2 * (((limb) ((int32_t) in2[7])) * ((int32_t) in[9]) + + ((limb) ((int32_t) in2[9])) * ((int32_t) in[7])); + output[17] = ((limb) ((int32_t) in2[8])) * ((int32_t) in[9]) + + ((limb) ((int32_t) in2[9])) * ((int32_t) in[8]); + output[18] = 2 * ((limb) ((int32_t) in2[9])) * ((int32_t) in[9]); +} + +/* Reduce a long form to a short form by taking the input mod 2^255 - 19. + * + * On entry: |output[i]| < 14*2^54 + * On exit: |output[0..8]| < 280*2^54 */ +static void freduce_degree(limb *output) +{ + /* Each of these shifts and adds ends up multiplying the value by 19. 
+ * + * For output[0..8], the absolute entry value is < 14*2^54 and we add, at + * most, 19*14*2^54 thus, on exit, |output[0..8]| < 280*2^54. */ + output[8] += output[18] << 4; + output[8] += output[18] << 1; + output[8] += output[18]; + output[7] += output[17] << 4; + output[7] += output[17] << 1; + output[7] += output[17]; + output[6] += output[16] << 4; + output[6] += output[16] << 1; + output[6] += output[16]; + output[5] += output[15] << 4; + output[5] += output[15] << 1; + output[5] += output[15]; + output[4] += output[14] << 4; + output[4] += output[14] << 1; + output[4] += output[14]; + output[3] += output[13] << 4; + output[3] += output[13] << 1; + output[3] += output[13]; + output[2] += output[12] << 4; + output[2] += output[12] << 1; + output[2] += output[12]; + output[1] += output[11] << 4; + output[1] += output[11] << 1; + output[1] += output[11]; + output[0] += output[10] << 4; + output[0] += output[10] << 1; + output[0] += output[10]; +} + +#if (-1 & 3) != 3 +#error "This code only works on a two's complement system" +#endif + +/* return v / 2^26, using only shifts and adds. + * + * On entry: v can take any value. */ +static inline limb div_by_2_26(const limb v) +{ + /* High word of v; no shift needed. */ + const uint32_t highword = (uint32_t) (((uint64_t) v) >> 32); + /* Set to all 1s if v was negative; else set to 0s. */ + const int32_t sign = ((int32_t) highword) >> 31; + /* Set to 0x3ffffff if v was negative; else set to 0. */ + const int32_t roundoff = ((uint32_t) sign) >> 6; + /* Should return v / (1<<26) */ + return (v + roundoff) >> 26; +} + +/* return v / (2^25), using only shifts and adds. + * + * On entry: v can take any value. */ +static inline limb div_by_2_25(const limb v) +{ + /* High word of v; no shift needed*/ + const uint32_t highword = (uint32_t) (((uint64_t) v) >> 32); + /* Set to all 1s if v was negative; else set to 0s. */ + const int32_t sign = ((int32_t) highword) >> 31; + /* Set to 0x1ffffff if v was negative; else set to 0. 
*/ + const int32_t roundoff = ((uint32_t) sign) >> 7; + /* Should return v / (1<<25) */ + return (v + roundoff) >> 25; +} + +/* Reduce all coefficients of the short form input so that |x| < 2^26. + * + * On entry: |output[i]| < 280*2^54 */ +static void freduce_coefficients(limb *output) +{ + unsigned i; + + output[10] = 0; + + for (i = 0; i < 10; i += 2) { + limb over = div_by_2_26(output[i]); + /* The entry condition (that |output[i]| < 280*2^54) means that over is, at + * most, 280*2^28 in the first iteration of this loop. This is added to the + * next limb and we can approximate the resulting bound of that limb by + * 281*2^54. */ + output[i] -= over << 26; + output[i+1] += over; + + /* For the first iteration, |output[i+1]| < 281*2^54, thus |over| < + * 281*2^29. When this is added to the next limb, the resulting bound can + * be approximated as 281*2^54. + * + * For subsequent iterations of the loop, 281*2^54 remains a conservative + * bound and no overflow occurs. */ + over = div_by_2_25(output[i+1]); + output[i+1] -= over << 25; + output[i+2] += over; + } + /* Now |output[10]| < 281*2^29 and all other coefficients are reduced. */ + output[0] += output[10] << 4; + output[0] += output[10] << 1; + output[0] += output[10]; + + output[10] = 0; + + /* Now output[1..9] are reduced, and |output[0]| < 2^26 + 19*281*2^29 + * So |over| will be no more than 2^16. */ + { + limb over = div_by_2_26(output[0]); + output[0] -= over << 26; + output[1] += over; + } + + /* Now output[0,2..9] are reduced, and |output[1]| < 2^25 + 2^16 < 2^26. The + * bound on |output[1]| is sufficient to meet our needs. */ +} + +/* A helpful wrapper around fproduct: output = in * in2. + * + * On entry: |in[i]| < 2^27 and |in2[i]| < 2^27. + * + * output must be distinct to both inputs. The output is reduced degree + * (indeed, one need only provide storage for 10 limbs) and |output[i]| < 2^26. 
*/ +static void fmul(limb *output, const limb *in, const limb *in2) +{ + limb t[19]; + fproduct(t, in, in2); + /* |t[i]| < 14*2^54 */ + freduce_degree(t); + freduce_coefficients(t); + /* |t[i]| < 2^26 */ + memcpy(output, t, sizeof(limb) * 10); +} + +/* Square a number: output = in**2 + * + * output must be distinct from the input. The inputs are reduced coefficient + * form, the output is not. + * + * output[x] <= 14 * the largest product of the input limbs. */ +static void fsquare_inner(limb *output, const limb *in) +{ + output[0] = ((limb) ((int32_t) in[0])) * ((int32_t) in[0]); + output[1] = 2 * ((limb) ((int32_t) in[0])) * ((int32_t) in[1]); + output[2] = 2 * (((limb) ((int32_t) in[1])) * ((int32_t) in[1]) + + ((limb) ((int32_t) in[0])) * ((int32_t) in[2])); + output[3] = 2 * (((limb) ((int32_t) in[1])) * ((int32_t) in[2]) + + ((limb) ((int32_t) in[0])) * ((int32_t) in[3])); + output[4] = ((limb) ((int32_t) in[2])) * ((int32_t) in[2]) + + 4 * ((limb) ((int32_t) in[1])) * ((int32_t) in[3]) + + 2 * ((limb) ((int32_t) in[0])) * ((int32_t) in[4]); + output[5] = 2 * (((limb) ((int32_t) in[2])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in[1])) * ((int32_t) in[4]) + + ((limb) ((int32_t) in[0])) * ((int32_t) in[5])); + output[6] = 2 * (((limb) ((int32_t) in[3])) * ((int32_t) in[3]) + + ((limb) ((int32_t) in[2])) * ((int32_t) in[4]) + + ((limb) ((int32_t) in[0])) * ((int32_t) in[6]) + + 2 * ((limb) ((int32_t) in[1])) * ((int32_t) in[5])); + output[7] = 2 * (((limb) ((int32_t) in[3])) * ((int32_t) in[4]) + + ((limb) ((int32_t) in[2])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in[1])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in[0])) * ((int32_t) in[7])); + output[8] = ((limb) ((int32_t) in[4])) * ((int32_t) in[4]) + + 2 * (((limb) ((int32_t) in[2])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in[0])) * ((int32_t) in[8]) + + 2 * (((limb) ((int32_t) in[1])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in[3])) * ((int32_t) in[5]))); + output[9] = 2 * (((limb) ((int32_t) 
in[4])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in[3])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in[2])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in[1])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in[0])) * ((int32_t) in[9])); + output[10] = 2 * (((limb) ((int32_t) in[5])) * ((int32_t) in[5]) + + ((limb) ((int32_t) in[4])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in[2])) * ((int32_t) in[8]) + + 2 * (((limb) ((int32_t) in[3])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in[1])) * ((int32_t) in[9]))); + output[11] = 2 * (((limb) ((int32_t) in[5])) * ((int32_t) in[6]) + + ((limb) ((int32_t) in[4])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in[3])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in[2])) * ((int32_t) in[9])); + output[12] = ((limb) ((int32_t) in[6])) * ((int32_t) in[6]) + + 2 * (((limb) ((int32_t) in[4])) * ((int32_t) in[8]) + + 2 * (((limb) ((int32_t) in[5])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in[3])) * ((int32_t) in[9]))); + output[13] = 2 * (((limb) ((int32_t) in[6])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in[5])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in[4])) * ((int32_t) in[9])); + output[14] = 2 * (((limb) ((int32_t) in[7])) * ((int32_t) in[7]) + + ((limb) ((int32_t) in[6])) * ((int32_t) in[8]) + + 2 * ((limb) ((int32_t) in[5])) * ((int32_t) in[9])); + output[15] = 2 * (((limb) ((int32_t) in[7])) * ((int32_t) in[8]) + + ((limb) ((int32_t) in[6])) * ((int32_t) in[9])); + output[16] = ((limb) ((int32_t) in[8])) * ((int32_t) in[8]) + + 4 * ((limb) ((int32_t) in[7])) * ((int32_t) in[9]); + output[17] = 2 * ((limb) ((int32_t) in[8])) * ((int32_t) in[9]); + output[18] = 2 * ((limb) ((int32_t) in[9])) * ((int32_t) in[9]); +} + +/* fsquare sets output = in^2. + * + * On entry: The |in| argument is in reduced coefficients form and |in[i]| < + * 2^27. + * + * On exit: The |output| argument is in reduced coefficients form (indeed, one + * need only provide storage for 10 limbs) and |out[i]| < 2^26. 
*/ +static void fsquare(limb *output, const limb *in) +{ + limb t[19]; + fsquare_inner(t, in); + /* |t[i]| < 14*2^54 because the largest product of two limbs will be < + * 2^(27+27) and fsquare_inner adds together, at most, 14 of those + * products. */ + freduce_degree(t); + freduce_coefficients(t); + /* |t[i]| < 2^26 */ + memcpy(output, t, sizeof(limb) * 10); +} + +/* Take a little-endian, 32-byte number and expand it into polynomial form */ +static void fexpand(limb *output, const uint8_t *input) +{ +#define F(n,start,shift,mask) \ + output[n] = ((((limb) input[start + 0]) | \ + ((limb) input[start + 1]) << 8 | \ + ((limb) input[start + 2]) << 16 | \ + ((limb) input[start + 3]) << 24) >> shift) & mask; + F(0, 0, 0, 0x3ffffff); + F(1, 3, 2, 0x1ffffff); + F(2, 6, 3, 0x3ffffff); + F(3, 9, 5, 0x1ffffff); + F(4, 12, 6, 0x3ffffff); + F(5, 16, 0, 0x1ffffff); + F(6, 19, 1, 0x3ffffff); + F(7, 22, 3, 0x1ffffff); + F(8, 25, 4, 0x3ffffff); + F(9, 28, 6, 0x1ffffff); +#undef F +} + +#if (-32 >> 1) != -16 +#error "This code only works when >> does sign-extension on negative numbers" +#endif + +/* int32_t_eq returns 0xffffffff iff a == b and zero otherwise. */ +static int32_t int32_t_eq(int32_t a, int32_t b) +{ + a = ~(a ^ b); + a &= a << 16; + a &= a << 8; + a &= a << 4; + a &= a << 2; + a &= a << 1; + return a >> 31; +} + +/* int32_t_gte returns 0xffffffff if a >= b and zero otherwise, where a and b are + * both non-negative. */ +static int32_t int32_t_gte(int32_t a, int32_t b) +{ + a -= b; + /* a >= 0 iff a >= b. */ + return ~(a >> 31); +} + +/* Take a fully reduced polynomial form number and contract it into a + * little-endian, 32-byte array. + * + * On entry: |input_limbs[i]| < 2^26 */ +static void fcontract(uint8_t *output, limb *input_limbs) +{ + int i; + int j; + int32_t input[10]; + int32_t mask; + + /* |input_limbs[i]| < 2^26, so it's valid to convert to an int32_t. 
*/ + for (i = 0; i < 10; i++) { + input[i] = input_limbs[i]; + } + + for (j = 0; j < 2; ++j) { + for (i = 0; i < 9; ++i) { + if ((i & 1) == 1) { + /* This calculation is a time-invariant way to make input[i] + * non-negative by borrowing from the next-larger limb. */ + const int32_t mask = input[i] >> 31; + const int32_t carry = -((input[i] & mask) >> 25); + input[i] = input[i] + (carry << 25); + input[i+1] = input[i+1] - carry; + } else { + const int32_t mask = input[i] >> 31; + const int32_t carry = -((input[i] & mask) >> 26); + input[i] = input[i] + (carry << 26); + input[i+1] = input[i+1] - carry; + } + } + + /* There's no greater limb for input[9] to borrow from, but we can multiply + * by 19 and borrow from input[0], which is valid mod 2^255-19. */ + { + const int32_t mask = input[9] >> 31; + const int32_t carry = -((input[9] & mask) >> 25); + input[9] = input[9] + (carry << 25); + input[0] = input[0] - (carry * 19); + } + + /* After the first iteration, input[1..9] are non-negative and fit within + * 25 or 26 bits, depending on position. However, input[0] may be + * negative. */ + } + + /* The first borrow-propagation pass above ended with every limb + except (possibly) input[0] non-negative. + If input[0] was negative after the first pass, then it was because of a + carry from input[9]. On entry, input[9] < 2^26 so the carry was, at most, + one, since (2**26-1) >> 25 = 1. Thus input[0] >= -19. + In the second pass, each limb is decreased by at most one. Thus the second + borrow-propagation pass could only have wrapped around to decrease + input[0] again if the first pass left input[0] negative *and* input[1] + through input[9] were all zero. In that case, input[1] is now 2^25 - 1, + and this last borrow-propagation step will leave input[1] non-negative. 
*/ + { + const int32_t mask = input[0] >> 31; + const int32_t carry = -((input[0] & mask) >> 26); + input[0] = input[0] + (carry << 26); + input[1] = input[1] - carry; + } + + /* All input[i] are now non-negative. However, there might be values between + * 2^25 and 2^26 in a limb which is, nominally, 25 bits wide. */ + for (j = 0; j < 2; j++) { + for (i = 0; i < 9; i++) { + if ((i & 1) == 1) { + const int32_t carry = input[i] >> 25; + input[i] &= 0x1ffffff; + input[i+1] += carry; + } else { + const int32_t carry = input[i] >> 26; + input[i] &= 0x3ffffff; + input[i+1] += carry; + } + } + + { + const int32_t carry = input[9] >> 25; + input[9] &= 0x1ffffff; + input[0] += 19*carry; + } + } + + /* If the first carry-chain pass, just above, ended up with a carry from + * input[9], and that caused input[0] to be out-of-bounds, then input[0] was + * < 2^26 + 2*19, because the carry was, at most, two. + * + * If the second pass carried from input[9] again then input[0] is < 2*19 and + * the input[9] -> input[0] carry didn't push input[0] out of bounds. */ + + /* It still remains the case that input might be between 2^255-19 and 2^255. + * In this case, input[1..9] must take their maximum value and input[0] must + * be >= (2^255-19) & 0x3ffffff, which is 0x3ffffed. */ + mask = int32_t_gte(input[0], 0x3ffffed); + for (i = 1; i < 10; i++) { + if ((i & 1) == 1) { + mask &= int32_t_eq(input[i], 0x1ffffff); + } else { + mask &= int32_t_eq(input[i], 0x3ffffff); + } + } + + /* mask is either 0xffffffff (if input >= 2^255-19) and zero otherwise. Thus + * this conditionally subtracts 2^255-19. 
*/ + input[0] -= mask & 0x3ffffed; + + for (i = 1; i < 10; i++) { + if ((i & 1) == 1) { + input[i] -= mask & 0x1ffffff; + } else { + input[i] -= mask & 0x3ffffff; + } + } + + input[1] <<= 2; + input[2] <<= 3; + input[3] <<= 5; + input[4] <<= 6; + input[6] <<= 1; + input[7] <<= 3; + input[8] <<= 4; + input[9] <<= 6; +#define F(i, s) \ + output[s+0] |= input[i] & 0xff; \ + output[s+1] = (input[i] >> 8) & 0xff; \ + output[s+2] = (input[i] >> 16) & 0xff; \ + output[s+3] = (input[i] >> 24) & 0xff; + output[0] = 0; + output[16] = 0; + F(0,0); + F(1,3); + F(2,6); + F(3,9); + F(4,12); + F(5,16); + F(6,19); + F(7,22); + F(8,25); + F(9,28); +#undef F +} + +/* Input: Q, Q', Q-Q' + * Output: 2Q, Q+Q' + * + * x2 z2: long form + * x3 z3: long form + * x z: short form, destroyed + * xprime zprime: short form, destroyed + * qmqp: short form, preserved + * + * On entry and exit, the absolute value of the limbs of all inputs and outputs + * are < 2^26. */ +static void fmonty(limb *x2, limb *z2, /* output 2Q */ + limb *x3, limb *z3, /* output Q + Q' */ + limb *x, limb *z, /* input Q */ + limb *xprime, limb *zprime, /* input Q' */ + const limb *qmqp /* input Q - Q' */) +{ + limb origx[10], origxprime[10], zzz[19], xx[19], zz[19], xxprime[19], + zzprime[19], zzzprime[19], xxxprime[19]; + + memcpy(origx, x, 10 * sizeof(limb)); + fsum(x, z); + /* |x[i]| < 2^27 */ + fdifference(z, origx); /* does x - z */ + /* |z[i]| < 2^27 */ + + memcpy(origxprime, xprime, sizeof(limb) * 10); + fsum(xprime, zprime); + /* |xprime[i]| < 2^27 */ + fdifference(zprime, origxprime); + /* |zprime[i]| < 2^27 */ + fproduct(xxprime, xprime, z); + /* |xxprime[i]| < 14*2^54: the largest product of two limbs will be < + * 2^(27+27) and fproduct adds together, at most, 14 of those products. + * (Approximating that to 2^58 doesn't work out.) 
*/ + fproduct(zzprime, x, zprime); + /* |zzprime[i]| < 14*2^54 */ + freduce_degree(xxprime); + freduce_coefficients(xxprime); + /* |xxprime[i]| < 2^26 */ + freduce_degree(zzprime); + freduce_coefficients(zzprime); + /* |zzprime[i]| < 2^26 */ + memcpy(origxprime, xxprime, sizeof(limb) * 10); + fsum(xxprime, zzprime); + /* |xxprime[i]| < 2^27 */ + fdifference(zzprime, origxprime); + /* |zzprime[i]| < 2^27 */ + fsquare(xxxprime, xxprime); + /* |xxxprime[i]| < 2^26 */ + fsquare(zzzprime, zzprime); + /* |zzzprime[i]| < 2^26 */ + fproduct(zzprime, zzzprime, qmqp); + /* |zzprime[i]| < 14*2^52 */ + freduce_degree(zzprime); + freduce_coefficients(zzprime); + /* |zzprime[i]| < 2^26 */ + memcpy(x3, xxxprime, sizeof(limb) * 10); + memcpy(z3, zzprime, sizeof(limb) * 10); + + fsquare(xx, x); + /* |xx[i]| < 2^26 */ + fsquare(zz, z); + /* |zz[i]| < 2^26 */ + fproduct(x2, xx, zz); + /* |x2[i]| < 14*2^52 */ + freduce_degree(x2); + freduce_coefficients(x2); + /* |x2[i]| < 2^26 */ + fdifference(zz, xx); // does zz = xx - zz + /* |zz[i]| < 2^27 */ + memset(zzz + 10, 0, sizeof(limb) * 9); + fscalar_product(zzz, zz, 121665); + /* |zzz[i]| < 2^(27+17) */ + /* No need to call freduce_degree here: + fscalar_product doesn't increase the degree of its input. */ + freduce_coefficients(zzz); + /* |zzz[i]| < 2^26 */ + fsum(zzz, xx); + /* |zzz[i]| < 2^27 */ + fproduct(z2, zz, zzz); + /* |z2[i]| < 14*2^(26+27) */ + freduce_degree(z2); + freduce_coefficients(z2); + /* |z2[i]| < 2^26 */ +} + +/* Conditionally swap two reduced-form limb arrays if 'iswap' is 1, but leave + * them unchanged if 'iswap' is 0. Runs in data-invariant time to avoid + * side-channel attacks. + * + * NOTE that this function requires that 'iswap' be 1 or 0; other values give + * wrong results. Also, the two limb arrays must be in reduced-coefficient, + * reduced-degree form: the values in a[10..18] or b[10..18] aren't swapped, + * and all values in a[0..9],b[0..9] must have magnitude less than + * INT32_MAX. 
*/ +static void swap_conditional(limb a[19], limb b[19], limb iswap) +{ + unsigned i; + const int32_t swap = (int32_t) -iswap; + + for (i = 0; i < 10; ++i) { + const int32_t x = swap & ( ((int32_t)a[i]) ^ ((int32_t)b[i]) ); + a[i] = ((int32_t)a[i]) ^ x; + b[i] = ((int32_t)b[i]) ^ x; + } +} + +/* Calculates nQ where Q is the x-coordinate of a point on the curve + * + * resultx/resultz: the x coordinate of the resulting curve point (short form) + * n: a little endian, 32-byte number + * q: a point of the curve (short form) */ +static void cmult(limb *resultx, limb *resultz, const uint8_t *n, const limb *q) +{ + limb a[19] = {0}, b[19] = {1}, c[19] = {1}, d[19] = {0}; + limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t; + limb e[19] = {0}, f[19] = {1}, g[19] = {0}, h[19] = {1}; + limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h; + + unsigned i, j; + + memcpy(nqpqx, q, sizeof(limb) * 10); + + for (i = 0; i < 32; ++i) { + uint8_t byte = n[31 - i]; + for (j = 0; j < 8; ++j) { + const limb bit = byte >> 7; + + swap_conditional(nqx, nqpqx, bit); + swap_conditional(nqz, nqpqz, bit); + fmonty(nqx2, nqz2, + nqpqx2, nqpqz2, + nqx, nqz, + nqpqx, nqpqz, + q); + swap_conditional(nqx2, nqpqx2, bit); + swap_conditional(nqz2, nqpqz2, bit); + + t = nqx; + nqx = nqx2; + nqx2 = t; + t = nqz; + nqz = nqz2; + nqz2 = t; + t = nqpqx; + nqpqx = nqpqx2; + nqpqx2 = t; + t = nqpqz; + nqpqz = nqpqz2; + nqpqz2 = t; + + byte <<= 1; + } + } + + memcpy(resultx, nqx, sizeof(limb) * 10); + memcpy(resultz, nqz, sizeof(limb) * 10); +} + +static void crecip(limb *out, const limb *z) +{ + limb z2[10]; + limb z9[10]; + limb z11[10]; + limb z2_5_0[10]; + limb z2_10_0[10]; + limb z2_20_0[10]; + limb z2_50_0[10]; + limb z2_100_0[10]; + limb t0[10]; + limb t1[10]; + int i; + + /* 2 */ fsquare(z2,z); + /* 4 */ fsquare(t1,z2); + /* 8 */ fsquare(t0,t1); + /* 9 */ fmul(z9,t0,z); + /* 11 */ fmul(z11,z9,z2); + /* 22 */ fsquare(t0,z11); + /* 2^5 - 2^0 = 31 */ fmul(z2_5_0,t0,z9); + + /* 2^6 - 2^1 */ 
fsquare(t0,z2_5_0); + /* 2^7 - 2^2 */ fsquare(t1,t0); + /* 2^8 - 2^3 */ fsquare(t0,t1); + /* 2^9 - 2^4 */ fsquare(t1,t0); + /* 2^10 - 2^5 */ fsquare(t0,t1); + /* 2^10 - 2^0 */ fmul(z2_10_0,t0,z2_5_0); + + /* 2^11 - 2^1 */ fsquare(t0,z2_10_0); + /* 2^12 - 2^2 */ fsquare(t1,t0); + /* 2^20 - 2^10 */ for (i = 2; i < 10; i += 2) { fsquare(t0,t1); fsquare(t1,t0); } + /* 2^20 - 2^0 */ fmul(z2_20_0,t1,z2_10_0); + + /* 2^21 - 2^1 */ fsquare(t0,z2_20_0); + /* 2^22 - 2^2 */ fsquare(t1,t0); + /* 2^40 - 2^20 */ for (i = 2; i < 20; i += 2) { fsquare(t0,t1); fsquare(t1,t0); } + /* 2^40 - 2^0 */ fmul(t0,t1,z2_20_0); + + /* 2^41 - 2^1 */ fsquare(t1,t0); + /* 2^42 - 2^2 */ fsquare(t0,t1); + /* 2^50 - 2^10 */ for (i = 2; i < 10; i += 2) { fsquare(t1,t0); fsquare(t0,t1); } + /* 2^50 - 2^0 */ fmul(z2_50_0,t0,z2_10_0); + + /* 2^51 - 2^1 */ fsquare(t0,z2_50_0); + /* 2^52 - 2^2 */ fsquare(t1,t0); + /* 2^100 - 2^50 */ for (i = 2; i < 50; i += 2) { fsquare(t0,t1); fsquare(t1,t0); } + /* 2^100 - 2^0 */ fmul(z2_100_0,t1,z2_50_0); + + /* 2^101 - 2^1 */ fsquare(t1,z2_100_0); + /* 2^102 - 2^2 */ fsquare(t0,t1); + /* 2^200 - 2^100 */ for (i = 2; i < 100; i += 2) { fsquare(t1,t0); fsquare(t0,t1); } + /* 2^200 - 2^0 */ fmul(t1,t0,z2_100_0); + + /* 2^201 - 2^1 */ fsquare(t0,t1); + /* 2^202 - 2^2 */ fsquare(t1,t0); + /* 2^250 - 2^50 */ for (i = 2; i < 50; i += 2) { fsquare(t0,t1); fsquare(t1,t0); } + /* 2^250 - 2^0 */ fmul(t0,t1,z2_50_0); + + /* 2^251 - 2^1 */ fsquare(t1,t0); + /* 2^252 - 2^2 */ fsquare(t0,t1); + /* 2^253 - 2^3 */ fsquare(t1,t0); + /* 2^254 - 2^4 */ fsquare(t0,t1); + /* 2^255 - 2^5 */ fsquare(t1,t0); + /* 2^255 - 21 */ fmul(out,t1,z11); +} + +void curve25519(uint8_t mypublic[CURVE25519_POINT_SIZE], const uint8_t secret[CURVE25519_POINT_SIZE], const uint8_t basepoint[CURVE25519_POINT_SIZE]) +{ + limb bp[10], x[10], z[11], zmone[10]; + uint8_t e[32]; + + memcpy(e, secret, 32); + curve25519_normalize_secret(e); + + fexpand(bp, basepoint); + cmult(x, z, e, bp); + crecip(zmone, z); + 
fmul(z, x, zmone); + fcontract(mypublic, z); +} +#endif + +void curve25519_generate_public(uint8_t *pub, const uint8_t *secret) +{ + static const uint8_t basepoint[CURVE25519_POINT_SIZE] = { 9 }; + curve25519(pub, secret, basepoint); +} diff --git a/src/tools/curve25519.h b/src/tools/curve25519.h new file mode 100644 index 0000000..3c1404a --- /dev/null +++ b/src/tools/curve25519.h @@ -0,0 +1,22 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#ifndef CURVE25519_H +#define CURVE25519_H + +#include +#include + +enum curve25519_lengths { + CURVE25519_POINT_SIZE = 32, +}; + +void curve25519(uint8_t *mypublic, const uint8_t *secret, const uint8_t *basepoint); +void curve25519_generate_public(uint8_t *pub, const uint8_t *secret); +static inline void curve25519_normalize_secret(uint8_t secret[CURVE25519_POINT_SIZE]) +{ + secret[0] &= 248; + secret[31] &= 127; + secret[31] |= 64; +} + +#endif diff --git a/src/tools/genkey.c b/src/tools/genkey.c new file mode 100644 index 0000000..1602ae1 --- /dev/null +++ b/src/tools/genkey.c @@ -0,0 +1,59 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. 
*/
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "curve25519.h"
+#include "base64.h"
+
+#ifdef __NR_getrandom
+static inline ssize_t get_random_bytes(uint8_t *out, size_t len)
+{
+	return syscall(__NR_getrandom, out, len, 0);
+}
+#else
+#include
+static inline ssize_t get_random_bytes(uint8_t *out, size_t len)
+{
+	ssize_t ret;
+	int fd = open("/dev/urandom", O_RDONLY);
+	if (fd < 0)
+		return fd;
+	ret = read(fd, out, len);
+	close(fd);
+	return ret;
+}
+#endif
+
+int genkey_main(int argc, char *argv[])
+{
+	unsigned char private_key[CURVE25519_POINT_SIZE];
+	char private_key_base64[b64_len(CURVE25519_POINT_SIZE)];
+	struct stat stat;
+
+	if (!fstat(STDOUT_FILENO, &stat) && S_ISREG(stat.st_mode) && stat.st_mode & S_IRWXO)
+		fputs("Warning: writing to world accessible file.\nConsider setting the umask to 077 and trying again.\n", stderr);
+
+	if (get_random_bytes(private_key, CURVE25519_POINT_SIZE) != CURVE25519_POINT_SIZE) {
+		perror("getrandom");
+		return 1;
+	}
+	if (argc && !strcmp(argv[0], "genkey"))
+		curve25519_normalize_secret(private_key);
+
+	if (b64_ntop(private_key, sizeof(private_key), private_key_base64, sizeof(private_key_base64)) < 0) {
+		errno = EINVAL;
+		perror("b64");
+		return 1;
+	}
+
+	puts(private_key_base64);
+	return 0;
+
+}
diff --git a/src/tools/kernel.c b/src/tools/kernel.c
new file mode 100644
index 0000000..0448308
--- /dev/null
+++ b/src/tools/kernel.c
@@ -0,0 +1,289 @@
+/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "kernel.h"
+#include "../uapi.h"
+
+/* Growable list of interface names stored back to back, each with its
+ * terminating NUL. `next` is the strdup()ed name of the link being parsed
+ * and `good` records whether that link was recognised as a wireguard one. */
+struct inflatable_buffer {
+	char *buffer;
+	char *next;
+	bool good;
+	size_t len;
+	size_t pos;
+};
+
+/* Both arguments are parenthesized; each may still be evaluated twice. */
+#define max(a, b) ((a) > (b) ? (a) : (b))
+
+/* Appends buffer->next, NUL included, to buffer->buffer if the current link
+ * was flagged good, growing the allocation when needed. buffer->next is
+ * released on every path. Returns 0 on success or a negative errno value
+ * when the buffer cannot be grown. */
+static int add_next_to_inflatable_buffer(struct inflatable_buffer *buffer)
+{
+	size_t len, expand_to;
+	char *new_buffer;
+
+	if (!buffer->good || !buffer->next) {
+		free(buffer->next);
+		return 0;
+	}
+
+	len = strlen(buffer->next) + 1;
+
+	if (len == 1) {
+		/* An empty name is not recorded, but its copy must still be freed. */
+		free(buffer->next);
+		return 0;
+	}
+
+	if (buffer->len - buffer->pos <= len) {
+		expand_to = max(buffer->len * 2, buffer->len + len + 1);
+		new_buffer = realloc(buffer->buffer, expand_to);
+		if (!new_buffer) {
+			/* Read errno before free() gets a chance to disturb it. */
+			int err = errno;
+
+			free(buffer->next);
+			return -err;
+		}
+		/* Zero the new tail so the list always ends in an empty string. */
+		memset(&new_buffer[buffer->len], 0, expand_to - buffer->len);
+		buffer->buffer = new_buffer;
+		buffer->len = expand_to;
+	}
+	memcpy(&buffer->buffer[buffer->pos], buffer->next, len);
+	free(buffer->next);
+	buffer->pos += len;
+	return 0;
+}
+
+/* Nested-attribute callback: flags the link as good when its
+ * IFLA_INFO_KIND is "wireguard". */
+static int parse_linkinfo(const struct nlattr *attr, void *data)
+{
+	struct inflatable_buffer *buffer = data;
+	if (mnl_attr_get_type(attr) == IFLA_INFO_KIND && !strcmp("wireguard", mnl_attr_get_str(attr)))
+		buffer->good = true;
+	return MNL_CB_OK;
+}
+
+/* Link-attribute callback: descends into IFLA_LINKINFO and keeps a copy of
+ * the name carried by IFLA_IFNAME. */
+static int parse_infomsg(const struct nlattr *attr, void *data)
+{
+	struct inflatable_buffer *buffer = data;
+	if (mnl_attr_get_type(attr) == IFLA_LINKINFO)
+		return mnl_attr_parse_nested(attr, parse_linkinfo, data);
+	else if (mnl_attr_get_type(attr) == IFLA_IFNAME)
+		buffer->next = strdup(mnl_attr_get_str(attr));
+	return MNL_CB_OK;
+}
+
+/* Per-message callback for the link dump: parses one link and appends its
+ * name to the list if it is a wireguard device. Returns MNL_CB_OK + 1 for
+ * every message other than NLMSG_DONE, which the caller takes as "read on". */
+static int read_devices_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct inflatable_buffer *buffer = data;
+	buffer->good = false;
+	buffer->next = NULL;
+	int ret = mnl_attr_parse(nlh, sizeof(struct ifinfomsg), parse_infomsg, data);
+	if (ret != MNL_CB_OK)
+		return ret;
+	ret = add_next_to_inflatable_buffer(buffer);
+	if (ret < 0)
+		return ret;
+	if (nlh->nlmsg_type != NLMSG_DONE)
+		return MNL_CB_OK + 1;
+	return MNL_CB_OK;
+}
+
+/* Returns a heap-allocated list of the wireguard interface names, laid out as
+ * first\0second\0third\0fourth\0last\0\0, which the caller must free(). On
+ * failure the error is reported with perror() and NULL is returned. */
+char *kernel_get_wireguard_interfaces(void)
+{
+	struct mnl_socket *nl = NULL;
+	char *rtnl_buffer = NULL;
+	size_t message_len;
+	unsigned int portid, seq;
+	ssize_t len;
+	int ret = 0;
+	struct inflatable_buffer buffer = { 0 };
+	struct nlmsghdr *nlh;
+	struct ifinfomsg *ifm;
+
+	buffer.len = 4096;
+	buffer.buffer = calloc(buffer.len, 1);
+	if (!buffer.buffer) {
+		ret = -errno;
+		goto cleanup;
+	}
+
+	rtnl_buffer = calloc(4096, 1);
+	if (!rtnl_buffer) {
+		ret = -errno;
+		goto cleanup;
+	}
+
+	nl = mnl_socket_open(NETLINK_ROUTE);
+	if (!nl) {
+		ret = -errno;
+		goto cleanup;
+	}
+
+	if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) {
+		ret = -errno;
+		goto cleanup;
+	}
+
+	seq = time(NULL);
+	portid = mnl_socket_get_portid(nl);
+	nlh = mnl_nlmsg_put_header(rtnl_buffer);
+	nlh->nlmsg_type = RTM_GETLINK;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP;
+	nlh->nlmsg_seq = seq;
+	ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm));
+	ifm->ifi_family = AF_UNSPEC;
+	message_len = nlh->nlmsg_len;
+
+	if (mnl_socket_sendto(nl, rtnl_buffer, message_len) < 0) {
+		ret = -errno;
+		goto cleanup;
+	}
+
+another:
+	if ((len = mnl_socket_recvfrom(nl, rtnl_buffer, 4096)) < 0) {
+		ret = -errno;
+		goto cleanup;
+	}
+	if ((len = mnl_cb_run(rtnl_buffer, len, seq, portid, read_devices_cb, &buffer)) < 0) {
+		ret = -errno;
+		goto cleanup;
+	}
+	if (len == MNL_CB_OK + 1)
+		goto another;
+
+cleanup:
+	free(rtnl_buffer);
+	if (nl)
+		mnl_socket_close(nl);
+	errno = -ret;
+	if (errno) {
+		perror("Error when trying to get a list of Wireguard interfaces");
+		free(buffer.buffer);
+		return NULL;
+	}
+	return buffer.buffer;
+}
+
+/* Returns true if `interface` names an existing wireguard interface. */
+bool kernel_has_wireguard_interface(const char *interface)
+{
+	char *interfaces, *this_interface;
+	this_interface = interfaces = kernel_get_wireguard_interfaces();
+	if (!interfaces)
+		return false;
+	for (size_t len = 0; (len = strlen(this_interface)); this_interface += len + 1) {
+		if (!strcmp(interface, this_interface)) {
+			free(interfaces);
+			return true;
+		}
+	}
+	free(interfaces);
+	return false;
+}
+
+/* Issues `req` on a lazily created, process-wide datagram socket. Returns the
+ * non-negative ioctl result, or a negative errno value on failure. */
+static int do_ioctl(int req, struct ifreq *ifreq)
+{
+	static int fd = -1;
+	int ret;
+
+	if (fd < 0) {
+		fd = socket(AF_INET, SOCK_DGRAM, 0);
+		if (fd < 0)
+			return -errno;
+	}
+	/* ioctl() reports failure as -1 with the cause in errno; callers here
+	 * compare against -Exxx codes, so translate it. */
+	ret = ioctl(fd, req, ifreq);
+	if (ret < 0)
+		return -errno;
+	return ret;
+}
+
+/* Applies the settings in `dev` to the interface named by dev->interface.
+ * Returns the result of the WG_SET_DEVICE ioctl as reported by do_ioctl(). */
+int kernel_set_device(struct wgdevice *dev)
+{
+	struct ifreq ifreq = { .ifr_data = (char *)dev };
+	memcpy(&ifreq.ifr_name, dev->interface, IFNAMSIZ);
+	ifreq.ifr_name[IFNAMSIZ - 1] = 0;
+	return do_ioctl(WG_SET_DEVICE, &ifreq);
+}
+
+/* Fetches the state of `interface` into a freshly allocated *dev, which the
+ * caller must free(). A first WG_GET_DEVICE call made without a data buffer
+ * yields the size to allocate for the peers; the fetch is retried while the
+ * second call reports -EMSGSIZE. On failure, returns a negative errno value
+ * (mirrored in errno) and leaves *dev set to NULL. */
+int kernel_get_device(struct wgdevice **dev, const char *interface)
+{
+	int ret;
+	struct ifreq ifreq = { 0 };
+
+	/* `interface` may be shorter than IFNAMSIZ, so copy it as a string. */
+	strncpy(ifreq.ifr_name, interface, IFNAMSIZ - 1);
+	ifreq.ifr_name[IFNAMSIZ - 1] = 0;
+	*dev = NULL;
+	do {
+		/* Drop the buffer of a previous attempt and make sure neither the
+		 * request nor the caller keeps pointing at it. */
+		free(*dev);
+		*dev = NULL;
+		ifreq.ifr_data = NULL;
+		ret = do_ioctl(WG_GET_DEVICE, &ifreq);
+		if (ret < 0)
+			goto out;
+		*dev = calloc(ret + sizeof(struct wgdevice), 1);
+		if (!*dev) {
+			perror("calloc");
+			ret = -ENOMEM;
+			goto out;
+		}
+		(*dev)->peers_size = ret;
+		ifreq.ifr_data = (char *)*dev;
+		strncpy(ifreq.ifr_name, interface, IFNAMSIZ - 1);
+		ifreq.ifr_name[IFNAMSIZ - 1] = 0;
+		ret = do_ioctl(WG_GET_DEVICE, &ifreq);
+	} while (ret == -EMSGSIZE);
+	if (ret < 0) {
+		free(*dev);
+		*dev = NULL;
+	}
+out:
+	errno = ret < 0 ? -ret : 0;
+	return ret;
+}
diff --git a/src/tools/kernel.h b/src/tools/kernel.h
new file mode 100644
index 0000000..0525ce1
--- /dev/null
+++ b/src/tools/kernel.h
@@ -0,0 +1,24 @@
+/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. 
*/
+
+#ifndef KERNEL_H
+#define KERNEL_H
+
+#include
+
+struct wgdevice;
+
+int kernel_set_device(struct wgdevice *dev);
+int kernel_get_device(struct wgdevice **dev, const char *interface);
+char *kernel_get_wireguard_interfaces(void);
+bool kernel_has_wireguard_interface(const char *interface);
+
+
+#define for_each_wgpeer(__dev, __peer, __i) for ((__i) = 0, (__peer) = (typeof(__peer))((uint8_t *)(__dev) + sizeof(struct wgdevice)); \
+	(__i) < (__dev)->num_peers; \
+	++(__i), (__peer) = (typeof(__peer))((uint8_t *)(__peer) + sizeof(struct wgpeer) + (sizeof(struct wgipmask) * (__peer)->num_ipmasks)))
+
+#define for_each_wgipmask(__peer, __ipmask, __i) for ((__i) = 0, (__ipmask) = (typeof(__ipmask))((uint8_t *)(__peer) + sizeof(struct wgpeer)); \
+	(__i) < (__peer)->num_ipmasks; \
+	++(__i), (__ipmask) = (typeof(__ipmask))((uint8_t *)(__ipmask) + sizeof(struct wgipmask)))
+
+#endif
diff --git a/src/tools/pubkey.c b/src/tools/pubkey.c
new file mode 100644
index 0000000..d9a97d9
--- /dev/null
+++ b/src/tools/pubkey.c
@@ -0,0 +1,40 @@
+/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */
+
+#include
+#include
+#include
+
+#include "curve25519.h"
+#include "base64.h"
+
+/* Reads a base64-encoded private key from stdin and prints the base64-encoded
+ * public key derived from it. Returns 0 on success and 1 on any input or
+ * encoding error. */
+int pubkey_main(__attribute__((unused)) int argc, __attribute__((unused)) char *argv[])
+{
+	unsigned char private_key[CURVE25519_POINT_SIZE + 1] = { 0 }, public_key[CURVE25519_POINT_SIZE] = { 0 };
+	char private_key_base64[b64_len(CURVE25519_POINT_SIZE)] = { 0 }, public_key_base64[b64_len(CURVE25519_POINT_SIZE)] = { 0 };
+
+	if (fread(private_key_base64, 1, sizeof(private_key_base64) - 1, stdin) != sizeof(private_key_base64) - 1) {
+		errno = EINVAL;
+		perror("fread(private key)");
+		return 1;
+	}
+	/* A well-formed key decodes to exactly CURVE25519_POINT_SIZE bytes; a
+	 * shorter or longer result would otherwise be used as a key silently.
+	 * NOTE(review): assumes b64_pton() returns the decoded length, as the
+	 * resolver routine of the same name does -- confirm against base64.h. */
+	if (b64_pton(private_key_base64, private_key, sizeof(private_key)) != CURVE25519_POINT_SIZE) {
+		errno = EINVAL;
+		perror("b64");
+		return 1;
+	}
+	curve25519_generate_public(public_key, private_key);
+	if (b64_ntop(public_key, sizeof(public_key), public_key_base64, sizeof(public_key_base64)) < 0) {
+		errno = EINVAL;
+		perror("b64");
+		return 1;
+	}
+	puts(public_key_base64);
+	return 0;
+}
diff --git a/src/tools/set.c b/src/tools/set.c
new file mode 100644
index 0000000..f85162d
--- /dev/null
+++ b/src/tools/set.c
@@ -0,0 +1,35 @@
+/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */
+
+#include
+#include
+#include
+#include "subcommands.h"
+#include "config.h"
+#include "kernel.h"
+
+int set_main(int argc, char *argv[])
+{
+	struct wgdevice *device = NULL;
+	int ret = 1;
+
+	if (argc < 3) {
+		fprintf(stderr, "Usage: %s %s [listen-port ] [private-key ] [peer [remove] [endpoint :] [allowed-ips /[,/]...] 
]...\n", PROG_NAME, argv[0]); + return 1; + } + + if (!config_read_cmd(&device, argv + 2, argc - 2)) + goto cleanup; + strncpy(device->interface, argv[1], IFNAMSIZ - 1); + device->interface[IFNAMSIZ - 1] = 0; + + if (kernel_set_device(device) != 0) { + perror("Unable to set device"); + goto cleanup; + } + + ret = 0; + +cleanup: + free(device); + return ret; +} diff --git a/src/tools/setconf.c b/src/tools/setconf.c new file mode 100644 index 0000000..81faa64 --- /dev/null +++ b/src/tools/setconf.c @@ -0,0 +1,61 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#include +#include +#include +#include + +#include "config.h" +#include "kernel.h" +#include "subcommands.h" + +int setconf_main(int argc, char *argv[]) +{ + struct wgdevice *device = NULL; + struct config_ctx ctx; + FILE *config_input = NULL; + char *config_buffer = NULL; + size_t config_buffer_len = 0; + int ret = 1; + + if (argc != 3) { + fprintf(stderr, "Usage: %s %s \n", PROG_NAME, argv[0]); + return 1; + } + + config_input = fopen(argv[2], "r"); + if (!config_input) { + perror("fopen"); + return 1; + } + if (!config_read_init(&ctx, &device, !strcmp(argv[0], "addconf"))) { + fclose(config_input); + return 1; + } + while (getline(&config_buffer, &config_buffer_len, config_input) >= 0) { + if (!config_read_line(&ctx, config_buffer)) { + fprintf(stderr, "Configuration parsing error\n"); + goto cleanup; + } + } + if (!config_read_finish(&ctx) || !device) { + fprintf(stderr, "Invalid configuration\n"); + goto cleanup; + } + strncpy(device->interface, argv[1], IFNAMSIZ - 1); + device->interface[IFNAMSIZ - 1] = 0; + + if (kernel_set_device(device) != 0) { + perror("Unable to set device"); + goto cleanup; + } + + ret = 0; + +cleanup: + if (config_input) + fclose(config_input); + free(config_buffer); + free(device); + return ret; +} diff --git a/src/tools/show.c b/src/tools/show.c new file mode 100644 index 0000000..1662751 --- /dev/null +++ b/src/tools/show.c @@ -0,0 +1,366 @@ +/* 
Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kernel.h" +#include "subcommands.h" +#include "terminal.h" +#include "base64.h" +#include "../uapi.h" + +static int peer_cmp(const void *first, const void *second) +{ + time_t diff; + const struct wgpeer *a = *(const void **)first, *b = *(const void **)second; + if (!a->last_handshake_time.tv_sec && !a->last_handshake_time.tv_usec && (b->last_handshake_time.tv_sec || b->last_handshake_time.tv_usec)) + return 1; + if (!b->last_handshake_time.tv_sec && !b->last_handshake_time.tv_usec && (a->last_handshake_time.tv_sec || a->last_handshake_time.tv_usec)) + return -1; + diff = a->last_handshake_time.tv_sec - b->last_handshake_time.tv_sec; + if (!diff) + diff = a->last_handshake_time.tv_usec - b->last_handshake_time.tv_usec; + if (diff < 0) + return 1; + if (diff > 0) + return -1; + return 0; +} + +static void sort_peers(struct wgdevice *device) +{ + uint8_t *new_device, *pos; + struct wgpeer **peers; + struct wgpeer *peer; + size_t i, len; + + peers = calloc(device->num_peers, sizeof(struct wgpeer *)); + if (!peers) + return; + + len = sizeof(struct wgdevice); + for_each_wgpeer(device, peer, i) + len += sizeof(struct wgpeer) + (peer->num_ipmasks * sizeof(struct wgipmask)); + pos = new_device = malloc(len); + if (!new_device) { + free(peers); + return; + } + + memcpy(pos, device, sizeof(struct wgdevice)); + pos += sizeof(struct wgdevice); + + for_each_wgpeer(device, peer, i) + peers[i] = peer; + + qsort(peers, device->num_peers, sizeof(struct wgpeer *), peer_cmp); + for (i = 0; i < device->num_peers; ++i) { + len = sizeof(struct wgpeer) + (peers[i]->num_ipmasks * sizeof(struct wgipmask)); + memcpy(pos, peers[i], len); + pos += len; + } + free(peers); + + memcpy(device, new_device, pos - new_device); + free(new_device); +} + +static const uint8_t zero[WG_KEY_LEN] = 
{ 0 }; + +static char *key(const unsigned char key[WG_KEY_LEN]) +{ + static char b64[b64_len(WG_KEY_LEN)]; + if (!memcmp(key, zero, WG_KEY_LEN)) + return "(none)"; + memset(b64, 0, b64_len(WG_KEY_LEN)); + b64_ntop(key, WG_KEY_LEN, b64, b64_len(WG_KEY_LEN)); + return b64; +} + +static char *ip(const struct wgipmask *ip) +{ + static char buf[INET6_ADDRSTRLEN + 1]; + memset(buf, 0, INET6_ADDRSTRLEN + 1); + if (ip->family == AF_INET) + inet_ntop(AF_INET, &ip->ip4, buf, INET6_ADDRSTRLEN); + else if (ip->family == AF_INET6) + inet_ntop(AF_INET6, &ip->ip6, buf, INET6_ADDRSTRLEN); + return buf; +} + +static char *endpoint(const struct sockaddr_storage *addr) +{ + char host[4096 + 1]; + char service[512 + 1]; + static char buf[sizeof(host) + sizeof(service) + 4]; + int ret; + socklen_t addr_len = 0; + + memset(buf, 0, sizeof(buf)); + if (addr->ss_family == AF_INET) + addr_len = sizeof(struct sockaddr_in); + else if (addr->ss_family == AF_INET6) + addr_len = sizeof(struct sockaddr_in6); + + ret = getnameinfo((struct sockaddr *)addr, addr_len, host, sizeof(host), service, sizeof(service), NI_DGRAM | NI_NUMERICSERV | NI_NUMERICHOST); + if (ret) + strncpy(buf, gai_strerror(ret), sizeof(buf) - 1); + else + snprintf(buf, sizeof(buf) - 1, (addr->ss_family == AF_INET6 && strchr(host, ':')) ? "[%s]:%s" : "%s:%s", host, service); + return buf; +} + +static char *ago(const struct timeval *t) +{ + static char buf[1024]; + unsigned long long left, years, days, hours, minutes, seconds; + size_t offset = 0; + + left = time(NULL) - t->tv_sec; + years = left / (365 * 24 * 60 * 60); + left = left % (365 * 24 * 60 * 60); + days = left / (24 * 60 * 60); + left = left % (24 * 60 * 60); + hours = left / (60 * 60); + left = left % (60 * 60); + minutes = left / 60; + seconds = left % 60; + + if (years) + offset += snprintf(buf + offset, sizeof(buf) - offset, "%s%llu " TERMINAL_FG_CYAN "year%s" TERMINAL_RESET, offset ? ", " : "", years, years == 1 ? 
"" : "s"); + if (days) + offset += snprintf(buf + offset, sizeof(buf) - offset, "%s%llu " TERMINAL_FG_CYAN "day%s" TERMINAL_RESET, offset ? ", " : "", days, days == 1 ? "" : "s"); + if (hours) + offset += snprintf(buf + offset, sizeof(buf) - offset, "%s%llu " TERMINAL_FG_CYAN "hour%s" TERMINAL_RESET, offset ? ", " : "", hours, hours == 1 ? "" : "s"); + if (minutes) + offset += snprintf(buf + offset, sizeof(buf) - offset, "%s%llu " TERMINAL_FG_CYAN "minute%s" TERMINAL_RESET, offset ? ", " : "", minutes, minutes == 1 ? "" : "s"); + if (seconds) + offset += snprintf(buf + offset, sizeof(buf) - offset, "%s%llu " TERMINAL_FG_CYAN "second%s" TERMINAL_RESET, offset ? ", " : "", seconds, seconds == 1 ? "" : "s"); + if (offset) + snprintf(buf + offset, sizeof(buf) - offset, " ago"); + else + snprintf(buf, sizeof(buf), "Now"); + + return buf; +} + +static char *bytes(uint64_t b) +{ + static char buf[1024]; + + if (b < 1024ULL) + snprintf(buf, sizeof(buf), "%u " TERMINAL_FG_CYAN "B" TERMINAL_RESET, (unsigned)b); + else if (b < 1024ULL * 1024ULL) + snprintf(buf, sizeof(buf), "%.2f " TERMINAL_FG_CYAN "KiB" TERMINAL_RESET, (double)b / 1024); + else if (b < 1024ULL * 1024ULL * 1024ULL) + snprintf(buf, sizeof(buf), "%.2f " TERMINAL_FG_CYAN "MiB" TERMINAL_RESET, (double)b / (1024 * 1024)); + else if (b < 1024ULL * 1024ULL * 1024ULL * 1024ULL) + snprintf(buf, sizeof(buf), "%.2f " TERMINAL_FG_CYAN "GiB" TERMINAL_RESET, (double)b / (1024 * 1024 * 1024)); + else + snprintf(buf, sizeof(buf), "%.2f " TERMINAL_FG_CYAN "TiB" TERMINAL_RESET, (double)b / (1024 * 1024 * 1024) / 1024); + + return buf; +} + +static const char *COMMAND_NAME = NULL; +static void show_usage(void) +{ + fprintf(stderr, "Usage: %s %s { | all | interfaces } [public-key | private-key | preshared-key | listen-port | peers | endpoints | allowed-ips | latest-handshake | bandwidth]\n", PROG_NAME, COMMAND_NAME); +} + +static void pretty_print(struct wgdevice *device) +{ + size_t i, j; + struct wgpeer *peer; + struct 
wgipmask *ipmask; + + terminal_printf(TERMINAL_RESET); + terminal_printf(TERMINAL_FG_GREEN TERMINAL_BOLD "interface" TERMINAL_RESET ": " TERMINAL_FG_GREEN "%s" TERMINAL_RESET "\n", device->interface); + if (memcmp(device->public_key, zero, WG_KEY_LEN)) + terminal_printf(" " TERMINAL_BOLD "public key" TERMINAL_RESET ": %s\n", key(device->public_key)); + if (memcmp(device->private_key, zero, WG_KEY_LEN)) + terminal_printf(" " TERMINAL_BOLD "private key" TERMINAL_RESET ": %s\n", key(device->private_key)); + if (memcmp(device->preshared_key, zero, WG_KEY_LEN)) + terminal_printf(" " TERMINAL_BOLD "pre-shared key" TERMINAL_RESET ": %s\n", key(device->preshared_key)); + if (device->port) + terminal_printf(" " TERMINAL_BOLD "listening port" TERMINAL_RESET ": %u\n", device->port); + if (device->num_peers) { + sort_peers(device); + terminal_printf("\n"); + } + for_each_wgpeer(device, peer, i) { + terminal_printf(TERMINAL_FG_YELLOW TERMINAL_BOLD "peer" TERMINAL_RESET ": " TERMINAL_FG_YELLOW "%s" TERMINAL_RESET "\n", key(peer->public_key)); + if (peer->endpoint.ss_family == AF_INET || peer->endpoint.ss_family == AF_INET6) + terminal_printf(" " TERMINAL_BOLD "endpoint" TERMINAL_RESET ": %s\n", endpoint(&peer->endpoint)); + terminal_printf(" " TERMINAL_BOLD "allowed ips" TERMINAL_RESET ": "); + if (peer->num_ipmasks) { + for_each_wgipmask(peer, ipmask, j) + terminal_printf("%s" TERMINAL_FG_CYAN "/" TERMINAL_RESET "%u%s", ip(ipmask), ipmask->cidr, j == (size_t)peer->num_ipmasks - 1 ? 
"\n" : ", "); + } else + terminal_printf("(none)\n"); + if (peer->last_handshake_time.tv_sec) + terminal_printf(" " TERMINAL_BOLD "latest handshake" TERMINAL_RESET ": %s\n", ago(&peer->last_handshake_time)); + if (peer->rx_bytes || peer->tx_bytes) { + terminal_printf(" " TERMINAL_BOLD "bandwidth" TERMINAL_RESET ": "); + terminal_printf("%s received, ", bytes(peer->rx_bytes)); + terminal_printf("%s sent\n", bytes(peer->tx_bytes)); + } + if (i + 1 < device->num_peers) + terminal_printf("\n"); + } +} +/* Prints the single field named by `param` to stdout in a tab-separated form: device-level fields produce one line, per-peer fields produce one line per peer. When `with_interface` is true every line is prefixed with the interface name and a tab. Returns true on success; for an unrecognised `param` prints an error plus the usage text to stderr and returns false. */ +static bool ugly_print(struct wgdevice *device, const char *param, bool with_interface) +{ + size_t i, j; + struct wgpeer *peer; + struct wgipmask *ipmask; + if (!strcmp(param, "public-key")) { + if (with_interface) + printf("%s\t", device->interface); + printf("%s\n", key(device->public_key)); + } else if (!strcmp(param, "private-key")) { + if (with_interface) + printf("%s\t", device->interface); + printf("%s\n", key(device->private_key)); + } else if (!strcmp(param, "preshared-key")) { + if (with_interface) + printf("%s\t", device->interface); + printf("%s\n", key(device->preshared_key)); + } else if (!strcmp(param, "listen-port")) { + if (with_interface) + printf("%s\t", device->interface); + printf("%u\n", device->port); + } else if (!strcmp(param, "endpoints")) { + for_each_wgpeer(device, peer, i) { /* the interface prefix belongs on every peer's line, as in the other per-peer fields */ + if (with_interface) + printf("%s\t", device->interface); + printf("%s\t", key(peer->public_key)); + if (peer->endpoint.ss_family == AF_INET || peer->endpoint.ss_family == AF_INET6) + printf("%s\n", endpoint(&peer->endpoint)); + 
else + printf("(none)\n"); + } + } else if (!strcmp(param, "allowed-ips")) { + for_each_wgpeer(device, peer, i) { + if (with_interface) + printf("%s\t", device->interface); + printf("%s\t", key(peer->public_key)); + if (peer->num_ipmasks) { + for_each_wgipmask(peer, ipmask, j) + printf("%s/%u%s", ip(ipmask), ipmask->cidr, j == (size_t)peer->num_ipmasks - 1 ? 
"\n" : ", "); + } else + printf("(none)\n"); + } + } else if (!strcmp(param, "latest-handshakes") || !strcmp(param, "latest-handshake")) { /* the usage text advertises the singular spelling, so accept both */ + for_each_wgpeer(device, peer, i) { + if (with_interface) + printf("%s\t", device->interface); + printf("%s\t%llu\n", key(peer->public_key), (unsigned long long)peer->last_handshake_time.tv_sec); + } + } else if (!strcmp(param, "bandwidth")) { + for_each_wgpeer(device, peer, i) { + if (with_interface) + printf("%s\t", device->interface); + printf("%s\t%" PRIu64 "\t%" PRIu64 "\n", key(peer->public_key), (uint64_t)peer->rx_bytes, (uint64_t)peer->tx_bytes); + } + } else if (!strcmp(param, "peers")) { + for_each_wgpeer(device, peer, i) { + if (with_interface) + printf("%s\t", device->interface); + printf("%s\n", key(peer->public_key)); + } + } else { + fprintf(stderr, "Invalid parameter: `%s`\n", param); + show_usage(); + return false; + } + return true; +} + +int show_main(int argc, char *argv[]) +{ + int ret = 0; + COMMAND_NAME = argv[0]; + + if (argc > 3) { + show_usage(); + return 1; + } + + if (argc == 1 || !strcmp(argv[1], "all")) { + char *interfaces = kernel_get_wireguard_interfaces(), *interface; + if (!interfaces) { + perror("Unable to get devices"); + return 1; + } + interface = interfaces; + for (size_t len = 0; (len = strlen(interface)); interface += len + 1) { + struct wgdevice *device = NULL; + if (kernel_get_device(&device, interface) < 0) { + perror("Unable to get device"); + continue; + } + if (argc == 3) { + if (!ugly_print(device, argv[2], true)) { + ret = 1; + free(device); + break; + } + } else { + pretty_print(device); + if (strlen(interface + len + 1)) + printf("\n"); + } + free(device); + } + free(interfaces); + } else if (!strcmp(argv[1], "interfaces")) { + char *interfaces, *interface; + if (argc > 2) { + show_usage(); + return 1; + } + interfaces = kernel_get_wireguard_interfaces(); + if (!interfaces) { + perror("Unable to get devices"); + 
return 1; + } + interface = interfaces; + for (size_t len = 0; (len = strlen(interface)); interface += 
len + 1) + printf("%s%c", interface, strlen(interface + len + 1) ? ' ' : '\n'); + free(interfaces); + } else if (argc == 2 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help") || !strcmp(argv[1], "help"))) + show_usage(); + else { + struct wgdevice *device = NULL; + if (!kernel_has_wireguard_interface(argv[1])) { + fprintf(stderr, "`%s` is not a valid WireGuard interface\n", argv[1]); + show_usage(); + return 1; + } + if (kernel_get_device(&device, argv[1]) < 0) { + perror("Unable to get device"); + show_usage(); + return 1; + } + if (argc == 3) { + if (!ugly_print(device, argv[2], false)) + ret = 1; + } else + pretty_print(device); + free(device); + } + return ret; +} diff --git a/src/tools/showconf.c b/src/tools/showconf.c new file mode 100644 index 0000000..faf2482 --- /dev/null +++ b/src/tools/showconf.c @@ -0,0 +1,102 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "subcommands.h" +#include "base64.h" +#include "kernel.h" +#include "../uapi.h" + +int showconf_main(int argc, char *argv[]) +{ + static const uint8_t zero[WG_KEY_LEN] = { 0 }; + char b64[b64_len(WG_KEY_LEN)] = { 0 }; + char ip[INET6_ADDRSTRLEN]; + struct wgdevice *device = NULL; + struct wgpeer *peer; + struct wgipmask *ipmask; + size_t i, j; + int ret = 1; + + if (argc != 2) { + fprintf(stderr, "Usage: %s %s \n", PROG_NAME, argv[0]); + return 1; + } + + if (!kernel_has_wireguard_interface(argv[1])) { + fprintf(stderr, "`%s` is not a valid WireGuard interface\n", argv[1]); + fprintf(stderr, "Usage: %s %s \n", PROG_NAME, argv[0]); + return 1; + } + + if (kernel_get_device(&device, argv[1])) { + perror("Unable to get device"); + goto cleanup; + } + + printf("[Interface]\n"); + if (device->port) + printf("ListenPort = %d\n", device->port); + if (memcmp(device->private_key, zero, WG_KEY_LEN)) { + b64_ntop(device->private_key, WG_KEY_LEN, b64, b64_len(WG_KEY_LEN)); + 
printf("PrivateKey = %s\n", b64); + } + if (memcmp(device->preshared_key, zero, WG_KEY_LEN)) { + b64_ntop(device->preshared_key, WG_KEY_LEN, b64, b64_len(WG_KEY_LEN)); + printf("PresharedKey = %s\n", b64); + } + printf("\n"); + for_each_wgpeer(device, peer, i) { + b64_ntop(peer->public_key, WG_KEY_LEN, b64, b64_len(WG_KEY_LEN)); + printf("[Peer]\nPublicKey = %s\n", b64); + if (peer->num_ipmasks) + printf("AllowedIPs = "); + for_each_wgipmask(peer, ipmask, j) { + if (ipmask->family == AF_INET) { + if (!inet_ntop(AF_INET, &ipmask->ip4, ip, INET6_ADDRSTRLEN)) + continue; + } else if (ipmask->family == AF_INET6) { + if (!inet_ntop(AF_INET6, &ipmask->ip6, ip, INET6_ADDRSTRLEN)) + continue; + } else + continue; + printf("%s/%d", ip, ipmask->cidr); + if (j + 1 < (size_t)peer->num_ipmasks) + printf(", "); + } + if (peer->num_ipmasks) + printf("\n"); + + if (peer->endpoint.ss_family == AF_INET || peer->endpoint.ss_family == AF_INET6) { + char host[4096 + 1]; + char service[512 + 1]; + static char buf[sizeof(host) + sizeof(service) + 4]; + socklen_t addr_len = 0; + memset(buf, 0, sizeof(buf)); + if (peer->endpoint.ss_family == AF_INET) + addr_len = sizeof(struct sockaddr_in); + else if (peer->endpoint.ss_family == AF_INET6) + addr_len = sizeof(struct sockaddr_in6); + if (!getnameinfo((struct sockaddr *)&peer->endpoint, addr_len, host, sizeof(host), service, sizeof(service), NI_DGRAM | NI_NUMERICSERV | NI_NUMERICHOST)) { + snprintf(buf, sizeof(buf) - 1, (peer->endpoint.ss_family == AF_INET6 && strchr(host, ':')) ? "[%s]:%s" : "%s:%s", host, service); + printf("Endpoint = %s\n", buf); + } + } + + if (i + 1 < device->num_peers) + printf("\n"); + } + ret = 0; + +cleanup: + free(device); + return ret; +} diff --git a/src/tools/subcommands.h b/src/tools/subcommands.h new file mode 100644 index 0000000..8351f8f --- /dev/null +++ b/src/tools/subcommands.h @@ -0,0 +1,14 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. 
*/ + +#ifndef SUBCOMMANDS_H +#define SUBCOMMANDS_H + +extern const char *PROG_NAME; +int show_main(int argc, char *argv[]); +int showconf_main(int argc, char *argv[]); +int set_main(int argc, char *argv[]); +int setconf_main(int argc, char *argv[]); +int genkey_main(int argc, char *argv[]); +int pubkey_main(int argc, char *argv[]); + +#endif diff --git a/src/tools/terminal.c b/src/tools/terminal.c new file mode 100644 index 0000000..74d04c2 --- /dev/null +++ b/src/tools/terminal.c @@ -0,0 +1,79 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include + +static bool color_mode(FILE *file) +{ + static int mode = -1; + char *var; + if (mode != -1) + return mode; + var = getenv("WG_COLOR_MODE"); + if (var && !strcmp(var, "always")) + mode = true; + else if (var && !strcmp(var, "never")) + mode = false; + else + return isatty(fileno(file)); + return mode; +} + +static void filter_ansi(FILE *file, const char *fmt, va_list args) +{ + char *str = NULL; + size_t len, i, j; + + if (color_mode(file)) { + vfprintf(file, fmt, args); + return; + } + + len = vasprintf(&str, fmt, args); + + if (len >= 2) { + for (i = 0; i < len - 2; ++i) { + if (str[i] == '\x1b' && str[i + 1] == '[') { + str[i] = str[i + 1] = '\0'; + for (j = i + 2; j < len; ++j) { + if (isalpha(str[j])) + break; + str[j] = '\0'; + } + str[j] = '\0'; + } + } + } + for (i = 0; i < len; i = j) { + fputs(&str[i], file); + for (j = i + strlen(&str[i]); j < len; ++j) { + if (str[j] != '\0') + break; + } + } + + free(str); +} + +void terminal_printf(const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + filter_ansi(stdout, fmt, args); + va_end(args); +} + +void terminal_fprintf(FILE *file, const char *fmt, ...) 
+{ + va_list args; + va_start(args, fmt); + filter_ansi(file, fmt, args); + va_end(args); +} diff --git a/src/tools/terminal.h b/src/tools/terminal.h new file mode 100644 index 0000000..825c057 --- /dev/null +++ b/src/tools/terminal.h @@ -0,0 +1,49 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. */ + +#ifndef TERMINAL_H +#define TERMINAL_H + +#define TERMINAL_FG_BLACK "\x1b[30m" +#define TERMINAL_FG_RED "\x1b[31m" +#define TERMINAL_FG_GREEN "\x1b[32m" +#define TERMINAL_FG_YELLOW "\x1b[33m" +#define TERMINAL_FG_BLUE "\x1b[34m" +#define TERMINAL_FG_MAGENTA "\x1b[35m" +#define TERMINAL_FG_CYAN "\x1b[36m" +#define TERMINAL_FG_WHITE "\x1b[37m" +#define TERMINAL_FG_DEFAULT "\x1b[39m" + +#define TERMINAL_BG_BLACK "\x1b[40m" +#define TERMINAL_BG_RED "\x1b[41m" +#define TERMINAL_BG_GREEN "\x1b[42m" +#define TERMINAL_BG_YELLOW "\x1b[43m" +#define TERMINAL_BG_BLUE "\x1b[44m" +#define TERMINAL_BG_MAGENTA "\x1b[45m" +#define TERMINAL_BG_CYAN "\x1b[46m" +#define TERMINAL_BG_WHITE "\x1b[47m" +#define TERMINAL_BG_DEFAULT "\x1b[49m" + +#define TERMINAL_BOLD "\x1b[1m" +#define TERMINAL_NO_BOLD "\x1b[22m" +#define TERMINAL_UNDERLINE "\x1b[4m" +#define TERMINAL_NO_UNDERLINE "\x1b[24m" + +#define TERMINAL_RESET "\x1b[0m" + +#define TERMINAL_SAVE_CURSOR "\x1b[s" +#define TERMINAL_RESTORE_CURSOR "\x1b[u" +#define TERMINAL_UP_CURSOR(l) "\x1b[" #l "A" +#define TERMINAL_DOWN_CURSOR(l) "\x1b[" #l "B" +#define TERMINAL_RIGHT_CURSOR(c) "\x1b[" #c "C" +#define TERMINAL_LEFT_CURSOR(c) "\x1b[" #c "D" +#define TERMINAL_CLEAR_DOWN "\x1b[0J" +#define TERMINAL_CLEAR_UP "\x1b[1J" +#define TERMINAL_CLEAR_RIGHT "\x1b[0K" +#define TERMINAL_CLEAR_LEFT "\x1b[1K" +#define TERMINAL_CLEAR_LINE "\x1b[2K" +#define TERMINAL_CLEAR_ALL "\x1b[2J" + +void terminal_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); +void terminal_fprintf(FILE *file, const char *fmt, ...) 
__attribute__((format(printf, 2, 3))); + +#endif diff --git a/src/tools/wg.8 b/src/tools/wg.8 new file mode 100644 index 0000000..4ee5027 --- /dev/null +++ b/src/tools/wg.8 @@ -0,0 +1,194 @@ +.TH WG 8 "2015 August 13" ZX2C4 "WireGuard" + +.SH NAME +wg - set and retrieve configuration of WireGuard interfaces + +.SH SYNOPSIS +.B wg +[ +.I COMMAND +] [ +.I OPTIONS +]... [ +.I ARGS +]... + +.SH DESCRIPTION + +.B wg +is the configuration utility for getting and setting the configuration of +WireGuard tunnel interfaces. The interfaces themselves can be added and removed +using +.BR ip-link (8) +and their IP addresses and routing tables can be set using +.BR ip-address (8) +and +.BR ip-route (8). +The +.B wg +utility provides a series of sub-commands for changing WireGuard-specific +aspects of WireGuard interfaces. + +If no COMMAND is specified, COMMAND defaults to +.BR show . +Sub-commands that take an INTERFACE must be passed a WireGuard interface. + +.SH COMMANDS + +.TP +\fBshow\fP { \fI<interface>\fP | \fIall\fP | \fIinterfaces\fP } [\fIpublic-key\fP | \fIprivate-key\fP | \fIpreshared-key\fP | \fIlisten-port\fP | \fIpeers\fP | \fIendpoints\fP | \fIallowed-ips\fP | \fIlatest-handshakes\fP | \fIbandwidth\fP] +Shows current WireGuard configuration of specified \fI<interface>\fP. +If no \fI<interface>\fP is specified, \fI<interface>\fP defaults to \fIall\fP. +If \fIinterfaces\fP is specified, prints a list of all WireGuard interfaces, +one per line, and quits. If no options are given after the interface +specification, then prints a list of all attributes in a visually pleasing way +meant for the terminal. Otherwise, prints specified information grouped by +newlines and tabs, meant to be used in scripts. +.TP +\fBshowconf\fP \fI<interface>\fP +Shows the current configuration of \fI<interface>\fP in the format described +by \fICONFIGURATION FILE FORMAT\fP below. 
+.TP +\fBset\fP \fI<interface>\fP [\fIlisten-port\fP \fI<port>\fP] [\fIprivate-key\fP \fI<file-path>\fP] [\fIpreshared-key\fP \fI<file-path>\fP] [\fIpeer\fP \fI<base64-public-key>\fP [\fIremove\fP] [\fIendpoint\fP \fI<ip>:<port>\fP] [\fIallowed-ips\fP \fI<ip>/<cidr>\fP[,\fI<ip>/<cidr>\fP]...] ]... +Sets configuration values for the specified \fI<interface>\fP. Multiple +\fIpeer\fPs may be specified, and if the \fIremove\fP argument is given +for a peer, that peer is removed, not configured. If \fIlisten-port\fP +is not specified, the port will be automatically generated when the +interface comes up. Both \fIprivate-key\fP and \fIpreshared-key\fP must +be files, for security reasons, but if you're using +.BR bash (1), +you may safely pass in a string by specifying as \fIprivate-key\fP or +\fIpreshared-key\fP the expression: <(echo PRIVATEKEYSTRING). If +\fI/dev/null\fP is specified as the filename for either \fIprivate-key\fP or +\fIpreshared-key\fP, the key is removed from the device. The use of +\fIpreshared-key\fP is optional, and may be omitted; it adds an additional +layer of symmetric-key cryptography to be mixed into the already existing +public-key cryptography, for post-quantum resistance. If \fIallowed-ips\fP +is specified, but the value is the empty string, all allowed ips are removed +from the peer. +.TP +\fBsetconf\fP \fI<interface>\fP \fI<configuration-filename>\fP +Sets the current configuration of \fI<interface>\fP to the contents of +\fI<configuration-filename>\fP, which must be in the format described +by \fICONFIGURATION FILE FORMAT\fP below. +.TP +\fBaddconf\fP \fI<interface>\fP \fI<configuration-filename>\fP +Appends the contents of \fI<configuration-filename>\fP, which must +be in the format described by \fICONFIGURATION FILE FORMAT\fP below, +to the current configuration of \fI<interface>\fP. +.TP +\fBgenkey\fP +Generates a random \fIprivate\fP key in base64 and prints it to +standard output. +.TP +\fBgenpsk\fP +Generates a random \fIpreshared\fP key in base64 and prints it to +standard output. 
+.TP +\fBpubkey\fP +Calculates a \fIpublic\fP key and prints it in base64 to standard +output from a corresponding \fIprivate\fP key (generated with +\fIgenkey\fP) given in base64 on standard input. + +A private key and a corresponding public key may be generated at once by calling: +.br + $ umask 077 +.br + $ wg genkey | tee private.key | wg pubkey > public.key +.TP +\fBhelp\fP +Shows usage message. + +.SH CONFIGURATION FILE FORMAT +The configuration file format is based on \fIINI\fP. There are two top level sections +-- \fIInterface\fP and \fIPeer\fP. Multiple \fIPeer\fP sections may be specified, but +only one \fIInterface\fP section may be specified. + +.P +The \fIInterface\fP section contains three fields: +.IP \(bu +PrivateKey \(em a base64 private key generated by \fIwg genkey\fP. Required. +.IP \(bu +PresharedKey \(em a base64 preshared key generated by \fIwg genpsk\fP. Optional, +and may be omitted. This option adds an additional layer of symmetric-key +cryptography to be mixed into the already existing public-key cryptography, +for post-quantum resistance. +.IP \(bu +ListenPort \(em a 16-bit port for listening. Optional; if not specified, +automatically generated based on interface name. +.P +The \fIPeer\fP sections contain three fields each: +.IP \(bu +PublicKey \(em a base64 public key calculated by \fIwg pubkey\fP from a +private key, and usually transmitted out of band to the author of the +configuration file. Required. +.IP \(bu +AllowedIPs \(em a comma-separated list of IP (v4 or v6) addresses with +CIDR masks. The catch-all \fI0.0.0.0/0\fP may be specified for matching +all IPv4 addresses, and \fI::/0\fP may be specified for matching all +IPv6 addresses. Required. +.IP \(bu +Endpoint \(em an endpoint IP or hostname, followed by a colon, and then a +port number. Optional. + +.SH CONFIGURATION FILE FORMAT EXAMPLE +This example may be used as a model for writing configuration files. +Note that not all keys are required. 
+ + [Interface] +.br + PrivateKey = yAnz5TF+lXXJte14tji3zlMNq+hd2rYUIgJBgB3fBmk= +.br + ListenPort = 41414 +.br + +.br + [Peer] +.br + PublicKey = xTIBA5rboUvnH4htodjb6e697QjLERt1NAB4mZqp8Dg= +.br + Endpoint = 192.95.5.67:1234 +.br + AllowedIPs = 10.192.122.3/32, 10.192.124.1/24 +.br + +.br + [Peer] +.br + PublicKey = TrMvSoP4jYQlY6RIzBgbssQqY3vxI2Pi+y71lOWWXX0= +.br + Endpoint = [2607:5300:60:6b0::c05f:543]:2468 +.br + AllowedIPs = 10.192.122.4/32, 192.168.0.0/16 +.br + +.br + [Peer] +.br + PublicKey = gN65BkIKy1eCE9pP1wdc8ROUtkHLF2PfAqYdyYBz6EA= +.br + Endpoint = test.wireguard.io:18981 +.br + AllowedIPs = 10.10.10.230/32 + +.SH ENVIRONMENT VARIABLES +.TP +.I WG_COLOR_MODE +If set to \fIalways\fP, always print ANSI colorized output. If set to \fInever\fP, never print ANSI colorized output. If set to \fIauto\fP, something invalid, or unset, then print ANSI colorized output only when writing to a TTY. + +.SH SEE ALSO +.BR ip (8), +.BR ip-link (8), +.BR ip-address (8), +.BR ip-route (8). + +.SH AUTHOR +.B wg +was written by +.MT Jason@zx2c4.com +Jason A. Donenfeld +.ME . +For updates and more information, a project page is available on the +.UR http://\:www.wireguard.io/ +World Wide Web +.UE . diff --git a/src/tools/wg.c b/src/tools/wg.c new file mode 100644 index 0000000..d4d2965 --- /dev/null +++ b/src/tools/wg.c @@ -0,0 +1,66 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. 
*/ + +#include +#include +#include + +#include "subcommands.h" + +const char *PROG_NAME; + +static const struct { + const char *subcommand; + int (*function)(int, char**); + const char *description; +} subcommands[] = { + { "show", show_main, "Shows the current configuration and device information" }, + { "showconf", showconf_main, "Shows the current configuration of a given WireGuard interface, for use with `setconf`" }, + { "set", set_main, "Change the current configuration, add peers, remove peers, or change peers" }, + { "setconf", setconf_main, "Applies a configuration file to a WireGuard interface" }, + { "addconf", setconf_main, "Appends a configuration file to a WireGuard interface" }, + { "genkey", genkey_main, "Generates a new private key and writes it to stdout" }, + { "genpsk", genkey_main, "Generates a new pre-shared key and writes it to stdout" }, + { "pubkey", pubkey_main, "Reads a private key from stdin and writes a public key to stdout" } +}; + +static void show_usage(void) +{ + fprintf(stderr, "Usage: %s []\n\n", PROG_NAME); + fprintf(stderr, "Available subcommands:\n"); + for (size_t i = 0; i < sizeof(subcommands) / sizeof(subcommands[0]); ++i) + fprintf(stderr, " %s: %s\n", subcommands[i].subcommand, subcommands[i].description); +} + +int main(int argc, char *argv[]) +{ + char *tmp = NULL; + PROG_NAME = argv[0]; + + if (argc == 2 && (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help") || !strcmp(argv[1], "help"))) { + show_usage(); + return 1; + } + + if (argc == 1) { + char *new_argv[] = { "show", NULL }; + return show_main(1, new_argv); + } + +findsubcommand: + for (size_t i = 0; i < sizeof(subcommands) / sizeof(subcommands[0]); ++i) { + if (!strcmp(argv[1], subcommands[i].subcommand)) + return subcommands[i].function(argc - 1, argv + 1); + } + + /* Crude way of supporting "wg wg0 show..." 
*/ + if (!tmp && argc >= 3) { + tmp = argv[1]; + argv[1] = argv[2]; + argv[2] = tmp; + goto findsubcommand; + } + + fprintf(stderr, "Invalid subcommand: `%s`\n", argv[1]); + show_usage(); + return 1; +} diff --git a/src/uapi.h b/src/uapi.h new file mode 100644 index 0000000..21d4af1 --- /dev/null +++ b/src/uapi.h @@ -0,0 +1,122 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. + * + * Userspace API for WireGuard + * --------------------------- + * + * ioctl(WG_GET_DEVICE, { .ifr_name: "wg0", .ifr_data: NULL }): + * + * Returns the number of bytes required to hold the peers of a device (`ret_peers_size`). + * + * ioctl(WG_GET_DEVICE, { .ifr_name: "wg0", .ifr_data: user_pointer }): + * + * Retrieves device info, peer info, and ipmask info. + * + * `user_pointer` must point to a region of memory of size `sizeof(struct wgdevice) + ret_peers_size` + * and containing the structure `struct wgdevice { .peers_size: ret_peers_size }`. + * + * Writes to `user_pointer` a succession of structs: + * + * struct wgdevice { .num_peers = 3 } + * struct wgpeer { .num_ipmasks = 4 } + * struct wgipmask + * struct wgipmask + * struct wgipmask + * struct wgipmask + * struct wgpeer { .num_ipmasks = 2 } + * struct wgipmask + * struct wgipmask + * struct wgpeer { .num_ipmasks = 0 } + * + * Returns 0 on success. Returns -EMSGSIZE if there is too much data for the size of passed-in + * memory, in which case, this should be recalculated using the call above. Returns -errno if + * another error occurred. + * + * ioctl(WG_SET_DEVICE, { .ifr_name: "wg0", .ifr_data: user_pointer }): + * + * Sets device info, peer info, and ipmask info. 
+ * + * `user_pointer` must point to a region of memory containing a succession of structs: + * + * struct wgdevice { .num_peers = 3 } + * struct wgpeer { .num_ipmasks = 4 } + * struct wgipmask + * struct wgipmask + * struct wgipmask + * struct wgipmask + * struct wgpeer { .num_ipmasks = 2 } + * struct wgipmask + * struct wgipmask + * struct wgpeer { .num_ipmasks = 0 } + * + * If `wgdevice->replace_peer_list` is true, removes all peers of device before adding new ones. + * If `wgpeer->remove_me` is true, the peer identified by `wgpeer->public_key` is removed. + * If `wgpeer->replace_ipmasks` is true, removes all ipmasks before adding new ones. + * If `wgdevice->private_key` is filled with zeros, no action is taken on the private key. + * If `wgdevice->preshared_key` is filled with zeros, no action is taken on the pre-shared key. + * If `wgdevice->remove_private_key` is true, the private key is removed. + * If `wgdevice->remove_preshared_key` is true, the pre-shared key is removed. + * + * Returns 0 on success, or -errno if an error occurred. 
+ */ + + +#ifndef WGUAPI_H +#define WGUAPI_H + +#include +#ifdef __KERNEL__ +#include +#include +#else +#include +#include +#endif + +#define WG_GET_DEVICE (SIOCDEVPRIVATE + 0) +#define WG_SET_DEVICE (SIOCDEVPRIVATE + 1) + +#define WG_KEY_LEN 32 + +struct wgipmask { + __s32 family; + union { + struct in_addr ip4; + struct in6_addr ip6; + }; + __u8 cidr; +}; + +struct wgpeer { + __u8 public_key[WG_KEY_LEN]; /* Get/Set */ + + struct sockaddr_storage endpoint; /* Get/Set */ + + struct timeval last_handshake_time; /* Get */ + __u64 rx_bytes, tx_bytes; /* Get */ + + __u32 remove_me : 1; /* Set */ + __u32 replace_ipmasks : 1; /* Set */ + + __u16 num_ipmasks; /* Get/Set */ +}; + +struct wgdevice { + char interface[IFNAMSIZ]; /* Get */ + + __u8 public_key[WG_KEY_LEN]; /* Get/Set */ + __u8 private_key[WG_KEY_LEN]; /* Get/Set */ + __u8 preshared_key[WG_KEY_LEN]; /* Get/Set */ + + __u16 port; /* Get/Set */ + + __u32 replace_peer_list : 1; /* Set */ + __u32 remove_private_key : 1; /* Set */ + __u32 remove_preshared_key : 1; /* Set */ + + union { + __u16 num_peers; /* Get/Set */ + __u64 peers_size; /* Get */ + }; +}; + +#endif diff --git a/src/wireguard.h b/src/wireguard.h new file mode 100644 index 0000000..5c83ab6 --- /dev/null +++ b/src/wireguard.h @@ -0,0 +1,83 @@ +/* Copyright 2015-2016 Jason A. Donenfeld . All Rights Reserved. 
*/ + +#ifndef WIREGUARD_H +#define WIREGUARD_H + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0) +#error "WireGuard requires Linux >= 4.1" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "crypto/chacha20poly1305.h" +#include "crypto/curve25519.h" +#include "crypto/siphash24.h" +#include "noise.h" +#include "routing-table.h" +#include "hashtables.h" +#include "peer.h" +#include "cookie.h" + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && !defined(DEBUG) && defined(net_dbg_ratelimited) +#undef net_dbg_ratelimited +#define net_dbg_ratelimited(fmt, ...) do { if (0) no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); } while (0) +#endif + +struct wireguard_device { + struct sock __rcu *sock4, *sock6; + u16 incoming_port; + struct net *creating_net; + struct workqueue_struct *workqueue; + struct workqueue_struct *parallelqueue; + struct padata_instance *parallel_send, *parallel_receive; + struct noise_static_identity static_identity; + struct sk_buff_head incoming_handshakes; + struct work_struct incoming_handshakes_work; + struct cookie_checker cookie_checker; + struct pubkey_hashtable peer_hashtable; + struct index_hashtable index_hashtable; + struct routing_table peer_routing_table; + struct list_head peer_list; + struct mutex device_update_lock; + struct mutex socket_update_lock; +}; + +/* Inverse of netdev_priv in include/linux/netdevice.h + * TODO: Try to get this function upstream, a la: https://lkml.org/lkml/2015/6/12/415 */ +static inline struct net_device *netdev_pub(void *dev) +{ + return (struct net_device *)((char *)dev - ALIGN(sizeof(struct net_device), NETDEV_ALIGN)); +} + +/* 64-bit jiffy functions. See include/linux/jiffies.h for the 32 bit ones these resemble. 
*/ +static inline bool time_is_before_jiffies64(uint64_t a) +{ + return time_after64(get_jiffies_64(), a); +} +static inline bool time_is_after_jiffies64(uint64_t a) +{ + return time_before64(get_jiffies_64(), a); +} +static inline bool time_is_before_eq_jiffies64(uint64_t a) +{ + return time_after_eq64(get_jiffies_64(), a); +} +static inline bool time_is_after_eq_jiffies64(uint64_t a) +{ + return time_before_eq64(get_jiffies_64(), a); +} + +#endif