/* 
 * SHA-512 hash in x86-64 assembly
 * 
 * Copyright (c) 2021 Project Nayuki. (MIT License)
 * https://www.nayuki.io/page/fast-sha2-hashes-in-x86-assembly
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 * the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 * - The above copyright notice and this permission notice shall be included in
 *   all copies or substantial portions of the Software.
 * - The Software is provided "as is", without warranty of any kind, express or
 *   implied, including but not limited to the warranties of merchantability,
 *   fitness for a particular purpose and noninfringement. In no event shall the
 *   authors or copyright holders be liable for any claim, damages or other
 *   liability, whether in an action of contract, tort or otherwise, arising from,
 *   out of or in connection with the Software or the use or other dealings in the
 *   Software.
 */


/*
 * void sha512_compress(const uint8_t block[static 128], uint64_t state[static 8])
 *
 * Applies one SHA-512 compression step: runs the 80 rounds over the 128-byte
 * message block and adds the resulting working variables into the 8-word state.
 *
 * Syntax: GNU as, AT&T operand order (source, destination), preprocessed by cpp.
 * ABI:    arguments are taken from rdi and rsi (first two integer argument
 *         registers of the System V AMD64 calling convention).
 * In:     rdi = block (128 bytes, read as sixteen 64-bit words and byte-swapped)
 *         rsi = state (eight 64-bit words, read at entry and updated in place)
 * Out:    none (state[] is modified through rsi)
 * Clobb:  rax, rcx, rdx, r8, r9, xmm0-xmm6, flags
 * Keeps:  rbx, r10-r15 (parked in xmm0-xmm6 for the duration of the call),
 *         rdi, rsi, rsp
 * Stack:  128 bytes below the entry rsp hold the message schedule; the words
 *         are left there on return (not wiped).
 */
	.text
.globl sha512_compress
#if defined(__ELF__)
	/* Mark the symbol as a function for ELF tooling (debuggers, profilers). */
	.type  sha512_compress, @function
#endif
sha512_compress:
	/* 
	 * Storage usage:
	 *   Bytes  Location  Description
	 *       8  rax       Temporary for calculation per round
	 *       8  rbx       Temporary for calculation per round
	 *       8  rcx       Temporary for calculation per round
	 *       8  rdx       Temporary for calculation per round
	 *       8  rsi       Base address of state array argument (read-only)
	 *       8  rdi       Base address of block array argument (read-only)
	 *       8  rsp       x86-64 stack pointer
	 *       8  r8        SHA-512 state variable A
	 *       8  r9        SHA-512 state variable B
	 *       8  r10       SHA-512 state variable C
	 *       8  r11       SHA-512 state variable D
	 *       8  r12       SHA-512 state variable E
	 *       8  r13       SHA-512 state variable F
	 *       8  r14       SHA-512 state variable G
	 *       8  r15       SHA-512 state variable H
	 *     128  [rsp+0]   Circular buffer of most recent 16 key schedule items, 8 bytes each
	 *      16  xmm0      Caller's value of r10 (only low 64 bits are used)
	 *      16  xmm1      Caller's value of r11 (only low 64 bits are used)
	 *      16  xmm2      Caller's value of r12 (only low 64 bits are used)
	 *      16  xmm3      Caller's value of r13 (only low 64 bits are used)
	 *      16  xmm4      Caller's value of r14 (only low 64 bits are used)
	 *      16  xmm5      Caller's value of r15 (only low 64 bits are used)
	 *      16  xmm6      Caller's value of rbx (only low 64 bits are used)
	 */
	
	/*
	 * SCHED(i): memory operand for schedule word i. Only the low 4 bits of i
	 * select the slot, so the 16 slots at [rsp, rsp+128) form a ring buffer and
	 * SCHED(i) aliases SCHED(i-16).
	 */
	#define SCHED(i)  (((i)&0xF)*8)(%rsp)
	
	/*
	 * ROUNDa (rounds 0..15): schedule word i is message word i, loaded from the
	 * block and byte-swapped (the block is interpreted as big-endian words).
	 * Leaves the word in rbx and in SCHED(i), then performs the round.
	 */
	#define ROUNDa(i, a, b, c, d, e, f, g, h, k)  \
		movq  (i*8)(%rdi), %rbx;  \
		bswapq  %rbx;             \
		movq  %rbx, SCHED(i);     \
		ROUNDTAIL(a, b, c, d, e, f, g, h, k)
	
	/*
	 * ROUNDb (rounds 16..79): extends the schedule,
	 *   w[i] = w[i-16] + s0(w[i-15]) + w[i-7] + s1(w[i-2])
	 * with s0(x) = ror(x,1) ^ ror(x,8) ^ (x >> 7)
	 * and  s1(x) = ror(x,19) ^ ror(x,61) ^ (x >> 6).
	 * The result is accumulated in rbx and overwrites the slot of w[i-16].
	 */
	#define ROUNDb(i, a, b, c, d, e, f, g, h, k)  \
		movq  SCHED(i-15), %rax;  \
		movq  SCHED(i-16), %rbx;  \
		addq  SCHED(i- 7), %rbx;  \
		/* rax = s0(w[i-15]) */   \
		movq  %rax, %rcx;         \
		movq  %rax, %rdx;         \
		rorq  $8, %rcx;           \
		shrq  $7, %rdx;           \
		rorq  $1, %rax;           \
		xorq  %rdx, %rcx;         \
		xorq  %rcx, %rax;         \
		addq  %rax, %rbx;         \
		/* rax = s1(w[i-2]) */    \
		movq  SCHED(i- 2), %rax;  \
		movq  %rax, %rcx;         \
		movq  %rax, %rdx;         \
		rorq  $61, %rcx;          \
		shrq  $6, %rdx;           \
		rorq  $19, %rax;          \
		xorq  %rdx, %rcx;         \
		xorq  %rcx, %rax;         \
		addq  %rax, %rbx;         \
		movq  %rbx, SCHED(i);     \
		ROUNDTAIL(a, b, c, d, e, f, g, h, k)
	
	/*
	 * ROUNDTAIL: one round, given the schedule word in rbx and round constant k.
	 * The working variables are never moved between registers; instead each
	 * invocation passes the eight register names rotated by one position, and
	 * the register named h receives the value that acts as "a" in the next round.
	 * Uses rax, rcx, rdx as scratch.
	 */
	#define ROUNDTAIL(a, b, c, d, e, f, g, h, k)  \
		/* Part 0: h += w + (ror(e,14) ^ ror(e,18) ^ ror(e,41)) + ((e & (f ^ g)) ^ g) + k */ \
		movq  %e, %rcx;    \
		movq  %e, %rdx;    \
		movq  %e, %rax;    \
		rorq  $18, %rcx;   \
		rorq  $41, %rdx;   \
		rorq  $14, %rax;   \
		xorq  %rdx, %rcx;  \
		xorq  %rcx, %rax;  \
		addq  %rbx, %h;    \
		movq  %g, %rcx;    \
		xorq  %f, %rcx;    \
		andq  %e, %rcx;    \
		xorq  %g, %rcx;    \
		addq  %rax, %h;    \
		/* k is a full 64-bit constant, so it must be materialized with movabs */ \
		movabs  $k, %rax;  \
		addq  %rcx, %h;    \
		addq  %rax, %h;    \
		/* Part 1: d += h (d now holds the next round's "e") */ \
		addq  %h, %d;      \
		/* Part 2: h += (ror(a,28) ^ ror(a,34) ^ ror(a,39)) + ((a & (b | c)) | (b & c)) */ \
		movq  %a, %rcx;    \
		movq  %a, %rdx;    \
		movq  %a, %rax;    \
		rorq  $39, %rcx;   \
		rorq  $34, %rdx;   \
		rorq  $28, %rax;   \
		xorq  %rdx, %rcx;  \
		xorq  %rcx, %rax;  \
		movq  %c, %rcx;    \
		addq  %rax, %h;    \
		movq  %c, %rax;    \
		orq   %b, %rax;    \
		andq  %b, %rcx;    \
		andq  %a, %rax;    \
		orq   %rcx, %rax;  \
		addq  %rax, %h;
	
	/*
	 * Save registers, allocate scratch space.
	 * Registers are parked in xmm0-xmm6 rather than pushed, so the only stack
	 * use is the 128-byte schedule buffer. r10 and r11 are preserved as well,
	 * although the System V ABI does not require it.
	 */
	movq  %r10, %xmm0
	movq  %r11, %xmm1
	movq  %r12, %xmm2
	movq  %r13, %xmm3
	movq  %r14, %xmm4
	movq  %r15, %xmm5
	movq  %rbx, %xmm6
	subq  $128, %rsp
	
	/* Load state */
	movq   0(%rsi), %r8   /* a */
	movq   8(%rsi), %r9   /* b */
	movq  16(%rsi), %r10  /* c */
	movq  24(%rsi), %r11  /* d */
	movq  32(%rsi), %r12  /* e */
	movq  40(%rsi), %r13  /* f */
	movq  48(%rsi), %r14  /* g */
	movq  56(%rsi), %r15  /* h */
	
	/*
	 * Do 80 rounds of hashing.
	 * The register list rotates right by one each round and returns to its
	 * starting order every 8 rounds; since 80 is a multiple of 8, after the
	 * last round r8..r15 again hold a..h in order.
	 */
	ROUNDa( 0, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x428A2F98D728AE22)
	ROUNDa( 1, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x7137449123EF65CD)
	ROUNDa( 2, r14, r15, r8 , r9 , r10, r11, r12, r13, 0xB5C0FBCFEC4D3B2F)
	ROUNDa( 3, r13, r14, r15, r8 , r9 , r10, r11, r12, 0xE9B5DBA58189DBBC)
	ROUNDa( 4, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x3956C25BF348B538)
	ROUNDa( 5, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x59F111F1B605D019)
	ROUNDa( 6, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x923F82A4AF194F9B)
	ROUNDa( 7, r9 , r10, r11, r12, r13, r14, r15, r8 , 0xAB1C5ED5DA6D8118)
	ROUNDa( 8, r8 , r9 , r10, r11, r12, r13, r14, r15, 0xD807AA98A3030242)
	ROUNDa( 9, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x12835B0145706FBE)
	ROUNDa(10, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x243185BE4EE4B28C)
	ROUNDa(11, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x550C7DC3D5FFB4E2)
	ROUNDa(12, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x72BE5D74F27B896F)
	ROUNDa(13, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x80DEB1FE3B1696B1)
	ROUNDa(14, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x9BDC06A725C71235)
	ROUNDa(15, r9 , r10, r11, r12, r13, r14, r15, r8 , 0xC19BF174CF692694)
	ROUNDb(16, r8 , r9 , r10, r11, r12, r13, r14, r15, 0xE49B69C19EF14AD2)
	ROUNDb(17, r15, r8 , r9 , r10, r11, r12, r13, r14, 0xEFBE4786384F25E3)
	ROUNDb(18, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x0FC19DC68B8CD5B5)
	ROUNDb(19, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x240CA1CC77AC9C65)
	ROUNDb(20, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x2DE92C6F592B0275)
	ROUNDb(21, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x4A7484AA6EA6E483)
	ROUNDb(22, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x5CB0A9DCBD41FBD4)
	ROUNDb(23, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x76F988DA831153B5)
	ROUNDb(24, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x983E5152EE66DFAB)
	ROUNDb(25, r15, r8 , r9 , r10, r11, r12, r13, r14, 0xA831C66D2DB43210)
	ROUNDb(26, r14, r15, r8 , r9 , r10, r11, r12, r13, 0xB00327C898FB213F)
	ROUNDb(27, r13, r14, r15, r8 , r9 , r10, r11, r12, 0xBF597FC7BEEF0EE4)
	ROUNDb(28, r12, r13, r14, r15, r8 , r9 , r10, r11, 0xC6E00BF33DA88FC2)
	ROUNDb(29, r11, r12, r13, r14, r15, r8 , r9 , r10, 0xD5A79147930AA725)
	ROUNDb(30, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x06CA6351E003826F)
	ROUNDb(31, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x142929670A0E6E70)
	ROUNDb(32, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x27B70A8546D22FFC)
	ROUNDb(33, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x2E1B21385C26C926)
	ROUNDb(34, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x4D2C6DFC5AC42AED)
	ROUNDb(35, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x53380D139D95B3DF)
	ROUNDb(36, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x650A73548BAF63DE)
	ROUNDb(37, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x766A0ABB3C77B2A8)
	ROUNDb(38, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x81C2C92E47EDAEE6)
	ROUNDb(39, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x92722C851482353B)
	ROUNDb(40, r8 , r9 , r10, r11, r12, r13, r14, r15, 0xA2BFE8A14CF10364)
	ROUNDb(41, r15, r8 , r9 , r10, r11, r12, r13, r14, 0xA81A664BBC423001)
	ROUNDb(42, r14, r15, r8 , r9 , r10, r11, r12, r13, 0xC24B8B70D0F89791)
	ROUNDb(43, r13, r14, r15, r8 , r9 , r10, r11, r12, 0xC76C51A30654BE30)
	ROUNDb(44, r12, r13, r14, r15, r8 , r9 , r10, r11, 0xD192E819D6EF5218)
	ROUNDb(45, r11, r12, r13, r14, r15, r8 , r9 , r10, 0xD69906245565A910)
	ROUNDb(46, r10, r11, r12, r13, r14, r15, r8 , r9 , 0xF40E35855771202A)
	ROUNDb(47, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x106AA07032BBD1B8)
	ROUNDb(48, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x19A4C116B8D2D0C8)
	ROUNDb(49, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x1E376C085141AB53)
	ROUNDb(50, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x2748774CDF8EEB99)
	ROUNDb(51, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x34B0BCB5E19B48A8)
	ROUNDb(52, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x391C0CB3C5C95A63)
	ROUNDb(53, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x4ED8AA4AE3418ACB)
	ROUNDb(54, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x5B9CCA4F7763E373)
	ROUNDb(55, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x682E6FF3D6B2B8A3)
	ROUNDb(56, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x748F82EE5DEFB2FC)
	ROUNDb(57, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x78A5636F43172F60)
	ROUNDb(58, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x84C87814A1F0AB72)
	ROUNDb(59, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x8CC702081A6439EC)
	ROUNDb(60, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x90BEFFFA23631E28)
	ROUNDb(61, r11, r12, r13, r14, r15, r8 , r9 , r10, 0xA4506CEBDE82BDE9)
	ROUNDb(62, r10, r11, r12, r13, r14, r15, r8 , r9 , 0xBEF9A3F7B2C67915)
	ROUNDb(63, r9 , r10, r11, r12, r13, r14, r15, r8 , 0xC67178F2E372532B)
	ROUNDb(64, r8 , r9 , r10, r11, r12, r13, r14, r15, 0xCA273ECEEA26619C)
	ROUNDb(65, r15, r8 , r9 , r10, r11, r12, r13, r14, 0xD186B8C721C0C207)
	ROUNDb(66, r14, r15, r8 , r9 , r10, r11, r12, r13, 0xEADA7DD6CDE0EB1E)
	ROUNDb(67, r13, r14, r15, r8 , r9 , r10, r11, r12, 0xF57D4F7FEE6ED178)
	ROUNDb(68, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x06F067AA72176FBA)
	ROUNDb(69, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x0A637DC5A2C898A6)
	ROUNDb(70, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x113F9804BEF90DAE)
	ROUNDb(71, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x1B710B35131C471B)
	ROUNDb(72, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x28DB77F523047D84)
	ROUNDb(73, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x32CAAB7B40C72493)
	ROUNDb(74, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x3C9EBE0A15C9BEBC)
	ROUNDb(75, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x431D67C49C100D4C)
	ROUNDb(76, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x4CC5D4BECB3E42B6)
	ROUNDb(77, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x597F299CFC657E2A)
	ROUNDb(78, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x5FCB6FAB3AD6FAEC)
	ROUNDb(79, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x6C44198C4A475817)
	
	/* Add to state (64-bit wrapping addition, directly into memory) */
	addq  %r8 ,  0(%rsi)
	addq  %r9 ,  8(%rsi)
	addq  %r10, 16(%rsi)
	addq  %r11, 24(%rsi)
	addq  %r12, 32(%rsi)
	addq  %r13, 40(%rsi)
	addq  %r14, 48(%rsi)
	addq  %r15, 56(%rsi)
	
	/* Restore registers, release scratch space */
	movq  %xmm0, %r10
	movq  %xmm1, %r11
	movq  %xmm2, %r12
	movq  %xmm3, %r13
	movq  %xmm4, %r14
	movq  %xmm5, %r15
	movq  %xmm6, %rbx
	addq  $128, %rsp
	retq

#if defined(__ELF__)
	.size  sha512_compress, .-sha512_compress
	/*
	 * Declare that this object does not need an executable stack. Without this
	 * note the GNU linker assumes it does. pushsection/popsection keep the
	 * current section unchanged for anything that follows in this file.
	 */
	.pushsection .note.GNU-stack,"",@progbits
	.popsection
#endif