Rusty Russell
10 years ago
38 changed files with 6158 additions and 12 deletions
@ -1,3 +1,3 @@ |
|||
CCAN imported from http://ccodearchive.net. |
|||
|
|||
CCAN version: init-1956-ged95d86 |
|||
CCAN version: init-2039-g396f2fc |
|||
|
@ -0,0 +1,20 @@ |
|||
# Builds double-sha-bench, which times CCAN's C SHA-256 against the Intel
# assembler implementations (assembled with yasm for x86-64 ELF).
CCANDIR := ../../../../
CFLAGS := -Wall -I$(CCANDIR) -O3 -flto -DCCAN_USE_ORIGINAL=1
LDFLAGS := -O3 -flto

INTEL_OBJS := sha256_avx1.o sha256_avx2_rorx2.o sha256_avx2_rorx8.o sha256_sse4.o

# ccan-crypto-sha256.o stays out of the link line: the benchmark source
# #includes sha256.c directly.
double-sha-bench: double-sha-bench.o ccan-time.o $(INTEL_OBJS) #ccan-crypto-sha256.o

$(INTEL_OBJS): %.o : %.asm

%.o : %.asm
	yasm -f x64 -f elf64 -X gnu -g dwarf2 -D LINUX -o $@ $<

# clean is a command, not a file; also remove the linked benchmark binary.
# $(RM) already expands to "rm -f".
.PHONY: clean
clean:
	$(RM) *.o double-sha-bench

ccan-crypto-sha256.o: $(CCANDIR)/ccan/crypto/sha256/sha256.c
	$(CC) $(CFLAGS) -c -o $@ $<
ccan-time.o: $(CCANDIR)/ccan/time/time.c
	$(CC) $(CFLAGS) -c -o $@ $<
@ -0,0 +1,122 @@ |
|||
/* Bitcoin does a lot of SHA of SHA. Benchmark that. */ |
|||
#include <ccan/crypto/sha256/sha256.c>
#include <ccan/time/time.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
|||
|
|||
void sha256_avx(void *input_data, uint32_t digest[8], uint64_t num_blks); |
|||
void sha256_rorx(void *input_data, uint32_t digest[8], uint64_t num_blks); |
|||
void sha256_rorx_x8ms(void *input_data, uint32_t digest[8], uint64_t num_blks); |
|||
void sha256_sse4(void *input_data, uint32_t digest[8], uint64_t num_blks); |
|||
|
|||
int main(int argc, char *argv[]) |
|||
{ |
|||
struct timeabs start; |
|||
struct timerel diff; |
|||
size_t i, n; |
|||
union { |
|||
struct sha256 h; |
|||
uint32_t u32[16]; |
|||
uint8_t u8[64]; |
|||
} block; |
|||
|
|||
n = atoi(argv[1] ? argv[1] : "1000000"); |
|||
memset(&block, 0, sizeof(block)); |
|||
sha256(&block.h, &n, sizeof(n)); |
|||
|
|||
start = time_now(); |
|||
for (i = 0; i < n; i++) { |
|||
sha256(&block.h, &block.h, sizeof(block.h)); |
|||
} |
|||
diff = time_divide(time_between(time_now(), start), n); |
|||
printf("Normal gave %02x%02x%02x%02x%02x%02x... in %llu nsec\n", |
|||
block.h.u.u8[0], block.h.u.u8[1], block.h.u.u8[2], |
|||
block.h.u.u8[3], block.h.u.u8[4], block.h.u.u8[5], |
|||
(unsigned long long)time_to_nsec(diff)); |
|||
|
|||
/* Now, don't re-initalize every time; use Transform */ |
|||
memset(&block, 0, sizeof(block)); |
|||
sha256(&block.h, &n, sizeof(n)); |
|||
block.u8[sizeof(block.h)] = 0x80; |
|||
// Size is 256 bits
|
|||
block.u8[sizeof(block)-2] = 1; |
|||
|
|||
start = time_now(); |
|||
for (i = 0; i < n; i++) { |
|||
struct sha256_ctx ctx = SHA256_INIT; |
|||
size_t j; |
|||
Transform(ctx.s, block.u32); |
|||
for (j = 0; j < sizeof(ctx.s) / sizeof(ctx.s[0]); j++) |
|||
block.h.u.u32[j] = cpu_to_be32(ctx.s[j]); |
|||
} |
|||
diff = time_divide(time_between(time_now(), start), n); |
|||
printf("Transform gave %02x%02x%02x%02x%02x%02x... in %llu nsec\n", |
|||
block.h.u.u8[0], block.h.u.u8[1], block.h.u.u8[2], |
|||
block.h.u.u8[3], block.h.u.u8[4], block.h.u.u8[5], |
|||
(unsigned long long)time_to_nsec(diff)); |
|||
|
|||
/* Now, assembler variants */ |
|||
sha256(&block.h, &n, sizeof(n)); |
|||
|
|||
start = time_now(); |
|||
for (i = 0; i < n; i++) { |
|||
struct sha256_ctx ctx = SHA256_INIT; |
|||
size_t j; |
|||
sha256_rorx(block.u32, ctx.s, 1); |
|||
for (j = 0; j < sizeof(ctx.s) / sizeof(ctx.s[0]); j++) |
|||
block.h.u.u32[j] = cpu_to_be32(ctx.s[j]); |
|||
} |
|||
diff = time_divide(time_between(time_now(), start), n); |
|||
printf("Asm rorx for %02x%02x%02x%02x%02x%02x... is %llu nsec\n", |
|||
block.h.u.u8[0], block.h.u.u8[1], block.h.u.u8[2], |
|||
block.h.u.u8[3], block.h.u.u8[4], block.h.u.u8[5], |
|||
(unsigned long long)time_to_nsec(diff)); |
|||
|
|||
sha256(&block.h, &n, sizeof(n)); |
|||
|
|||
start = time_now(); |
|||
for (i = 0; i < n; i++) { |
|||
struct sha256_ctx ctx = SHA256_INIT; |
|||
size_t j; |
|||
sha256_sse4(block.u32, ctx.s, 1); |
|||
for (j = 0; j < sizeof(ctx.s) / sizeof(ctx.s[0]); j++) |
|||
block.h.u.u32[j] = cpu_to_be32(ctx.s[j]); |
|||
} |
|||
diff = time_divide(time_between(time_now(), start), n); |
|||
printf("Asm SSE4 for %02x%02x%02x%02x%02x%02x... is %llu nsec\n", |
|||
block.h.u.u8[0], block.h.u.u8[1], block.h.u.u8[2], |
|||
block.h.u.u8[3], block.h.u.u8[4], block.h.u.u8[5], |
|||
(unsigned long long)time_to_nsec(diff)); |
|||
|
|||
sha256(&block.h, &n, sizeof(n)); |
|||
start = time_now(); |
|||
for (i = 0; i < n; i++) { |
|||
struct sha256_ctx ctx = SHA256_INIT; |
|||
size_t j; |
|||
sha256_rorx_x8ms(block.u32, ctx.s, 1); |
|||
for (j = 0; j < sizeof(ctx.s) / sizeof(ctx.s[0]); j++) |
|||
block.h.u.u32[j] = cpu_to_be32(ctx.s[j]); |
|||
} |
|||
diff = time_divide(time_between(time_now(), start), n); |
|||
printf("Asm RORx-x8ms for %02x%02x%02x%02x%02x%02x... is %llu nsec\n", |
|||
block.h.u.u8[0], block.h.u.u8[1], block.h.u.u8[2], |
|||
block.h.u.u8[3], block.h.u.u8[4], block.h.u.u8[5], |
|||
(unsigned long long)time_to_nsec(diff)); |
|||
|
|||
sha256(&block.h, &n, sizeof(n)); |
|||
start = time_now(); |
|||
for (i = 0; i < n; i++) { |
|||
struct sha256_ctx ctx = SHA256_INIT; |
|||
size_t j; |
|||
sha256_avx(block.u32, ctx.s, 1); |
|||
for (j = 0; j < sizeof(ctx.s) / sizeof(ctx.s[0]); j++) |
|||
block.h.u.u32[j] = cpu_to_be32(ctx.s[j]); |
|||
} |
|||
diff = time_divide(time_between(time_now(), start), n); |
|||
printf("Asm AVX for %02x%02x%02x%02x%02x%02x... is %llu nsec\n", |
|||
block.h.u.u8[0], block.h.u.u8[1], block.h.u.u8[2], |
|||
block.h.u.u8[3], block.h.u.u8[4], block.h.u.u8[5], |
|||
(unsigned long long)time_to_nsec(diff)); |
|||
|
|||
return 0; |
|||
} |
|||
|
@ -0,0 +1,32 @@ |
|||
Copyright (c) 2012, Intel Corporation |
|||
|
|||
All rights reserved. |
|||
|
|||
Redistribution and use in source and binary forms, with or without |
|||
modification, are permitted provided that the following conditions are |
|||
met: |
|||
|
|||
* Redistributions of source code must retain the above copyright |
|||
notice, this list of conditions and the following disclaimer. |
|||
|
|||
* Redistributions in binary form must reproduce the above copyright |
|||
notice, this list of conditions and the following disclaimer in the |
|||
documentation and/or other materials provided with the |
|||
distribution. |
|||
|
|||
* Neither the name of the Intel Corporation nor the names of its |
|||
contributors may be used to endorse or promote products derived from |
|||
this software without specific prior written permission. |
|||
|
|||
|
|||
THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY |
|||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR |
|||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
|||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
|||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
@ -0,0 +1,586 @@ |
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
; Copyright (c) 2012, Intel Corporation |
|||
; |
|||
; All rights reserved. |
|||
; |
|||
; Redistribution and use in source and binary forms, with or without |
|||
; modification, are permitted provided that the following conditions are |
|||
; met: |
|||
; |
|||
; * Redistributions of source code must retain the above copyright |
|||
; notice, this list of conditions and the following disclaimer. |
|||
; |
|||
; * Redistributions in binary form must reproduce the above copyright |
|||
; notice, this list of conditions and the following disclaimer in the |
|||
; documentation and/or other materials provided with the |
|||
; distribution. |
|||
; |
|||
; * Neither the name of the Intel Corporation nor the names of its |
|||
; contributors may be used to endorse or promote products derived from |
|||
; this software without specific prior written permission. |
|||
; |
|||
; |
|||
; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY |
|||
; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|||
; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|||
; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR |
|||
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|||
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|||
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|||
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
|||
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
|||
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
; |
|||
; Example YASM command lines: |
|||
; Windows: yasm -Xvc -f x64 -rnasm -pnasm -o sha256_avx1.obj -g cv8 sha256_avx1.asm |
|||
; Linux: yasm -f x64 -f elf64 -X gnu -g dwarf2 -D LINUX -o sha256_avx1.o sha256_avx1.asm |
|||
; |
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
; |
|||
; This code is described in an Intel White-Paper: |
|||
; "Fast SHA-256 Implementations on Intel Architecture Processors" |
|||
; |
|||
; To find it, surf to http://www.intel.com/p/en_US/embedded |
|||
; and search for that title. |
|||
; The paper is expected to be released roughly at the end of April, 2012 |
|||
; |
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
; This code schedules 1 block at a time, with 4 lanes per block |
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
|
|||
%define VMOVDQ vmovdqu ;; assume buffers not aligned |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Define Macros |
|||
|
|||
; addm [mem], reg |
|||
; Add reg to mem using reg-mem add and store |
; %1 = memory operand, %2 = register.
; The sum is formed in the register first (%2 += %1) and then written back
; (%1 = %2), so on exit both the register and the memory location hold the sum.
|||
%macro addm 2 |
|||
add %2, %1 |
|||
mov %1, %2 |
|||
%endm |
|||
|
|||
; MY_ROR reg, imm
; Rotate register %1 right by %2 bits. It is implemented as a double shift
; of the register with itself by (32 - %2), which is a rotate-left by that
; amount; the 32 assumes a 32-bit register (it is used on y0/y1, which are
; defined as r13d/r14d).
%macro MY_ROR 2 |
|||
shld %1,%1,(32-(%2)) |
|||
%endm |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
|
|||
; COPY_XMM_AND_BSWAP xmm, [mem], byte_flip_mask |
|||
; Load xmm with mem and byte swap each dword |
; %1 = destination xmm register
; %2 = 16-byte memory source, loaded with VMOVDQ (defined above as the
;      unaligned vmovdqu)
; %3 = xmm register holding the vpshufb control mask
|||
%macro COPY_XMM_AND_BSWAP 3 |
|||
VMOVDQ %1, %2 |
|||
vpshufb %1, %1, %3 |
|||
%endmacro |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
|
|||
%define X0 xmm4 |
|||
%define X1 xmm5 |
|||
%define X2 xmm6 |
|||
%define X3 xmm7 |
|||
|
|||
%define XTMP0 xmm0 |
|||
%define XTMP1 xmm1 |
|||
%define XTMP2 xmm2 |
|||
%define XTMP3 xmm3 |
|||
%define XTMP4 xmm8 |
|||
%define XFER xmm9 |
|||
%define XTMP5 xmm11 |
|||
|
|||
%define SHUF_00BA xmm10 ; shuffle xBxA -> 00BA |
|||
%define SHUF_DC00 xmm12 ; shuffle xDxC -> DC00 |
|||
%define BYTE_FLIP_MASK xmm13 |
|||
|
|||
; Argument registers and the working variables c, d, e depend on the target:
; with LINUX defined the arguments arrive in rdi/rsi/rdx, otherwise in
; rcx/rdx/r8.
; Several names deliberately share a register:
;  - SRND reuses INP's register; sha256_avx saves INP to [rsp + _INP]
;    before loading SRND.
;  - e reuses the low half of NUM_BLKS's register; sha256_avx stores
;    NUM_BLKS (as the end pointer) to [rsp + _INP_END] before loading e.
%ifdef LINUX |
|||
%define NUM_BLKS rdx ; 3rd arg |
|||
%define CTX rsi ; 2nd arg |
|||
%define INP rdi ; 1st arg |
|||
|
|||
%define SRND rdi ; clobbers INP |
|||
%define c ecx |
|||
%define d r8d |
|||
; e lives in edx, the low half of NUM_BLKS (rdx)
%define e edx |
|||
%else |
|||
%define NUM_BLKS r8 ; 3rd arg |
|||
%define CTX rdx ; 2nd arg |
|||
%define INP rcx ; 1st arg |
|||
|
|||
%define SRND rcx ; clobbers INP |
|||
; c and d use edi/esi here; sha256_avx pushes and pops rsi/rdi in this
; configuration
%define c edi |
|||
%define d esi |
|||
; e lives in r8d, the low half of NUM_BLKS (r8)
%define e r8d |
|||
|
|||
%endif |
|||
%define TBL rbp |
|||
%define a eax |
|||
%define b ebx |
|||
|
|||
%define f r9d |
|||
%define g r10d |
|||
%define h r11d |
|||
|
|||
%define y0 r13d |
|||
%define y1 r14d |
|||
%define y2 r15d |
|||
|
|||
|
|||
; Stack frame layout, as offsets from rsp after "sub rsp, STACK_SIZE":
;   _INP_END  (0)  : end-of-input pointer
;   _INP      (8)  : saved input pointer
;   _XFER     (16) : k+w transfer area for four rounds
;   _XMM_SAVE (32) : xmm6..xmm13 save area (only when LINUX is not defined)
; NOTE: _XFER_SIZE is 8, but the code stores a full 16-byte xmm register at
; _XFER (vmovdqa [rsp + _XFER], XFER); the upper 8 bytes land in the
; _ALIGN_SIZE padding that follows.
_INP_END_SIZE equ 8 |
|||
_INP_SIZE equ 8 |
|||
_XFER_SIZE equ 8 |
|||
%ifdef LINUX |
|||
_XMM_SAVE_SIZE equ 0 |
|||
%else |
|||
_XMM_SAVE_SIZE equ 8*16 |
|||
%endif |
|||
; STACK_SIZE plus pushes must be an odd multiple of 8 |
; (LINUX: 32 + 5 pushes * 8 = 72; otherwise: 160 + 7 pushes * 8 = 216.
; Presumably this keeps rsp 16-byte aligned for the vmovdqa stores, given
; the return address already on the stack -- confirm against the ABI.)
|||
_ALIGN_SIZE equ 8 |
|||
|
|||
_INP_END equ 0 |
|||
_INP equ _INP_END + _INP_END_SIZE |
|||
_XFER equ _INP + _INP_SIZE |
|||
_XMM_SAVE equ _XFER + _XFER_SIZE + _ALIGN_SIZE |
|||
STACK_SIZE equ _XMM_SAVE + _XMM_SAVE_SIZE |
|||
|
|||
; rotate_Xs |
|||
; Rotate values of symbols X0...X3 |
; Assemble-time renaming only; no instructions are emitted. After it,
; X0 names what was X1, X1 what was X2, X2 what was X3, and X3 what was X0.
; FOUR_ROUNDS_AND_SCHED writes its four new message words into X0 and then
; invokes this, so they become X3.
|||
%macro rotate_Xs 0 |
|||
%xdefine X_ X0 |
|||
%xdefine X0 X1 |
|||
%xdefine X1 X2 |
|||
%xdefine X2 X3 |
|||
%xdefine X3 X_ |
|||
%endm |
|||
|
|||
; ROTATE_ARGS |
|||
; Rotate values of symbols a...h |
; Assemble-time renaming only; no instructions are emitted. After it,
; a names the register that was h, b the one that was a, c <- b, d <- c,
; e <- d, f <- e, g <- f, h <- g. Each round leaves its two new values in
; the registers named h and d, which this renaming turns into a and e.
; After eight invocations every name is back on its original register.
|||
%macro ROTATE_ARGS 0 |
|||
%xdefine TMP_ h |
|||
%xdefine h g |
|||
%xdefine g f |
|||
%xdefine f e |
|||
%xdefine e d |
|||
%xdefine d c |
|||
%xdefine c b |
|||
%xdefine b a |
|||
%xdefine a TMP_ |
|||
%endm |
|||
|
|||
; FOUR_ROUNDS_AND_SCHED
; Performs four SHA-256 rounds on the scalar state a..h, reading the four
; k+w values from [rsp + _XFER + 0..3*4], and interleaves with them the
; vector computation of the next four message-schedule words from X0..X3.
; The new words are written to X0; the macro ends with rotate_Xs, so they
; are then named X3. ROTATE_ARGS is invoked after each of the four rounds.
; Uses y0, y1, y2 and XTMP0..XTMP5 as scratch, and SHUF_00BA / SHUF_DC00 as
; shuffle masks. In the comments ">>" on a scalar means rotate right
; (MY_ROR); "s0"/"s1" are the message-schedule sigma functions and
; "S0"/"S1" the round Sigma functions.
%macro FOUR_ROUNDS_AND_SCHED 0 |
|||
;; compute s0 four at a time and s1 two at a time |
|||
;; compute W[-16] + W[-7] 4 at a time |
|||
;vmovdqa XTMP0, X3 |
|||
mov y0, e ; y0 = e |
|||
MY_ROR y0, (25-11) ; y0 = e >> (25-11) |
|||
mov y1, a ; y1 = a |
|||
vpalignr XTMP0, X3, X2, 4 ; XTMP0 = W[-7] |
|||
MY_ROR y1, (22-13) ; y1 = a >> (22-13) |
|||
xor y0, e ; y0 = e ^ (e >> (25-11)) |
|||
mov y2, f ; y2 = f |
|||
MY_ROR y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6)) |
|||
;vmovdqa XTMP1, X1 |
|||
xor y1, a ; y1 = a ^ (a >> (22-13)) |
|||
xor y2, g ; y2 = f^g |
|||
vpaddd XTMP0, XTMP0, X0 ; XTMP0 = W[-7] + W[-16] |
|||
xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) |
|||
and y2, e ; y2 = (f^g)&e |
|||
MY_ROR y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2)) |
|||
;; compute s0 |
|||
vpalignr XTMP1, X1, X0, 4 ; XTMP1 = W[-15] |
|||
xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) |
|||
MY_ROR y0, 6 ; y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) |
|||
xor y2, g ; y2 = CH = ((f^g)&e)^g |
|||
|
|||
|
|||
MY_ROR y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) |
|||
add y2, y0 ; y2 = S1 + CH |
|||
add y2, [rsp + _XFER + 0*4] ; y2 = k + w + S1 + CH |
|||
|
|||
mov y0, a ; y0 = a |
|||
add h, y2 ; h = h + S1 + CH + k + w |
|||
mov y2, a ; y2 = a |
|||
|
|||
vpsrld XTMP2, XTMP1, 7 |
|||
|
|||
or y0, c ; y0 = a|c |
|||
add d, h ; d = d + h + S1 + CH + k + w |
|||
and y2, c ; y2 = a&c |
|||
|
|||
vpslld XTMP3, XTMP1, (32-7) |
|||
|
|||
and y0, b ; y0 = (a|c)&b |
|||
add h, y1 ; h = h + S1 + CH + k + w + S0 |
|||
|
|||
vpor XTMP3, XTMP3, XTMP2 ; XTMP3 = W[-15] MY_ROR 7 |
|||
|
|||
or y0, y2 ; y0 = MAJ = ((a|c)&b)|(a&c) |
|||
add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ |
|||
|
|||
ROTATE_ARGS |
|||
|
|||
mov y0, e ; y0 = e |
|||
mov y1, a ; y1 = a |
|||
|
|||
|
|||
MY_ROR y0, (25-11) ; y0 = e >> (25-11) |
|||
xor y0, e ; y0 = e ^ (e >> (25-11)) |
|||
mov y2, f ; y2 = f |
|||
MY_ROR y1, (22-13) ; y1 = a >> (22-13) |
|||
|
|||
vpsrld XTMP2, XTMP1,18 |
|||
|
|||
xor y1, a ; y1 = a ^ (a >> (22-13)) |
|||
MY_ROR y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6)) |
|||
xor y2, g ; y2 = f^g |
|||
|
|||
vpsrld XTMP4, XTMP1, 3 ; XTMP4 = W[-15] >> 3 |
|||
|
|||
MY_ROR y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2)) |
|||
xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) |
|||
and y2, e ; y2 = (f^g)&e |
|||
MY_ROR y0, 6 ; y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) |
|||
|
|||
vpslld XTMP1, XTMP1, (32-18) |
|||
|
|||
xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) |
|||
xor y2, g ; y2 = CH = ((f^g)&e)^g |
|||
|
|||
vpxor XTMP3, XTMP3, XTMP1 |
|||
|
|||
add y2, y0 ; y2 = S1 + CH |
|||
add y2, [rsp + _XFER + 1*4] ; y2 = k + w + S1 + CH |
|||
MY_ROR y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) |
|||
|
|||
vpxor XTMP3, XTMP3, XTMP2 ; XTMP3 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18 |
|||
|
|||
mov y0, a ; y0 = a |
|||
add h, y2 ; h = h + S1 + CH + k + w |
|||
mov y2, a ; y2 = a |
|||
|
|||
vpxor XTMP1, XTMP3, XTMP4 ; XTMP1 = s0 |
|||
|
|||
or y0, c ; y0 = a|c |
|||
add d, h ; d = d + h + S1 + CH + k + w |
|||
and y2, c ; y2 = a&c |
|||
;; compute low s1 |
|||
vpshufd XTMP2, X3, 11111010b ; XTMP2 = W[-2] {BBAA} |
|||
and y0, b ; y0 = (a|c)&b |
|||
add h, y1 ; h = h + S1 + CH + k + w + S0 |
|||
vpaddd XTMP0, XTMP0, XTMP1 ; XTMP0 = W[-16] + W[-7] + s0 |
|||
or y0, y2 ; y0 = MAJ = ((a|c)&b)|(a&c) |
|||
add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ |
|||
|
|||
ROTATE_ARGS |
|||
;vmovdqa XTMP3, XTMP2 ; XTMP3 = W[-2] {BBAA} |
|||
|
|||
mov y0, e ; y0 = e |
|||
mov y1, a ; y1 = a |
|||
MY_ROR y0, (25-11) ; y0 = e >> (25-11) |
|||
|
|||
;vmovdqa XTMP4, XTMP2 ; XTMP4 = W[-2] {BBAA} |
|||
|
|||
xor y0, e ; y0 = e ^ (e >> (25-11)) |
|||
MY_ROR y1, (22-13) ; y1 = a >> (22-13) |
|||
mov y2, f ; y2 = f |
|||
xor y1, a ; y1 = a ^ (a >> (22-13)) |
|||
MY_ROR y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6)) |
|||
|
|||
vpsrld XTMP4, XTMP2, 10 ; XTMP4 = W[-2] >> 10 {BBAA} |
|||
|
|||
xor y2, g ; y2 = f^g |
|||
|
|||
vpsrlq XTMP3, XTMP2, 19 ; XTMP3 = W[-2] MY_ROR 19 {xBxA} |
|||
|
|||
xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) |
|||
and y2, e ; y2 = (f^g)&e |
|||
|
|||
vpsrlq XTMP2, XTMP2, 17 ; XTMP2 = W[-2] MY_ROR 17 {xBxA} |
|||
|
|||
MY_ROR y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2)) |
|||
xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) |
|||
xor y2, g ; y2 = CH = ((f^g)&e)^g |
|||
MY_ROR y0, 6 ; y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) |
|||
vpxor XTMP2, XTMP2, XTMP3 |
|||
add y2, y0 ; y2 = S1 + CH |
|||
MY_ROR y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) |
|||
add y2, [rsp + _XFER + 2*4] ; y2 = k + w + S1 + CH |
|||
vpxor XTMP4, XTMP4, XTMP2 ; XTMP4 = s1 {xBxA} |
|||
mov y0, a ; y0 = a |
|||
add h, y2 ; h = h + S1 + CH + k + w |
|||
mov y2, a ; y2 = a |
|||
vpshufb XTMP4, XTMP4, SHUF_00BA ; XTMP4 = s1 {00BA} |
|||
or y0, c ; y0 = a|c |
|||
add d, h ; d = d + h + S1 + CH + k + w |
|||
and y2, c ; y2 = a&c |
|||
vpaddd XTMP0, XTMP0, XTMP4 ; XTMP0 = {..., ..., W[1], W[0]} |
|||
and y0, b ; y0 = (a|c)&b |
|||
add h, y1 ; h = h + S1 + CH + k + w + S0 |
|||
;; compute high s1 |
|||
vpshufd XTMP2, XTMP0, 01010000b ; XTMP2 = W[-2] {DDCC} |
|||
or y0, y2 ; y0 = MAJ = ((a|c)&b)|(a&c) |
|||
add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ |
|||
|
|||
ROTATE_ARGS |
|||
;vmovdqa XTMP3, XTMP2 ; XTMP3 = W[-2] {DDCC} |
|||
mov y0, e ; y0 = e |
|||
MY_ROR y0, (25-11) ; y0 = e >> (25-11) |
|||
mov y1, a ; y1 = a |
|||
;vmovdqa XTMP5, XTMP2 ; XTMP5 = W[-2] {DDCC} |
|||
MY_ROR y1, (22-13) ; y1 = a >> (22-13) |
|||
xor y0, e ; y0 = e ^ (e >> (25-11)) |
|||
mov y2, f ; y2 = f |
|||
MY_ROR y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6)) |
|||
|
|||
vpsrld XTMP5, XTMP2, 10 ; XTMP5 = W[-2] >> 10 {DDCC} |
|||
|
|||
xor y1, a ; y1 = a ^ (a >> (22-13)) |
|||
xor y2, g ; y2 = f^g |
|||
|
|||
vpsrlq XTMP3, XTMP2, 19 ; XTMP3 = W[-2] MY_ROR 19 {xDxC} |
|||
|
|||
xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) |
|||
and y2, e ; y2 = (f^g)&e |
|||
MY_ROR y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2)) |
|||
|
|||
vpsrlq XTMP2, XTMP2, 17 ; XTMP2 = W[-2] MY_ROR 17 {xDxC} |
|||
|
|||
xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) |
|||
MY_ROR y0, 6 ; y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) |
|||
xor y2, g ; y2 = CH = ((f^g)&e)^g |
|||
|
|||
vpxor XTMP2, XTMP2, XTMP3 |
|||
|
|||
MY_ROR y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) |
|||
add y2, y0 ; y2 = S1 + CH |
|||
add y2, [rsp + _XFER + 3*4] ; y2 = k + w + S1 + CH |
|||
vpxor XTMP5, XTMP5, XTMP2 ; XTMP5 = s1 {xDxC} |
|||
mov y0, a ; y0 = a |
|||
add h, y2 ; h = h + S1 + CH + k + w |
|||
mov y2, a ; y2 = a |
|||
vpshufb XTMP5, XTMP5, SHUF_DC00 ; XTMP5 = s1 {DC00} |
|||
or y0, c ; y0 = a|c |
|||
add d, h ; d = d + h + S1 + CH + k + w |
|||
and y2, c ; y2 = a&c |
|||
vpaddd X0, XTMP5, XTMP0 ; X0 = {W[3], W[2], W[1], W[0]} |
|||
and y0, b ; y0 = (a|c)&b |
|||
add h, y1 ; h = h + S1 + CH + k + w + S0 |
|||
or y0, y2 ; y0 = MAJ = ((a|c)&b)|(a&c) |
|||
add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ |
|||
|
|||
ROTATE_ARGS |
|||
rotate_Xs |
|||
%endm |
|||
|
|||
;; input is [rsp + _XFER + %1 * 4] |
;; DO_ROUND n: one SHA-256 round on the scalar state a..h with no message
;; scheduling. %1 (0..3) selects which of the four k+w dwords stored at
;; [rsp + _XFER] to consume. Uses y0, y1, y2 as scratch and ends with
;; ROTATE_ARGS. In the comments ">>" means rotate right (MY_ROR).
|||
%macro DO_ROUND 1 |
|||
mov y0, e ; y0 = e |
|||
MY_ROR y0, (25-11) ; y0 = e >> (25-11) |
|||
mov y1, a ; y1 = a |
|||
xor y0, e ; y0 = e ^ (e >> (25-11)) |
|||
MY_ROR y1, (22-13) ; y1 = a >> (22-13) |
|||
mov y2, f ; y2 = f |
|||
xor y1, a ; y1 = a ^ (a >> (22-13)) |
|||
MY_ROR y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6)) |
|||
xor y2, g ; y2 = f^g |
|||
xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) |
|||
MY_ROR y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2)) |
|||
and y2, e ; y2 = (f^g)&e |
|||
xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) |
|||
MY_ROR y0, 6 ; y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) |
|||
xor y2, g ; y2 = CH = ((f^g)&e)^g |
|||
add y2, y0 ; y2 = S1 + CH |
|||
MY_ROR y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) |
|||
add y2, [rsp + _XFER + %1 * 4] ; y2 = k + w + S1 + CH |
|||
mov y0, a ; y0 = a |
|||
add h, y2 ; h = h + S1 + CH + k + w |
|||
mov y2, a ; y2 = a |
|||
or y0, c ; y0 = a|c |
|||
add d, h ; d = d + h + S1 + CH + k + w |
|||
and y2, c ; y2 = a&c |
|||
and y0, b ; y0 = (a|c)&b |
|||
add h, y1 ; h = h + S1 + CH + k + w + S0 |
|||
or y0, y2 ; y0 = MAJ = ((a|c)&b)|(a&c) |
|||
add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ |
|||
ROTATE_ARGS |
|||
%endm |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
;; void sha256_avx(void *input_data, UINT32 digest[8], UINT64 num_blks) |
|||
;; arg 1 : pointer to input data |
|||
;; arg 2 : pointer to digest |
|||
;; arg 3 : Num blocks |
;;
;; Runs the SHA-256 compression function over num_blks consecutive 64-byte
;; blocks starting at input_data, updating the eight 32-bit state words in
;; digest in place. Input dwords are byte-swapped on load; the state words
;; are read and written as-is. No padding or initial-value setup is done
;; here. If num_blks is 0 the digest is left untouched.
|||
section .text |
|||
global sha256_avx |
|||
align 32 |
|||
sha256_avx: |
|||
push rbx |
|||
%ifndef LINUX |
|||
push rsi |
|||
push rdi |
|||
%endif |
|||
push rbp |
|||
push r13 |
|||
push r14 |
|||
push r15 |
|||
|
|||
sub rsp,STACK_SIZE |
|||
%ifndef LINUX |
|||
vmovdqa [rsp + _XMM_SAVE + 0*16],xmm6 |
|||
vmovdqa [rsp + _XMM_SAVE + 1*16],xmm7 |
|||
vmovdqa [rsp + _XMM_SAVE + 2*16],xmm8 |
|||
vmovdqa [rsp + _XMM_SAVE + 3*16],xmm9 |
|||
vmovdqa [rsp + _XMM_SAVE + 4*16],xmm10 |
|||
vmovdqa [rsp + _XMM_SAVE + 5*16],xmm11 |
|||
vmovdqa [rsp + _XMM_SAVE + 6*16],xmm12 |
|||
vmovdqa [rsp + _XMM_SAVE + 7*16],xmm13 |
|||
%endif |
|||
|
|||
shl NUM_BLKS, 6 ; convert to bytes |
|||
; shl sets ZF when the byte count is zero: nothing to hash
jz done_hash |
|||
add NUM_BLKS, INP ; pointer to end of data |
|||
; NUM_BLKS's register is reused for e below, so park the end pointer
mov [rsp + _INP_END], NUM_BLKS |
|||
|
|||
;; load initial digest |
|||
mov a,[4*0 + CTX] |
|||
mov b,[4*1 + CTX] |
|||
mov c,[4*2 + CTX] |
|||
mov d,[4*3 + CTX] |
|||
mov e,[4*4 + CTX] |
|||
mov f,[4*5 + CTX] |
|||
mov g,[4*6 + CTX] |
|||
mov h,[4*7 + CTX] |
|||
|
|||
vmovdqa BYTE_FLIP_MASK, [PSHUFFLE_BYTE_FLIP_MASK wrt rip] |
|||
vmovdqa SHUF_00BA, [_SHUF_00BA wrt rip] |
|||
vmovdqa SHUF_DC00, [_SHUF_DC00 wrt rip] |
|||
|
|||
; loop0: one iteration per 64-byte input block
loop0: |
|||
lea TBL,[K256 wrt rip] |
|||
|
|||
;; byte swap first 16 dwords |
|||
COPY_XMM_AND_BSWAP X0, [INP + 0*16], BYTE_FLIP_MASK |
|||
COPY_XMM_AND_BSWAP X1, [INP + 1*16], BYTE_FLIP_MASK |
|||
COPY_XMM_AND_BSWAP X2, [INP + 2*16], BYTE_FLIP_MASK |
|||
COPY_XMM_AND_BSWAP X3, [INP + 3*16], BYTE_FLIP_MASK |
|||
|
|||
; SRND shares INP's register, so save INP before using SRND
mov [rsp + _INP], INP |
|||
|
|||
;; schedule 48 input dwords, by doing 3 rounds of 16 each |
|||
mov SRND, 3 |
|||
align 16 |
|||
; loop1: 3 passes x 16 rounds, each pass also scheduling 16 new W words
loop1: |
|||
vpaddd XFER, X0, [TBL + 0*16] |
|||
vmovdqa [rsp + _XFER], XFER |
|||
FOUR_ROUNDS_AND_SCHED |
|||
|
|||
vpaddd XFER, X0, [TBL + 1*16] |
|||
vmovdqa [rsp + _XFER], XFER |
|||
FOUR_ROUNDS_AND_SCHED |
|||
|
|||
vpaddd XFER, X0, [TBL + 2*16] |
|||
vmovdqa [rsp + _XFER], XFER |
|||
FOUR_ROUNDS_AND_SCHED |
|||
|
|||
vpaddd XFER, X0, [TBL + 3*16] |
|||
vmovdqa [rsp + _XFER], XFER |
|||
add TBL, 4*16 |
|||
FOUR_ROUNDS_AND_SCHED |
|||
|
|||
sub SRND, 1 |
|||
jne loop1 |
|||
|
|||
mov SRND, 2 |
|||
; loop2: 2 passes x 8 rounds for the last 16 rounds, no scheduling;
; the first pass consumes X0/X1, the second X2/X3 (moved into X0/X1)
loop2: |
|||
vpaddd XFER, X0, [TBL + 0*16] |
|||
vmovdqa [rsp + _XFER], XFER |
|||
DO_ROUND 0 |
|||
DO_ROUND 1 |
|||
DO_ROUND 2 |
|||
DO_ROUND 3 |
|||
|
|||
vpaddd XFER, X1, [TBL + 1*16] |
|||
vmovdqa [rsp + _XFER], XFER |
|||
add TBL, 2*16 |
|||
DO_ROUND 0 |
|||
DO_ROUND 1 |
|||
DO_ROUND 2 |
|||
DO_ROUND 3 |
|||
|
|||
vmovdqa X0, X2 |
|||
vmovdqa X1, X3 |
|||
|
|||
sub SRND, 1 |
|||
jne loop2 |
|||
|
|||
|
|||
; 64 rounds = 64 ROTATE_ARGS, a multiple of 8, so a..h are back on their
; original registers here; fold the working state into the digest
addm [4*0 + CTX],a |
|||
addm [4*1 + CTX],b |
|||
addm [4*2 + CTX],c |
|||
addm [4*3 + CTX],d |
|||
addm [4*4 + CTX],e |
|||
addm [4*5 + CTX],f |
|||
addm [4*6 + CTX],g |
|||
addm [4*7 + CTX],h |
|||
|
|||
mov INP, [rsp + _INP] |
|||
add INP, 64 |
|||
cmp INP, [rsp + _INP_END] |
|||
jne loop0 |
|||
|
|||
done_hash: |
|||
%ifndef LINUX |
|||
vmovdqa xmm6,[rsp + _XMM_SAVE + 0*16] |
|||
vmovdqa xmm7,[rsp + _XMM_SAVE + 1*16] |
|||
vmovdqa xmm8,[rsp + _XMM_SAVE + 2*16] |
|||
vmovdqa xmm9,[rsp + _XMM_SAVE + 3*16] |
|||
vmovdqa xmm10,[rsp + _XMM_SAVE + 4*16] |
|||
vmovdqa xmm11,[rsp + _XMM_SAVE + 5*16] |
|||
vmovdqa xmm12,[rsp + _XMM_SAVE + 6*16] |
|||
vmovdqa xmm13,[rsp + _XMM_SAVE + 7*16] |
|||
%endif |
|||
|
|||
|
|||
add rsp, STACK_SIZE |
|||
|
|||
pop r15 |
|||
pop r14 |
|||
pop r13 |
|||
pop rbp |
|||
%ifndef LINUX |
|||
pop rdi |
|||
pop rsi |
|||
%endif |
|||
pop rbx |
|||
|
|||
ret |
|||
|
|||
|
|||
section .data
align 64
; Round-constant table: 64 dwords, four per row. The code reads it 16 bytes
; at a time and adds each row to four message words.
K256:
	dd	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
	dd	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
	dd	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
	dd	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
	dd	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
	dd	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
	dd	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
	dd	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
	dd	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
	dd	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
	dd	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
	dd	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
	dd	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
	dd	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
	dd	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
	dd	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

; vpshufb control that reverses the byte order within each dword.
; The three masks below are loaded with vmovdqa and therefore must stay
; 16-byte aligned; they are, because K256 is 64-byte aligned and 256 bytes long.
PSHUFFLE_BYTE_FLIP_MASK: ddq 0x0c0d0e0f08090a0b0405060700010203

; shuffle xBxA -> 00BA
_SHUF_00BA: ddq 0xFFFFFFFFFFFFFFFF0b0a090803020100

; shuffle xDxC -> DC00
_SHUF_DC00: ddq 0x0b0a090803020100FFFFFFFFFFFFFFFF

%ifdef LINUX
; Declare that this object does not need an executable stack; without this
; note the GNU linker may mark the whole program's stack executable.
section .note.GNU-stack noalloc noexec nowrite progbits
%endif
@ -0,0 +1,826 @@ |
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
; Copyright (c) 2012, Intel Corporation |
|||
; |
|||
; All rights reserved. |
|||
; |
|||
; Redistribution and use in source and binary forms, with or without |
|||
; modification, are permitted provided that the following conditions are |
|||
; met: |
|||
; |
|||
; * Redistributions of source code must retain the above copyright |
|||
; notice, this list of conditions and the following disclaimer. |
|||
; |
|||
; * Redistributions in binary form must reproduce the above copyright |
|||
; notice, this list of conditions and the following disclaimer in the |
|||
; documentation and/or other materials provided with the |
|||
; distribution. |
|||
; |
|||
; * Neither the name of the Intel Corporation nor the names of its |
|||
; contributors may be used to endorse or promote products derived from |
|||
; this software without specific prior written permission. |
|||
; |
|||
; |
|||
; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY |
|||
; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|||
; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|||
; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR |
|||
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|||
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|||
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|||
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
|||
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
|||
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
; |
|||
; Example YASM command lines: |
|||
; Windows: yasm -Xvc -f x64 -rnasm -pnasm -o sha256_avx2_rorx2.obj -g cv8 sha256_avx2_rorx2.asm |
|||
; Linux: yasm -f x64 -f elf64 -X gnu -g dwarf2 -D LINUX -o sha256_avx2_rorx2.o sha256_avx2_rorx2.asm |
|||
; |
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
; |
|||
; This code is described in an Intel White-Paper: |
|||
; "Fast SHA-256 Implementations on Intel Architecture Processors" |
|||
; |
|||
; To find it, surf to http://www.intel.com/p/en_US/embedded |
|||
; and search for that title. |
|||
; The paper is expected to be released roughly at the end of April, 2012 |
|||
; |
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
; This code schedules 2 blocks at a time, with 4 lanes per block |
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
|
|||
%define VMOVDQ vmovdqu ;; unaligned vector move: input buffers are not assumed to be aligned |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Define Macros |
|||
|
|||
; addm [mem], reg |
|||
; reg = reg + [mem], then [mem] = reg: the sum ends up in both the register and memory |
|||
%macro addm 2 |
|||
add %2, %1 |
|||
mov %1, %2 |
|||
%endm |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
|
|||
; X0..X3 hold the 16 most recent message-schedule dwords, four per 128-bit lane.
; The names are rotated by rotate_Xs after every four scheduled words.
%define X0 ymm4 |
|||
%define X1 ymm5 |
|||
%define X2 ymm6 |
|||
%define X3 ymm7 |
|||
|
|||
; XMM versions of above |
|||
%define XWORD0 xmm4 |
|||
%define XWORD1 xmm5 |
|||
%define XWORD2 xmm6 |
|||
%define XWORD3 xmm7 |
|||
|
|||
; Scratch vector registers for the schedule computation; XFER carries the
; K+W sums on their way to the stack transfer area.
%define XTMP0 ymm0 |
|||
%define XTMP1 ymm1 |
|||
%define XTMP2 ymm2 |
|||
%define XTMP3 ymm3 |
|||
%define XTMP4 ymm8 |
|||
%define XFER ymm9 |
|||
%define XTMP5 ymm11 |
|||
|
|||
; Shuffle/byte-swap control masks, loaded once from the data section.
%define SHUF_00BA ymm10 ; shuffle xBxA -> 00BA |
|||
%define SHUF_DC00 ymm12 ; shuffle xDxC -> DC00 |
|||
%define BYTE_FLIP_MASK ymm13 |
|||
|
|||
%define X_BYTE_FLIP_MASK xmm13 ; XMM version of BYTE_FLIP_MASK |
|||
|
|||
; Argument registers and the state/scratch aliases that overlap them.
; With LINUX defined the arguments arrive in rdi/rsi/rdx, otherwise in rcx/rdx/r8.
; e and y3 reuse the NUM_BLKS and INP registers, so those two values are only
; valid until the round code starts; the routine keeps copies on the stack.
%ifdef LINUX |
|||
%define NUM_BLKS rdx ; 3rd arg |
|||
%define CTX rsi ; 2nd arg |
|||
%define INP rdi ; 1st arg |
|||
%define c ecx |
|||
%define d r8d |
|||
%define e edx ; clobbers NUM_BLKS |
|||
%define y3 edi ; clobbers INP |
|||
%else |
|||
%define NUM_BLKS r8 ; 3rd arg |
|||
%define CTX rdx ; 2nd arg |
|||
%define INP rcx ; 1st arg |
|||
%define c edi |
|||
%define d esi |
|||
%define e r8d ; clobbers NUM_BLKS |
|||
%define y3 ecx ; clobbers INP |
|||
|
|||
%endif |
|||
|
|||
|
|||
; TBL points at the K256 constant table; SRND is the byte offset of the current
; round group and shares its register with CTX (CTX is reloaded from the stack).
%define TBL rbp |
|||
%define SRND CTX ; SRND is same register as CTX |
|||
|
|||
; Remaining SHA-256 working variables (c, d, e are defined per ABI above).
; old_h names the register that held h before the most recent ROTATE_ARGS.
%define a eax |
|||
%define b ebx |
|||
%define f r9d |
|||
%define g r10d |
|||
%define h r11d |
|||
%define old_h r11d |
|||
|
|||
; Scalar temporaries used inside the round macros.
%define T1 r12d |
|||
%define y0 r13d |
|||
%define y1 r14d |
|||
%define y2 r15d |
|||
|
|||
|
|||
; Stack frame layout: sizes first, then offsets from the (32-byte aligned) rsp.
_XFER_SIZE equ 2*64*4 ; 2 blocks, 64 rounds, 4 bytes/round |
|||
%ifdef LINUX |
|||
_XMM_SAVE_SIZE equ 0 |
|||
%else |
|||
_XMM_SAVE_SIZE equ 8*16 |
|||
%endif |
|||
_INP_END_SIZE equ 8 |
|||
_INP_SIZE equ 8 |
|||
_CTX_SIZE equ 8 |
|||
_RSP_SIZE equ 8 |
|||
|
|||
; _XFER     : K+W transfer area written by the schedule loop
; _XMM_SAVE : save area for xmm6-xmm13 (non-LINUX builds only, 8 x 16 bytes)
; _INP_END  : pointer to the last input block
; _INP      : saved input pointer
; _CTX      : saved digest pointer
; _RSP      : stack pointer value from before the frame was allocated and aligned
_XFER equ 0 |
|||
_XMM_SAVE equ _XFER + _XFER_SIZE |
|||
_INP_END equ _XMM_SAVE + _XMM_SAVE_SIZE |
|||
_INP equ _INP_END + _INP_END_SIZE |
|||
_CTX equ _INP + _INP_SIZE |
|||
_RSP equ _CTX + _CTX_SIZE |
|||
STACK_SIZE equ _RSP + _RSP_SIZE |
|||
|
|||
; rotate_Xs |
|||
; Rotate values of symbols X0...X3: X0<-X1, X1<-X2, X2<-X3, X3<-old X0. |
|||
; This only renames assembler symbols; no instructions are emitted.
%macro rotate_Xs 0 |
|||
%xdefine X_ X0 |
|||
%xdefine X0 X1 |
|||
%xdefine X1 X2 |
|||
%xdefine X2 X3 |
|||
%xdefine X3 X_ |
|||
%endm |
|||
|
|||
; ROTATE_ARGS |
|||
; Rotate values of symbols a...h: each name takes over the register of its |
|||
; predecessor (b<-a, ..., h<-g) and a takes the register that held h.
; old_h remembers the register h named before the rotation, so a following
; round can still finish additions into the previous round's result.
; This only renames assembler symbols; no instructions are emitted.
%macro ROTATE_ARGS 0 |
|||
%xdefine old_h h |
|||
%xdefine TMP_ h |
|||
%xdefine h g |
|||
%xdefine g f |
|||
%xdefine f e |
|||
%xdefine e d |
|||
%xdefine d c |
|||
%xdefine c b |
|||
%xdefine b a |
|||
%xdefine a TMP_ |
|||
%endm |
|||
|
|||
; FOUR_ROUNDS_AND_SCHED xfer_addr
; Runs four SHA-256 rounds on the working variables a..h, interleaved with the
; vector computation of the next four message-schedule words.
;   %1 : address expression of four consecutive K+W dwords for these rounds.
; On exit X0 holds the four new schedule words, the X0..X3 names have been
; rotated once and the a..h names four times.
; In the scalar comments "x >> n" stands for a rotate right by n (rorx);
; in the vector comments shifts and rotates are spelled out separately.
%macro FOUR_ROUNDS_AND_SCHED 1 |
|||
%define %%XFER %1 |
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 0 ;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
|
|||
mov y3, a ; y3 = a ; MAJA |
|||
rorx y0, e, 25 ; y0 = e >> 25 ; S1A |
|||
rorx y1, e, 11 ; y1 = e >> 11 ; S1B |
|||
|
|||
add h, dword[%%XFER+0*4] ; h = k + w + h ; -- |
|||
or y3, c ; y3 = a|c ; MAJA |
|||
vpalignr XTMP0, X3, X2, 4 ; XTMP0 = W[-7] |
|||
mov y2, f ; y2 = f ; CH |
|||
rorx T1, a, 13 ; T1 = a >> 13 ; S0B |
|||
|
|||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ; S1 |
|||
xor y2, g ; y2 = f^g ; CH |
|||
vpaddd XTMP0, XTMP0, X0 ; XTMP0 = W[-7] + W[-16] |
|||
rorx y1, e, 6 ; y1 = (e >> 6) ; S1 |
|||
|
|||
and y2, e ; y2 = (f^g)&e ; CH |
|||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1 |
|||
rorx y1, a, 22 ; y1 = a >> 22 ; S0A |
|||
add d, h ; d = k + w + h + d ; -- |
|||
|
|||
and y3, b ; y3 = (a|c)&b ; MAJA |
|||
vpalignr XTMP1, X1, X0, 4 ; XTMP1 = W[-15] |
|||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ; S0 |
|||
rorx T1, a, 2 ; T1 = (a >> 2) ; S0 |
|||
|
|||
xor y2, g ; y2 = CH = ((f^g)&e)^g ; CH |
|||
vpsrld XTMP2, XTMP1, 7 |
|||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0 |
|||
mov T1, a ; T1 = a ; MAJB |
|||
and T1, c ; T1 = a&c ; MAJB |
|||
|
|||
add y2, y0 ; y2 = S1 + CH ; -- |
|||
vpslld XTMP3, XTMP1, (32-7) |
|||
or y3, T1 ; y3 = MAJ = ((a|c)&b)|(a&c) ; MAJ |
|||
add h, y1 ; h = k + w + h + S0 ; -- |
|||
|
|||
add d, y2 ; d = k + w + h + d + S1 + CH = d + t1 ; -- |
|||
vpor XTMP3, XTMP3, XTMP2 ; XTMP3 = W[-15] ror 7 |
|||
|
|||
vpsrld XTMP2, XTMP1,18 |
|||
add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; -- |
|||
add h, y3 ; h = t1 + S0 + MAJ ; -- |
|||
|
|||
|
|||
ROTATE_ARGS |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
|
|||
|
|||
mov y3, a ; y3 = a ; MAJA |
|||
rorx y0, e, 25 ; y0 = e >> 25 ; S1A |
|||
rorx y1, e, 11 ; y1 = e >> 11 ; S1B |
|||
add h, dword[%%XFER+1*4] ; h = k + w + h ; -- |
|||
or y3, c ; y3 = a|c ; MAJA |
|||
|
|||
|
|||
vpsrld XTMP4, XTMP1, 3 ; XTMP4 = W[-15] >> 3 |
|||
mov y2, f ; y2 = f ; CH |
|||
rorx T1, a, 13 ; T1 = a >> 13 ; S0B |
|||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ; S1 |
|||
xor y2, g ; y2 = f^g ; CH |
|||
|
|||
|
|||
rorx y1, e, 6 ; y1 = (e >> 6) ; S1 |
|||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1 |
|||
rorx y1, a, 22 ; y1 = a >> 22 ; S0A |
|||
and y2, e ; y2 = (f^g)&e ; CH |
|||
add d, h ; d = k + w + h + d ; -- |
|||
|
|||
vpslld XTMP1, XTMP1, (32-18) |
|||
and y3, b ; y3 = (a|c)&b ; MAJA |
|||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ; S0 |
|||
|
|||
vpxor XTMP3, XTMP3, XTMP1 |
|||
rorx T1, a, 2 ; T1 = (a >> 2) ; S0 |
|||
xor y2, g ; y2 = CH = ((f^g)&e)^g ; CH |
|||
|
|||
vpxor XTMP3, XTMP3, XTMP2 ; XTMP3 = W[-15] ror 7 ^ W[-15] ror 18 |
|||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0 |
|||
mov T1, a ; T1 = a ; MAJB |
|||
and T1, c ; T1 = a&c ; MAJB |
|||
add y2, y0 ; y2 = S1 + CH ; -- |
|||
|
|||
vpxor XTMP1, XTMP3, XTMP4 ; XTMP1 = s0 |
|||
vpshufd XTMP2, X3, 11111010b ; XTMP2 = W[-2] {BBAA} |
|||
or y3, T1 ; y3 = MAJ = ((a|c)&b)|(a&c) ; MAJ |
|||
add h, y1 ; h = k + w + h + S0 ; -- |
|||
|
|||
vpaddd XTMP0, XTMP0, XTMP1 ; XTMP0 = W[-16] + W[-7] + s0 |
|||
add d, y2 ; d = k + w + h + d + S1 + CH = d + t1 ; -- |
|||
add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; -- |
|||
add h, y3 ; h = t1 + S0 + MAJ ; -- |
|||
|
|||
vpsrld XTMP4, XTMP2, 10 ; XTMP4 = W[-2] >> 10 {BBAA} |
|||
|
|||
|
|||
ROTATE_ARGS |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 2 ;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
|
|||
mov y3, a ; y3 = a ; MAJA |
|||
rorx y0, e, 25 ; y0 = e >> 25 ; S1A |
|||
add h, [%%XFER+2*4] ; h = k + w + h ; -- |
|||
|
|||
vpsrlq XTMP3, XTMP2, 19 ; XTMP3 = W[-2] ror 19 {xBxA} |
|||
rorx y1, e, 11 ; y1 = e >> 11 ; S1B |
|||
or y3, c ; y3 = a|c ; MAJA |
|||
mov y2, f ; y2 = f ; CH |
|||
xor y2, g ; y2 = f^g ; CH |
|||
|
|||
rorx T1, a, 13 ; T1 = a >> 13 ; S0B |
|||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ; S1 |
|||
vpsrlq XTMP2, XTMP2, 17 ; XTMP2 = W[-2] ror 17 {xBxA} |
|||
and y2, e ; y2 = (f^g)&e ; CH |
|||
|
|||
rorx y1, e, 6 ; y1 = (e >> 6) ; S1 |
|||
vpxor XTMP2, XTMP2, XTMP3 |
|||
add d, h ; d = k + w + h + d ; -- |
|||
and y3, b ; y3 = (a|c)&b ; MAJA |
|||
|
|||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1 |
|||
rorx y1, a, 22 ; y1 = a >> 22 ; S0A |
|||
vpxor XTMP4, XTMP4, XTMP2 ; XTMP4 = s1 {xBxA} |
|||
xor y2, g ; y2 = CH = ((f^g)&e)^g ; CH |
|||
|
|||
vpshufb XTMP4, XTMP4, SHUF_00BA ; XTMP4 = s1 {00BA} |
|||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ; S0 |
|||
rorx T1, a, 2 ; T1 = (a >> 2) ; S0 |
|||
vpaddd XTMP0, XTMP0, XTMP4 ; XTMP0 = {..., ..., W[1], W[0]} |
|||
|
|||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0 |
|||
mov T1, a ; T1 = a ; MAJB |
|||
and T1, c ; T1 = a&c ; MAJB |
|||
add y2, y0 ; y2 = S1 + CH ; -- |
|||
vpshufd XTMP2, XTMP0, 01010000b ; XTMP2 = W[-2] {DDCC} |
|||
|
|||
or y3, T1 ; y3 = MAJ = ((a|c)&b)|(a&c) ; MAJ |
|||
add h, y1 ; h = k + w + h + S0 ; -- |
|||
add d, y2 ; d = k + w + h + d + S1 + CH = d + t1 ; -- |
|||
add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; -- |
|||
|
|||
add h, y3 ; h = t1 + S0 + MAJ ; -- |
|||
|
|||
|
|||
ROTATE_ARGS |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 3 ;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
|
|||
mov y3, a ; y3 = a ; MAJA |
|||
rorx y0, e, 25 ; y0 = e >> 25 ; S1A |
|||
rorx y1, e, 11 ; y1 = e >> 11 ; S1B |
|||
add h, dword[%%XFER+3*4] ; h = k + w + h ; -- |
|||
or y3, c ; y3 = a|c ; MAJA |
|||
|
|||
|
|||
vpsrld XTMP5, XTMP2, 10 ; XTMP5 = W[-2] >> 10 {DDCC} |
|||
mov y2, f ; y2 = f ; CH |
|||
rorx T1, a, 13 ; T1 = a >> 13 ; S0B |
|||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ; S1 |
|||
xor y2, g ; y2 = f^g ; CH |
|||
|
|||
|
|||
vpsrlq XTMP3, XTMP2, 19 ; XTMP3 = W[-2] ror 19 {xDxC} |
|||
rorx y1, e, 6 ; y1 = (e >> 6) ; S1 |
|||
and y2, e ; y2 = (f^g)&e ; CH |
|||
add d, h ; d = k + w + h + d ; -- |
|||
and y3, b ; y3 = (a|c)&b ; MAJA |
|||
|
|||
vpsrlq XTMP2, XTMP2, 17 ; XTMP2 = W[-2] ror 17 {xDxC} |
|||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1 |
|||
xor y2, g ; y2 = CH = ((f^g)&e)^g ; CH |
|||
|
|||
vpxor XTMP2, XTMP2, XTMP3 |
|||
rorx y1, a, 22 ; y1 = a >> 22 ; S0A |
|||
add y2, y0 ; y2 = S1 + CH ; -- |
|||
|
|||
vpxor XTMP5, XTMP5, XTMP2 ; XTMP5 = s1 {xDxC} |
|||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ; S0 |
|||
add d, y2 ; d = k + w + h + d + S1 + CH = d + t1 ; -- |
|||
|
|||
rorx T1, a, 2 ; T1 = (a >> 2) ; S0 |
|||
vpshufb XTMP5, XTMP5, SHUF_DC00 ; XTMP5 = s1 {DC00} |
|||
|
|||
vpaddd X0, XTMP5, XTMP0 ; X0 = {W[3], W[2], W[1], W[0]} |
|||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0 |
|||
mov T1, a ; T1 = a ; MAJB |
|||
and T1, c ; T1 = a&c ; MAJB |
|||
or y3, T1 ; y3 = MAJ = ((a|c)&b)|(a&c) ; MAJ |
|||
|
|||
add h, y1 ; h = k + w + h + S0 ; -- |
|||
add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; -- |
|||
add h, y3 ; h = t1 + S0 + MAJ ; -- |
|||
|
|||
ROTATE_ARGS |
|||
rotate_Xs |
|||
%endm |
|||
|
|||
; DO_4ROUNDS xfer_addr
; Runs four SHA-256 rounds on a..h with no message scheduling.
;   %1 : address expression of four consecutive K+W dwords for these rounds.
; The last two additions into h of rounds N+0..N+2 (shown as commented-out
; "add h, ..." lines) are deferred: they are performed at the start of the
; following round through old_h, which names the previous round's h register.
; Round N+3 performs them immediately, so h is complete when the macro ends.
; In the comments "x >> n" stands for a rotate right by n (rorx).
%macro DO_4ROUNDS 1 |
|||
%define %%XFER %1 |
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 0 ;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
|
|||
mov y2, f ; y2 = f ; CH |
|||
rorx y0, e, 25 ; y0 = e >> 25 ; S1A |
|||
rorx y1, e, 11 ; y1 = e >> 11 ; S1B |
|||
xor y2, g ; y2 = f^g ; CH |
|||
|
|||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ; S1 |
|||
rorx y1, e, 6 ; y1 = (e >> 6) ; S1 |
|||
and y2, e ; y2 = (f^g)&e ; CH |
|||
|
|||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1 |
|||
rorx T1, a, 13 ; T1 = a >> 13 ; S0B |
|||
xor y2, g ; y2 = CH = ((f^g)&e)^g ; CH |
|||
rorx y1, a, 22 ; y1 = a >> 22 ; S0A |
|||
mov y3, a ; y3 = a ; MAJA |
|||
|
|||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ; S0 |
|||
rorx T1, a, 2 ; T1 = (a >> 2) ; S0 |
|||
add h, dword[%%XFER + 4*0] ; h = k + w + h ; -- |
|||
or y3, c ; y3 = a|c ; MAJA |
|||
|
|||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0 |
|||
mov T1, a ; T1 = a ; MAJB |
|||
and y3, b ; y3 = (a|c)&b ; MAJA |
|||
and T1, c ; T1 = a&c ; MAJB |
|||
add y2, y0 ; y2 = S1 + CH ; -- |
|||
|
|||
|
|||
add d, h ; d = k + w + h + d ; -- |
|||
or y3, T1 ; y3 = MAJ = ((a|c)&b)|(a&c) ; MAJ |
|||
add h, y1 ; h = k + w + h + S0 ; -- |
|||
|
|||
add d, y2 ; d = k + w + h + d + S1 + CH = d + t1 ; -- |
|||
|
|||
|
|||
;add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; -- |
|||
|
|||
;add h, y3 ; h = t1 + S0 + MAJ ; -- |
|||
|
|||
ROTATE_ARGS |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 1 ;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
|
|||
add old_h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; -- |
|||
mov y2, f ; y2 = f ; CH |
|||
rorx y0, e, 25 ; y0 = e >> 25 ; S1A |
|||
rorx y1, e, 11 ; y1 = e >> 11 ; S1B |
|||
xor y2, g ; y2 = f^g ; CH |
|||
|
|||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ; S1 |
|||
rorx y1, e, 6 ; y1 = (e >> 6) ; S1 |
|||
and y2, e ; y2 = (f^g)&e ; CH |
|||
add old_h, y3 ; h = t1 + S0 + MAJ ; -- |
|||
|
|||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1 |
|||
rorx T1, a, 13 ; T1 = a >> 13 ; S0B |
|||
xor y2, g ; y2 = CH = ((f^g)&e)^g ; CH |
|||
rorx y1, a, 22 ; y1 = a >> 22 ; S0A |
|||
mov y3, a ; y3 = a ; MAJA |
|||
|
|||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ; S0 |
|||
rorx T1, a, 2 ; T1 = (a >> 2) ; S0 |
|||
add h, dword[%%XFER + 4*1] ; h = k + w + h ; -- |
|||
or y3, c ; y3 = a|c ; MAJA |
|||
|
|||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0 |
|||
mov T1, a ; T1 = a ; MAJB |
|||
and y3, b ; y3 = (a|c)&b ; MAJA |
|||
and T1, c ; T1 = a&c ; MAJB |
|||
add y2, y0 ; y2 = S1 + CH ; -- |
|||
|
|||
|
|||
add d, h ; d = k + w + h + d ; -- |
|||
or y3, T1 ; y3 = MAJ = ((a|c)&b)|(a&c) ; MAJ |
|||
add h, y1 ; h = k + w + h + S0 ; -- |
|||
|
|||
add d, y2 ; d = k + w + h + d + S1 + CH = d + t1 ; -- |
|||
|
|||
|
|||
;add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; -- |
|||
|
|||
;add h, y3 ; h = t1 + S0 + MAJ ; -- |
|||
|
|||
ROTATE_ARGS |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 2 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
|
|||
add old_h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; -- |
|||
mov y2, f ; y2 = f ; CH |
|||
rorx y0, e, 25 ; y0 = e >> 25 ; S1A |
|||
rorx y1, e, 11 ; y1 = e >> 11 ; S1B |
|||
xor y2, g ; y2 = f^g ; CH |
|||
|
|||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ; S1 |
|||
rorx y1, e, 6 ; y1 = (e >> 6) ; S1 |
|||
and y2, e ; y2 = (f^g)&e ; CH |
|||
add old_h, y3 ; h = t1 + S0 + MAJ ; -- |
|||
|
|||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1 |
|||
rorx T1, a, 13 ; T1 = a >> 13 ; S0B |
|||
xor y2, g ; y2 = CH = ((f^g)&e)^g ; CH |
|||
rorx y1, a, 22 ; y1 = a >> 22 ; S0A |
|||
mov y3, a ; y3 = a ; MAJA |
|||
|
|||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ; S0 |
|||
rorx T1, a, 2 ; T1 = (a >> 2) ; S0 |
|||
add h, dword[%%XFER + 4*2] ; h = k + w + h ; -- |
|||
or y3, c ; y3 = a|c ; MAJA |
|||
|
|||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0 |
|||
mov T1, a ; T1 = a ; MAJB |
|||
and y3, b ; y3 = (a|c)&b ; MAJA |
|||
and T1, c ; T1 = a&c ; MAJB |
|||
add y2, y0 ; y2 = S1 + CH ; -- |
|||
|
|||
|
|||
add d, h ; d = k + w + h + d ; -- |
|||
or y3, T1 ; y3 = MAJ = ((a|c)&b)|(a&c) ; MAJ |
|||
add h, y1 ; h = k + w + h + S0 ; -- |
|||
|
|||
add d, y2 ; d = k + w + h + d + S1 + CH = d + t1 ; -- |
|||
|
|||
|
|||
;add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; -- |
|||
|
|||
;add h, y3 ; h = t1 + S0 + MAJ ; -- |
|||
|
|||
ROTATE_ARGS |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 3 ;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
|
|||
add old_h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; -- |
|||
mov y2, f ; y2 = f ; CH |
|||
rorx y0, e, 25 ; y0 = e >> 25 ; S1A |
|||
rorx y1, e, 11 ; y1 = e >> 11 ; S1B |
|||
xor y2, g ; y2 = f^g ; CH |
|||
|
|||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ; S1 |
|||
rorx y1, e, 6 ; y1 = (e >> 6) ; S1 |
|||
and y2, e ; y2 = (f^g)&e ; CH |
|||
add old_h, y3 ; h = t1 + S0 + MAJ ; -- |
|||
|
|||
xor y0, y1 ; y0 = (e>>25) ^ (e>>11) ^ (e>>6) ; S1 |
|||
rorx T1, a, 13 ; T1 = a >> 13 ; S0B |
|||
xor y2, g ; y2 = CH = ((f^g)&e)^g ; CH |
|||
rorx y1, a, 22 ; y1 = a >> 22 ; S0A |
|||
mov y3, a ; y3 = a ; MAJA |
|||
|
|||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ; S0 |
|||
rorx T1, a, 2 ; T1 = (a >> 2) ; S0 |
|||
add h, dword[%%XFER + 4*3] ; h = k + w + h ; -- |
|||
or y3, c ; y3 = a|c ; MAJA |
|||
|
|||
xor y1, T1 ; y1 = (a>>22) ^ (a>>13) ^ (a>>2) ; S0 |
|||
mov T1, a ; T1 = a ; MAJB |
|||
and y3, b ; y3 = (a|c)&b ; MAJA |
|||
and T1, c ; T1 = a&c ; MAJB |
|||
add y2, y0 ; y2 = S1 + CH ; -- |
|||
|
|||
|
|||
add d, h ; d = k + w + h + d ; -- |
|||
or y3, T1 ; y3 = MAJ = ((a|c)&b)|(a&c) ; MAJ |
|||
add h, y1 ; h = k + w + h + S0 ; -- |
|||
|
|||
add d, y2 ; d = k + w + h + d + S1 + CH = d + t1 ; -- |
|||
|
|||
|
|||
add h, y2 ; h = k + w + h + S0 + S1 + CH = t1 + S0; -- |
|||
|
|||
add h, y3 ; h = t1 + S0 + MAJ ; -- |
|||
|
|||
ROTATE_ARGS |
|||
|
|||
%endm |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
;; void sha256_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks) |
|||
;; arg 1 : pointer to input data |
|||
;; arg 2 : pointer to digest |
|||
;; arg 3 : Num blocks |
|||
;;
;; Updates digest[0..7] in place with num_blks 64-byte blocks read from
;; input_data. When num_blks is 0 the digest is left untouched.
;; Blocks are consumed two at a time while at least two remain: the message
;; schedule for both is computed in the low/high 128-bit lanes during the first
;; block's rounds, and the second block replays the stored K+W values.
;; A trailing single block goes through do_last_block using XMM loads.
;; Uses AVX2 (ymm integer ops, vperm2i128) and BMI2 (rorx) instructions.
;; The upper halves of the YMM registers are cleared with vzeroupper before
;; returning so that legacy-SSE code in the caller does not pay the AVX/SSE
;; transition penalty.
section .text |
|||
global sha256_rorx |
|||
align 32 |
|||
sha256_rorx: |
|||
push rbx |
|||
%ifndef LINUX |
|||
push rsi |
|||
push rdi |
|||
%endif |
|||
push rbp |
|||
push r12 |
|||
push r13 |
|||
push r14 |
|||
push r15 |
|||
|
|||
; Allocate the frame, align it to 32 bytes for the vmovdqa ymm stores into
; the transfer area, and remember the unaligned rsp for the epilogue.
mov rax, rsp |
|||
sub rsp,STACK_SIZE |
|||
and rsp, -32 |
|||
mov [rsp + _RSP], rax |
|||
|
|||
%ifndef LINUX |
|||
vmovdqa [rsp + _XMM_SAVE + 0*16],xmm6 |
|||
vmovdqa [rsp + _XMM_SAVE + 1*16],xmm7 |
|||
vmovdqa [rsp + _XMM_SAVE + 2*16],xmm8 |
|||
vmovdqa [rsp + _XMM_SAVE + 3*16],xmm9 |
|||
vmovdqa [rsp + _XMM_SAVE + 4*16],xmm10 |
|||
vmovdqa [rsp + _XMM_SAVE + 5*16],xmm11 |
|||
vmovdqa [rsp + _XMM_SAVE + 6*16],xmm12 |
|||
vmovdqa [rsp + _XMM_SAVE + 7*16],xmm13 |
|||
%endif |
|||
|
|||
shl NUM_BLKS, 6 ; convert to bytes |
|||
jz done_hash |
|||
lea NUM_BLKS, [NUM_BLKS + INP - 64] ; pointer to last block |
|||
mov [rsp + _INP_END], NUM_BLKS |
|||
|
|||
cmp INP, NUM_BLKS |
|||
je only_one_block |
|||
|
|||
;; load initial digest |
|||
mov a,[4*0 + CTX] |
|||
mov b,[4*1 + CTX] |
|||
mov c,[4*2 + CTX] |
|||
mov d,[4*3 + CTX] |
|||
mov e,[4*4 + CTX] |
|||
mov f,[4*5 + CTX] |
|||
mov g,[4*6 + CTX] |
|||
mov h,[4*7 + CTX] |
|||
|
|||
vmovdqa BYTE_FLIP_MASK, [PSHUFFLE_BYTE_FLIP_MASK wrt rip] |
|||
vmovdqa SHUF_00BA, [_SHUF_00BA wrt rip] |
|||
vmovdqa SHUF_DC00, [_SHUF_DC00 wrt rip] |
|||
|
|||
; CTX shares its register with SRND, so keep the digest pointer on the stack.
mov [rsp + _CTX], CTX |
|||
|
|||
loop0: |
|||
lea TBL,[K256 wrt rip] |
|||
|
|||
;; Load first 16 dwords from two blocks |
|||
VMOVDQ XTMP0, [INP + 0*32] |
|||
VMOVDQ XTMP1, [INP + 1*32] |
|||
VMOVDQ XTMP2, [INP + 2*32] |
|||
VMOVDQ XTMP3, [INP + 3*32] |
|||
|
|||
;; byte swap data |
|||
vpshufb XTMP0, XTMP0, BYTE_FLIP_MASK |
|||
vpshufb XTMP1, XTMP1, BYTE_FLIP_MASK |
|||
vpshufb XTMP2, XTMP2, BYTE_FLIP_MASK |
|||
vpshufb XTMP3, XTMP3, BYTE_FLIP_MASK |
|||
|
|||
;; transpose data into high/low halves |
|||
vperm2i128 X0, XTMP0, XTMP2, 0x20 |
|||
vperm2i128 X1, XTMP0, XTMP2, 0x31 |
|||
vperm2i128 X2, XTMP1, XTMP3, 0x20 |
|||
vperm2i128 X3, XTMP1, XTMP3, 0x31 |
|||
|
|||
last_block_enter: |
|||
; INP is about to be clobbered by y3; save the advanced pointer first.
add INP, 64 |
|||
mov [rsp + _INP], INP |
|||
|
|||
;; schedule 48 input dwords, by doing 3 iterations of 16 rounds each |
|||
xor SRND, SRND |
|||
|
|||
align 16 |
|||
loop1: |
|||
vpaddd XFER, X0, [TBL + SRND + 0*32] |
|||
vmovdqa [rsp + _XFER + SRND + 0*32], XFER |
|||
FOUR_ROUNDS_AND_SCHED rsp + _XFER + SRND + 0*32 |
|||
|
|||
vpaddd XFER, X0, [TBL + SRND + 1*32] |
|||
vmovdqa [rsp + _XFER + SRND + 1*32], XFER |
|||
FOUR_ROUNDS_AND_SCHED rsp + _XFER + SRND + 1*32 |
|||
|
|||
vpaddd XFER, X0, [TBL + SRND + 2*32] |
|||
vmovdqa [rsp + _XFER + SRND + 2*32], XFER |
|||
FOUR_ROUNDS_AND_SCHED rsp + _XFER + SRND + 2*32 |
|||
|
|||
vpaddd XFER, X0, [TBL + SRND + 3*32] |
|||
vmovdqa [rsp + _XFER + SRND + 3*32], XFER |
|||
FOUR_ROUNDS_AND_SCHED rsp + _XFER + SRND + 3*32 |
|||
|
|||
add SRND, 4*32 |
|||
cmp SRND, 3 * 4*32 |
|||
jb loop1 |
|||
|
|||
loop2: |
|||
;; Do last 16 rounds with no scheduling |
|||
vpaddd XFER, X0, [TBL + SRND + 0*32] |
|||
vmovdqa [rsp + _XFER + SRND + 0*32], XFER |
|||
DO_4ROUNDS rsp + _XFER + SRND + 0*32 |
|||
vpaddd XFER, X1, [TBL + SRND + 1*32] |
|||
vmovdqa [rsp + _XFER + SRND + 1*32], XFER |
|||
DO_4ROUNDS rsp + _XFER + SRND + 1*32 |
|||
add SRND, 2*32 |
|||
|
|||
vmovdqa X0, X2 |
|||
vmovdqa X1, X3 |
|||
|
|||
cmp SRND, 4 * 4*32 |
|||
jb loop2 |
|||
|
|||
mov CTX, [rsp + _CTX] |
|||
mov INP, [rsp + _INP] |
|||
|
|||
addm [4*0 + CTX],a |
|||
addm [4*1 + CTX],b |
|||
addm [4*2 + CTX],c |
|||
addm [4*3 + CTX],d |
|||
addm [4*4 + CTX],e |
|||
addm [4*5 + CTX],f |
|||
addm [4*6 + CTX],g |
|||
addm [4*7 + CTX],h |
|||
|
|||
; INP already points past the block just hashed; above the last block
; means that block was the final one.
cmp INP, [rsp + _INP_END] |
|||
ja done_hash |
|||
|
|||
;;;; Do second block using previously scheduled results |
|||
xor SRND, SRND |
|||
align 16 |
|||
loop3: |
|||
; +16 selects the upper 128-bit lane of each stored K+W vector.
DO_4ROUNDS rsp + _XFER + SRND + 0*32 + 16 |
|||
DO_4ROUNDS rsp + _XFER + SRND + 1*32 + 16 |
|||
add SRND, 2*32 |
|||
cmp SRND, 4 * 4*32 |
|||
jb loop3 |
|||
|
|||
mov CTX, [rsp + _CTX] |
|||
mov INP, [rsp + _INP] |
|||
add INP, 64 |
|||
|
|||
addm [4*0 + CTX],a |
|||
addm [4*1 + CTX],b |
|||
addm [4*2 + CTX],c |
|||
addm [4*3 + CTX],d |
|||
addm [4*4 + CTX],e |
|||
addm [4*5 + CTX],f |
|||
addm [4*6 + CTX],g |
|||
addm [4*7 + CTX],h |
|||
|
|||
; below: at least two blocks remain; above: none; equal: exactly one.
cmp INP, [rsp + _INP_END] |
|||
jb loop0 |
|||
ja done_hash |
|||
|
|||
do_last_block: |
|||
;;;; do last block |
|||
lea TBL,[K256 wrt rip] |
|||
|
|||
VMOVDQ XWORD0, [INP + 0*16] |
|||
VMOVDQ XWORD1, [INP + 1*16] |
|||
VMOVDQ XWORD2, [INP + 2*16] |
|||
VMOVDQ XWORD3, [INP + 3*16] |
|||
|
|||
vpshufb XWORD0, XWORD0, X_BYTE_FLIP_MASK |
|||
vpshufb XWORD1, XWORD1, X_BYTE_FLIP_MASK |
|||
vpshufb XWORD2, XWORD2, X_BYTE_FLIP_MASK |
|||
vpshufb XWORD3, XWORD3, X_BYTE_FLIP_MASK |
|||
|
|||
jmp last_block_enter |
|||
|
|||
only_one_block: |
|||
|
|||
;; load initial digest |
|||
mov a,[4*0 + CTX] |
|||
mov b,[4*1 + CTX] |
|||
mov c,[4*2 + CTX] |
|||
mov d,[4*3 + CTX] |
|||
mov e,[4*4 + CTX] |
|||
mov f,[4*5 + CTX] |
|||
mov g,[4*6 + CTX] |
|||
mov h,[4*7 + CTX] |
|||
|
|||
vmovdqa BYTE_FLIP_MASK, [PSHUFFLE_BYTE_FLIP_MASK wrt rip] |
|||
vmovdqa SHUF_00BA, [_SHUF_00BA wrt rip] |
|||
vmovdqa SHUF_DC00, [_SHUF_DC00 wrt rip] |
|||
|
|||
mov [rsp + _CTX], CTX |
|||
jmp do_last_block |
|||
|
|||
done_hash: |
|||
vzeroupper ; clear upper YMM halves to avoid AVX/SSE transition penalties in the caller |
|||
%ifndef LINUX |
|||
vmovdqa xmm6,[rsp + _XMM_SAVE + 0*16] |
|||
vmovdqa xmm7,[rsp + _XMM_SAVE + 1*16] |
|||
vmovdqa xmm8,[rsp + _XMM_SAVE + 2*16] |
|||
vmovdqa xmm9,[rsp + _XMM_SAVE + 3*16] |
|||
vmovdqa xmm10,[rsp + _XMM_SAVE + 4*16] |
|||
vmovdqa xmm11,[rsp + _XMM_SAVE + 5*16] |
|||
vmovdqa xmm12,[rsp + _XMM_SAVE + 6*16] |
|||
vmovdqa xmm13,[rsp + _XMM_SAVE + 7*16] |
|||
%endif |
|||
|
|||
; Restore the rsp value saved before the frame was allocated and aligned.
mov rsp, [rsp + _RSP] |
|||
|
|||
pop r15 |
|||
pop r14 |
|||
pop r13 |
|||
pop r12 |
|||
pop rbp |
|||
%ifndef LINUX |
|||
pop rdi |
|||
pop rsi |
|||
%endif |
|||
pop rbx |
|||
|
|||
ret |
|||
|
|||
; Constant tables for sha256_rorx.
section .data |
|||
align 64 |
|||
; SHA-256 round constants K[0..63]. Every row of four constants appears twice
; so that one 32-byte load supplies the same constants to both 128-bit lanes
; (one lane per input block).
K256: |
|||
dd 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 |
|||
dd 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 |
|||
dd 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 |
|||
dd 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 |
|||
dd 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 |
|||
dd 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 |
|||
dd 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 |
|||
dd 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 |
|||
dd 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc |
|||
dd 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc |
|||
dd 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da |
|||
dd 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da |
|||
dd 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 |
|||
dd 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 |
|||
dd 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 |
|||
dd 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 |
|||
dd 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 |
|||
dd 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 |
|||
dd 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 |
|||
dd 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 |
|||
dd 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 |
|||
dd 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 |
|||
dd 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 |
|||
dd 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 |
|||
dd 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 |
|||
dd 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 |
|||
dd 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 |
|||
dd 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 |
|||
dd 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 |
|||
dd 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 |
|||
dd 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 |
|||
dd 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 |
|||
|
|||
; vpshufb control that reverses the four bytes inside every dword
; (big-endian message words -> native order); same pattern in both lanes.
PSHUFFLE_BYTE_FLIP_MASK: |
|||
ddq 0x0c0d0e0f08090a0b0405060700010203,0x0c0d0e0f08090a0b0405060700010203 |
|||
|
|||
; shuffle xBxA -> 00BA: per lane, dwords 0 and 2 go to the low qword and |
|||
; the 0xFF selector bytes zero the high qword.
_SHUF_00BA: |
|||
ddq 0xFFFFFFFFFFFFFFFF0b0a090803020100,0xFFFFFFFFFFFFFFFF0b0a090803020100 |
|||
|
|||
; shuffle xDxC -> DC00: per lane, dwords 0 and 2 go to the high qword and |
|||
; the 0xFF selector bytes zero the low qword.
_SHUF_DC00: |
|||
ddq 0x0b0a090803020100FFFFFFFFFFFFFFFF,0x0b0a090803020100FFFFFFFFFFFFFFFF |
File diff suppressed because it is too large
@ -0,0 +1,544 @@ |
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
; Copyright (c) 2012, Intel Corporation |
|||
; |
|||
; All rights reserved. |
|||
; |
|||
; Redistribution and use in source and binary forms, with or without |
|||
; modification, are permitted provided that the following conditions are |
|||
; met: |
|||
; |
|||
; * Redistributions of source code must retain the above copyright |
|||
; notice, this list of conditions and the following disclaimer. |
|||
; |
|||
; * Redistributions in binary form must reproduce the above copyright |
|||
; notice, this list of conditions and the following disclaimer in the |
|||
; documentation and/or other materials provided with the |
|||
; distribution. |
|||
; |
|||
; * Neither the name of the Intel Corporation nor the names of its |
|||
; contributors may be used to endorse or promote products derived from |
|||
; this software without specific prior written permission. |
|||
; |
|||
; |
|||
; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY |
|||
; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|||
; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|||
; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR |
|||
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|||
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|||
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|||
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
|||
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
|||
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|||
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
; |
|||
; Example YASM command lines: |
|||
; Windows: yasm -Xvc -f x64 -rnasm -pnasm -o sha256_sse4.obj -g cv8 sha256_sse4.asm |
|||
; Linux: yasm -f x64 -f elf64 -X gnu -g dwarf2 -D LINUX -o sha256_sse4.o sha256_sse4.asm |
|||
; |
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
; |
|||
; This code is described in an Intel White-Paper: |
|||
; "Fast SHA-256 Implementations on Intel Architecture Processors" |
|||
; |
|||
; To find it, surf to http://www.intel.com/p/en_US/embedded |
|||
; and search for that title. |
|||
; The paper is expected to be released roughly at the end of April, 2012 |
|||
; |
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
; This code schedules 1 block at a time, with 4 lanes per block |
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
|
|||
%define MOVDQ movdqu ;; assume buffers not aligned |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Define Macros |
|||
|
|||
; addm [mem], reg |
|||
; Add reg to mem using reg-mem add and store |
; %1 = memory operand, %2 = register.
; The sum is formed in the register first and then written back, so on exit
; both %1 and %2 hold (old [mem] + old reg); the register is clobbered.
|||
%macro addm 2 |
|||
add %2, %1 |
|||
mov %1, %2 |
|||
%endm |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
|
|||
; COPY_XMM_AND_BSWAP xmm, [mem], byte_flip_mask |
|||
; Load xmm with mem and byte swap each dword |
; %1 = destination xmm register, %2 = 16-byte memory source,
; %3 = xmm register holding the pshufb control mask.
; The load goes through MOVDQ (defined as movdqu in this file), so %2 does
; not need to be 16-byte aligned.
|||
%macro COPY_XMM_AND_BSWAP 3 |
|||
MOVDQ %1, %2 |
|||
pshufb %1, %3 |
|||
%endmacro |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
|
|||
; ---- Register aliases ------------------------------------------------------
; X0..X3      : the four xmm registers holding the sliding window of 16
;               message-schedule dwords (renamed by rotate_Xs).
; XTMP0..4    : vector scratch registers used while scheduling.
; XFER        : staging register for k+w before it is stored to the stack.
; a..h        : the eight 32-bit SHA-256 working variables (renamed by
;               ROTATE_ARGS).
; y0..y2      : scalar scratch registers for the round function.
%define X0 xmm4 |
|||
%define X1 xmm5 |
|||
%define X2 xmm6 |
|||
%define X3 xmm7 |
|||
|
|||
%define XTMP0 xmm0 |
|||
%define XTMP1 xmm1 |
|||
%define XTMP2 xmm2 |
|||
%define XTMP3 xmm3 |
|||
%define XTMP4 xmm8 |
|||
%define XFER xmm9 |
|||
|
|||
%define SHUF_00BA xmm10 ; shuffle xBxA -> 00BA |
|||
%define SHUF_DC00 xmm11 ; shuffle xDxC -> DC00 |
|||
%define BYTE_FLIP_MASK xmm12 |
|||
|
|||
; Argument registers differ by build: the LINUX branch takes the arguments
; in rdi/rsi/rdx, the other branch in rcx/rdx/r8.
; In both branches SRND reuses the INP register, and working variable `e`
; reuses the (32-bit view of the) NUM_BLKS register, so those arguments must
; be saved before the aliases are written.
%ifdef LINUX |
|||
%define NUM_BLKS rdx ; 3rd arg |
|||
%define CTX rsi ; 2nd arg |
|||
%define INP rdi ; 1st arg |
|||
|
|||
%define SRND rdi ; clobbers INP |
|||
%define c ecx |
|||
%define d r8d |
|||
%define e edx |
|||
%else |
|||
%define NUM_BLKS r8 ; 3rd arg |
|||
%define CTX rdx ; 2nd arg |
|||
%define INP rcx ; 1st arg |
|||
|
|||
%define SRND rcx ; clobbers INP |
|||
%define c edi |
|||
%define d esi |
|||
%define e r8d |
|||
|
|||
%endif |
|||
%define TBL rbp |
|||
%define a eax |
|||
%define b ebx |
|||
|
|||
%define f r9d |
|||
%define g r10d |
|||
%define h r11d |
|||
|
|||
%define y0 r13d |
|||
%define y1 r14d |
|||
%define y2 r15d |
|||
|
|||
|
|||
|
|||
; ---- Stack frame layout (offsets from rsp after `sub rsp, STACK_SIZE`) -----
;   _INP_END : saved pointer to the end of the input data
;   _INP     : saved pointer to the current input block
;   _XFER    : 16-byte slot holding the four k+w dwords for the next rounds;
;              the 16-byte store at _XFER covers _XFER_SIZE plus _ALIGN_SIZE
;   _XMM_SAVE: save area for xmm6..xmm12 (only sized in the non-LINUX build)
_INP_END_SIZE equ 8 |
|||
_INP_SIZE equ 8 |
|||
_XFER_SIZE equ 8 |
|||
%ifdef LINUX |
|||
_XMM_SAVE_SIZE equ 0 |
|||
%else |
|||
_XMM_SAVE_SIZE equ 7*16 |
|||
%endif |
|||
; STACK_SIZE plus pushes must be an odd multiple of 8 |
; (with the 8-byte return address that makes rsp 16-byte aligned, which the
; movdqa accesses to _XFER and _XMM_SAVE require)
|||
_ALIGN_SIZE equ 8 |
|||
|
|||
_INP_END equ 0 |
|||
_INP equ _INP_END + _INP_END_SIZE |
|||
_XFER equ _INP + _INP_SIZE |
|||
_XMM_SAVE equ _XFER + _XFER_SIZE + _ALIGN_SIZE |
|||
STACK_SIZE equ _XMM_SAVE + _XMM_SAVE_SIZE |
|||
|
|||
; rotate_Xs |
|||
; Rotate values of symbols X0...X3 |
; Assemble-time renaming only (no instructions are emitted): afterwards the
; name X0 refers to the register previously called X1, X1 -> old X2,
; X2 -> old X3, and X3 -> old X0.
|||
%macro rotate_Xs 0 |
|||
%xdefine X_ X0 |
|||
%xdefine X0 X1 |
|||
%xdefine X1 X2 |
|||
%xdefine X2 X3 |
|||
%xdefine X3 X_ |
|||
%endm |
|||
|
|||
; ROTATE_ARGS |
|||
; Rotate values of symbols a...h |
; Assemble-time renaming only (no instructions are emitted): afterwards the
; name `a` refers to the register previously called `h`, `b` -> old `a`,
; ... `h` -> old `g`. This stands in for the per-round variable shuffle
; without moving any data.
|||
%macro ROTATE_ARGS 0 |
|||
%xdefine TMP_ h |
|||
%xdefine h g |
|||
%xdefine g f |
|||
%xdefine f e |
|||
%xdefine e d |
|||
%xdefine d c |
|||
%xdefine c b |
|||
%xdefine b a |
|||
%xdefine a TMP_ |
|||
%endm |
|||
|
|||
; FOUR_ROUNDS_AND_SCHED
; Performs four rounds on a..h, taking the pre-added k+w dwords from
; [rsp + _XFER + 0*4 .. 3*4], while interleaving the vector computation of
; the next four message-schedule dwords from X0..X3.
; The new dwords end up in the register named X0 on entry; the trailing
; rotate_Xs renames it to X3. Each round ends with ROTATE_ARGS.
; Clobbers XTMP0..XTMP4 and y0..y2. Needs SHUF_00BA / SHUF_DC00 loaded.
; In the scalar comments `>>` stands for a 32-bit rotate right (the
; instruction is ror). The scalar and vector instruction streams are
; interleaved by hand; do not reorder.
%macro FOUR_ROUNDS_AND_SCHED 0 |
|||
;; compute s0 four at a time and s1 two at a time |
|||
;; compute W[-16] + W[-7] 4 at a time |
|||
movdqa XTMP0, X3 |
|||
mov y0, e ; y0 = e |
|||
ror y0, (25-11) ; y0 = e >> (25-11) |
|||
mov y1, a ; y1 = a |
|||
palignr XTMP0, X2, 4 ; XTMP0 = W[-7] |
|||
ror y1, (22-13) ; y1 = a >> (22-13) |
|||
xor y0, e ; y0 = e ^ (e >> (25-11)) |
|||
mov y2, f ; y2 = f |
|||
ror y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6)) |
|||
movdqa XTMP1, X1 |
|||
xor y1, a ; y1 = a ^ (a >> (22-13)) |
|||
xor y2, g ; y2 = f^g |
|||
paddd XTMP0, X0 ; XTMP0 = W[-7] + W[-16] |
|||
xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) |
|||
and y2, e ; y2 = (f^g)&e |
|||
ror y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2)) |
|||
;; compute s0 |
|||
palignr XTMP1, X0, 4 ; XTMP1 = W[-15] |
|||
xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) |
|||
ror y0, 6 ; y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) |
|||
xor y2, g ; y2 = CH = ((f^g)&e)^g |
|||
movdqa XTMP2, XTMP1 ; XTMP2 = W[-15] |
|||
ror y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) |
|||
add y2, y0 ; y2 = S1 + CH |
|||
add y2, [rsp + _XFER + 0*4] ; y2 = k + w + S1 + CH |
|||
movdqa XTMP3, XTMP1 ; XTMP3 = W[-15] |
|||
mov y0, a ; y0 = a |
|||
add h, y2 ; h = h + S1 + CH + k + w |
|||
mov y2, a ; y2 = a |
|||
pslld XTMP1, (32-7) |
|||
or y0, c ; y0 = a|c |
|||
add d, h ; d = d + h + S1 + CH + k + w |
|||
and y2, c ; y2 = a&c |
|||
psrld XTMP2, 7 |
|||
and y0, b ; y0 = (a|c)&b |
|||
add h, y1 ; h = h + S1 + CH + k + w + S0 |
|||
por XTMP1, XTMP2 ; XTMP1 = W[-15] ror 7 |
|||
or y0, y2 ; y0 = MAJ = ((a|c)&b)|(a&c) |
|||
add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ |
|||
|
|||
ROTATE_ARGS |
|||
movdqa XTMP2, XTMP3 ; XTMP2 = W[-15] |
|||
mov y0, e ; y0 = e |
|||
mov y1, a ; y1 = a |
|||
movdqa XTMP4, XTMP3 ; XTMP4 = W[-15] |
|||
ror y0, (25-11) ; y0 = e >> (25-11) |
|||
xor y0, e ; y0 = e ^ (e >> (25-11)) |
|||
mov y2, f ; y2 = f |
|||
ror y1, (22-13) ; y1 = a >> (22-13) |
|||
pslld XTMP3, (32-18) |
|||
xor y1, a ; y1 = a ^ (a >> (22-13)) |
|||
ror y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6)) |
|||
xor y2, g ; y2 = f^g |
|||
psrld XTMP2, 18 |
|||
ror y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2)) |
|||
xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) |
|||
and y2, e ; y2 = (f^g)&e |
|||
ror y0, 6 ; y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) |
|||
pxor XTMP1, XTMP3 |
|||
xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) |
|||
xor y2, g ; y2 = CH = ((f^g)&e)^g |
|||
psrld XTMP4, 3 ; XTMP4 = W[-15] >> 3 |
|||
add y2, y0 ; y2 = S1 + CH |
|||
add y2, [rsp + _XFER + 1*4] ; y2 = k + w + S1 + CH |
|||
ror y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) |
|||
pxor XTMP1, XTMP2 ; XTMP1 = W[-15] ror 7 ^ W[-15] ror 18 |
|||
mov y0, a ; y0 = a |
|||
add h, y2 ; h = h + S1 + CH + k + w |
|||
mov y2, a ; y2 = a |
|||
pxor XTMP1, XTMP4 ; XTMP1 = s0 |
|||
or y0, c ; y0 = a|c |
|||
add d, h ; d = d + h + S1 + CH + k + w |
|||
and y2, c ; y2 = a&c |
|||
;; compute low s1 |
|||
pshufd XTMP2, X3, 11111010b ; XTMP2 = W[-2] {BBAA} |
|||
and y0, b ; y0 = (a|c)&b |
|||
add h, y1 ; h = h + S1 + CH + k + w + S0 |
|||
paddd XTMP0, XTMP1 ; XTMP0 = W[-16] + W[-7] + s0 |
|||
or y0, y2 ; y0 = MAJ = ((a|c)&b)|(a&c) |
|||
add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ |
|||
|
|||
ROTATE_ARGS |
|||
movdqa XTMP3, XTMP2 ; XTMP3 = W[-2] {BBAA} |
|||
mov y0, e ; y0 = e |
|||
mov y1, a ; y1 = a |
|||
ror y0, (25-11) ; y0 = e >> (25-11) |
|||
movdqa XTMP4, XTMP2 ; XTMP4 = W[-2] {BBAA} |
|||
xor y0, e ; y0 = e ^ (e >> (25-11)) |
|||
ror y1, (22-13) ; y1 = a >> (22-13) |
|||
mov y2, f ; y2 = f |
|||
xor y1, a ; y1 = a ^ (a >> (22-13)) |
|||
ror y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6)) |
|||
psrlq XTMP2, 17 ; XTMP2 = W[-2] ror 17 {xBxA} |
|||
xor y2, g ; y2 = f^g |
|||
psrlq XTMP3, 19 ; XTMP3 = W[-2] ror 19 {xBxA} |
|||
xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) |
|||
and y2, e ; y2 = (f^g)&e |
|||
psrld XTMP4, 10 ; XTMP4 = W[-2] >> 10 {BBAA} |
|||
ror y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2)) |
|||
xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) |
|||
xor y2, g ; y2 = CH = ((f^g)&e)^g |
|||
ror y0, 6 ; y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) |
|||
pxor XTMP2, XTMP3 |
|||
add y2, y0 ; y2 = S1 + CH |
|||
ror y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) |
|||
add y2, [rsp + _XFER + 2*4] ; y2 = k + w + S1 + CH |
|||
pxor XTMP4, XTMP2 ; XTMP4 = s1 {xBxA} |
|||
mov y0, a ; y0 = a |
|||
add h, y2 ; h = h + S1 + CH + k + w |
|||
mov y2, a ; y2 = a |
|||
pshufb XTMP4, SHUF_00BA ; XTMP4 = s1 {00BA} |
|||
or y0, c ; y0 = a|c |
|||
add d, h ; d = d + h + S1 + CH + k + w |
|||
and y2, c ; y2 = a&c |
|||
paddd XTMP0, XTMP4 ; XTMP0 = {..., ..., W[1], W[0]} |
|||
and y0, b ; y0 = (a|c)&b |
|||
add h, y1 ; h = h + S1 + CH + k + w + S0 |
|||
;; compute high s1 |
|||
pshufd XTMP2, XTMP0, 01010000b ; XTMP2 = W[-2] {DDCC} |
|||
or y0, y2 ; y0 = MAJ = ((a|c)&b)|(a&c) |
|||
add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ |
|||
|
|||
ROTATE_ARGS |
|||
movdqa XTMP3, XTMP2 ; XTMP3 = W[-2] {DDCC} |
|||
mov y0, e ; y0 = e |
|||
ror y0, (25-11) ; y0 = e >> (25-11) |
|||
mov y1, a ; y1 = a |
|||
movdqa X0, XTMP2 ; X0 = W[-2] {DDCC} |
|||
ror y1, (22-13) ; y1 = a >> (22-13) |
|||
xor y0, e ; y0 = e ^ (e >> (25-11)) |
|||
mov y2, f ; y2 = f |
|||
ror y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6)) |
|||
psrlq XTMP2, 17 ; XTMP2 = W[-2] ror 17 {xDxC} |
|||
xor y1, a ; y1 = a ^ (a >> (22-13)) |
|||
xor y2, g ; y2 = f^g |
|||
psrlq XTMP3, 19 ; XTMP3 = W[-2] ror 19 {xDxC} |
|||
xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) |
|||
and y2, e ; y2 = (f^g)&e |
|||
ror y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2)) |
|||
psrld X0, 10 ; X0 = W[-2] >> 10 {DDCC} |
|||
xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) |
|||
ror y0, 6 ; y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) |
|||
xor y2, g ; y2 = CH = ((f^g)&e)^g |
|||
pxor XTMP2, XTMP3 |
|||
ror y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) |
|||
add y2, y0 ; y2 = S1 + CH |
|||
add y2, [rsp + _XFER + 3*4] ; y2 = k + w + S1 + CH |
|||
pxor X0, XTMP2 ; X0 = s1 {xDxC} |
|||
mov y0, a ; y0 = a |
|||
add h, y2 ; h = h + S1 + CH + k + w |
|||
mov y2, a ; y2 = a |
|||
pshufb X0, SHUF_DC00 ; X0 = s1 {DC00} |
|||
or y0, c ; y0 = a|c |
|||
add d, h ; d = d + h + S1 + CH + k + w |
|||
and y2, c ; y2 = a&c |
|||
paddd X0, XTMP0 ; X0 = {W[3], W[2], W[1], W[0]} |
|||
and y0, b ; y0 = (a|c)&b |
|||
add h, y1 ; h = h + S1 + CH + k + w + S0 |
|||
or y0, y2 ; y0 = MAJ = ((a|c)&b)|(a&c) |
|||
add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ |
|||
|
|||
ROTATE_ARGS |
|||
rotate_Xs |
|||
%endm |
|||
|
|||
;; input is [rsp + _XFER + %1 * 4] |
;; DO_ROUND n: one round on a..h with no message scheduling.
;; %1 selects which of the four k+w dwords stored at _XFER is consumed.
;; Clobbers y0..y2; ends with ROTATE_ARGS so the next round sees the
;; working variables under their shifted names.
;; In the comments `>>` stands for a 32-bit rotate right (instruction: ror).
|||
%macro DO_ROUND 1 |
|||
mov y0, e ; y0 = e |
|||
ror y0, (25-11) ; y0 = e >> (25-11) |
|||
mov y1, a ; y1 = a |
|||
xor y0, e ; y0 = e ^ (e >> (25-11)) |
|||
ror y1, (22-13) ; y1 = a >> (22-13) |
|||
mov y2, f ; y2 = f |
|||
xor y1, a ; y1 = a ^ (a >> (22-13)) |
|||
ror y0, (11-6) ; y0 = (e >> (11-6)) ^ (e >> (25-6)) |
|||
xor y2, g ; y2 = f^g |
|||
xor y0, e ; y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) |
|||
ror y1, (13-2) ; y1 = (a >> (13-2)) ^ (a >> (22-2)) |
|||
and y2, e ; y2 = (f^g)&e |
|||
xor y1, a ; y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) |
|||
ror y0, 6 ; y0 = S1 = (e>>6) ^ (e>>11) ^ (e>>25) |
|||
xor y2, g ; y2 = CH = ((f^g)&e)^g |
|||
add y2, y0 ; y2 = S1 + CH |
|||
ror y1, 2 ; y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) |
|||
add y2, [rsp + _XFER + %1 * 4] ; y2 = k + w + S1 + CH |
|||
mov y0, a ; y0 = a |
|||
add h, y2 ; h = h + S1 + CH + k + w |
|||
mov y2, a ; y2 = a |
|||
or y0, c ; y0 = a|c |
|||
add d, h ; d = d + h + S1 + CH + k + w |
|||
and y2, c ; y2 = a&c |
|||
and y0, b ; y0 = (a|c)&b |
|||
add h, y1 ; h = h + S1 + CH + k + w + S0 |
|||
or y0, y2 ; y0 = MAJ = ((a|c)&b)|(a&c) |
|||
add h, y0 ; h = h + S1 + CH + k + w + S0 + MAJ |
|||
ROTATE_ARGS |
|||
%endm |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
|||
;; void sha256_sse4(void *input_data, UINT32 digest[8], UINT64 num_blks) |
|||
;; arg 1 : pointer to input data |
|||
;; arg 2 : pointer to digest |
|||
;; arg 3 : Num blocks |
;;
;; Processes num_blks consecutive 64-byte blocks from input_data and
;; accumulates the result into digest[0..7] in place (read at entry, written
;; back after every block). Returns immediately, leaving digest untouched,
;; when num_blks is 0. Input is loaded with unaligned loads; no padding or
;; length handling is done here.
|||
section .text |
|||
global sha256_sse4 |
|||
align 32 |
|||
sha256_sse4: |
; Prologue: save the general-purpose registers this routine overwrites
; (rsi/rdi additionally in the non-LINUX build).
|||
push rbx |
|||
%ifndef LINUX |
|||
push rsi |
|||
push rdi |
|||
%endif |
|||
push rbp |
|||
push r13 |
|||
push r14 |
|||
push r15 |
|||
|
|||
sub rsp,STACK_SIZE |
|||
%ifndef LINUX |
; non-LINUX build: preserve xmm6..xmm12 in the frame's save area
|||
movdqa [rsp + _XMM_SAVE + 0*16],xmm6 |
|||
movdqa [rsp + _XMM_SAVE + 1*16],xmm7 |
|||
movdqa [rsp + _XMM_SAVE + 2*16],xmm8 |
|||
movdqa [rsp + _XMM_SAVE + 3*16],xmm9 |
|||
movdqa [rsp + _XMM_SAVE + 4*16],xmm10 |
|||
movdqa [rsp + _XMM_SAVE + 5*16],xmm11 |
|||
movdqa [rsp + _XMM_SAVE + 6*16],xmm12 |
|||
%endif |
|||
|
|||
shl NUM_BLKS, 6 ; convert to bytes |
|||
jz done_hash |
|||
add NUM_BLKS, INP ; pointer to end of data |
; spill the end pointer now: the NUM_BLKS register is reused as `e` below
|||
mov [rsp + _INP_END], NUM_BLKS |
|||
|
|||
;; load initial digest |
|||
mov a,[4*0 + CTX] |
|||
mov b,[4*1 + CTX] |
|||
mov c,[4*2 + CTX] |
|||
mov d,[4*3 + CTX] |
|||
mov e,[4*4 + CTX] |
|||
mov f,[4*5 + CTX] |
|||
mov g,[4*6 + CTX] |
|||
mov h,[4*7 + CTX] |
|||
|
|||
movdqa BYTE_FLIP_MASK, [PSHUFFLE_BYTE_FLIP_MASK wrt rip] |
|||
movdqa SHUF_00BA, [_SHUF_00BA wrt rip] |
|||
movdqa SHUF_DC00, [_SHUF_DC00 wrt rip] |
|||
|
|||
; loop0: one iteration per 64-byte input block
loop0: |
|||
lea TBL,[K256 wrt rip] |
|||
|
|||
;; byte swap first 16 dwords |
|||
COPY_XMM_AND_BSWAP X0, [INP + 0*16], BYTE_FLIP_MASK |
|||
COPY_XMM_AND_BSWAP X1, [INP + 1*16], BYTE_FLIP_MASK |
|||
COPY_XMM_AND_BSWAP X2, [INP + 2*16], BYTE_FLIP_MASK |
|||
COPY_XMM_AND_BSWAP X3, [INP + 3*16], BYTE_FLIP_MASK |
|||
|
|||
; save INP: its register is reused as the SRND loop counter
|||
mov [rsp + _INP], INP |
|||
|
|||
;; schedule 48 input dwords, by doing 3 rounds of 16 each |
|||
mov SRND, 3 |
|||
align 16 |
|||
; loop1: each iteration runs 16 rounds and schedules 16 new dwords,
; consuming four 16-byte rows of K256
loop1: |
|||
movdqa XFER, [TBL + 0*16] |
|||
paddd XFER, X0 |
|||
movdqa [rsp + _XFER], XFER |
|||
FOUR_ROUNDS_AND_SCHED |
|||
|
|||
movdqa XFER, [TBL + 1*16] |
|||
paddd XFER, X0 |
|||
movdqa [rsp + _XFER], XFER |
|||
FOUR_ROUNDS_AND_SCHED |
|||
|
|||
movdqa XFER, [TBL + 2*16] |
|||
paddd XFER, X0 |
|||
movdqa [rsp + _XFER], XFER |
|||
FOUR_ROUNDS_AND_SCHED |
|||
|
|||
movdqa XFER, [TBL + 3*16] |
|||
paddd XFER, X0 |
|||
movdqa [rsp + _XFER], XFER |
|||
add TBL, 4*16 |
|||
FOUR_ROUNDS_AND_SCHED |
|||
|
|||
sub SRND, 1 |
|||
jne loop1 |
|||
|
|||
; loop2: the last 16 rounds, 8 per iteration, using the already-scheduled
; dwords in X0/X1 (then X2/X3); no further scheduling is needed
|||
mov SRND, 2 |
|||
loop2: |
|||
paddd X0, [TBL + 0*16] |
|||
movdqa [rsp + _XFER], X0 |
|||
DO_ROUND 0 |
|||
DO_ROUND 1 |
|||
DO_ROUND 2 |
|||
DO_ROUND 3 |
|||
paddd X1, [TBL + 1*16] |
|||
movdqa [rsp + _XFER], X1 |
|||
add TBL, 2*16 |
|||
DO_ROUND 0 |
|||
DO_ROUND 1 |
|||
DO_ROUND 2 |
|||
DO_ROUND 3 |
|||
|
|||
movdqa X0, X2 |
|||
movdqa X1, X3 |
|||
|
|||
sub SRND, 1 |
|||
jne loop2 |
|||
|
|||
; fold the working variables back into the digest; addm leaves the updated
; value in both memory and the register, so a..h are ready for the next block
|||
addm [4*0 + CTX],a |
|||
addm [4*1 + CTX],b |
|||
addm [4*2 + CTX],c |
|||
addm [4*3 + CTX],d |
|||
addm [4*4 + CTX],e |
|||
addm [4*5 + CTX],f |
|||
addm [4*6 + CTX],g |
|||
addm [4*7 + CTX],h |
|||
|
|||
; advance to the next block and loop until the saved end pointer is reached
|||
mov INP, [rsp + _INP] |
|||
add INP, 64 |
|||
cmp INP, [rsp + _INP_END] |
|||
jne loop0 |
|||
|
|||
done_hash: |
|||
%ifndef LINUX |
|||
movdqa xmm6,[rsp + _XMM_SAVE + 0*16] |
|||
movdqa xmm7,[rsp + _XMM_SAVE + 1*16] |
|||
movdqa xmm8,[rsp + _XMM_SAVE + 2*16] |
|||
movdqa xmm9,[rsp + _XMM_SAVE + 3*16] |
|||
movdqa xmm10,[rsp + _XMM_SAVE + 4*16] |
|||
movdqa xmm11,[rsp + _XMM_SAVE + 5*16] |
|||
movdqa xmm12,[rsp + _XMM_SAVE + 6*16] |
|||
%endif |
|||
|
|||
add rsp, STACK_SIZE |
|||
|
|||
; Epilogue: restore registers in reverse order of the prologue pushes
|||
pop r15 |
|||
pop r14 |
|||
pop r13 |
|||
pop rbp |
|||
%ifndef LINUX |
|||
pop rdi |
|||
pop rsi |
|||
%endif |
|||
pop rbx |
|||
|
|||
ret |
|||
|
|||
|
|||
; Constant tables. K256 holds 64 dwords laid out as sixteen 16-byte rows;
; sha256_sse4 reads it one row at a time with movdqa (hence the alignment)
; and adds each row to four message-schedule dwords to form k+w.
; None of these tables is written by the code in this file.
section .data |
|||
align 64 |
|||
K256: |
|||
dd 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 |
|||
dd 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 |
|||
dd 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 |
|||
dd 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 |
|||
dd 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc |
|||
dd 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da |
|||
dd 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 |
|||
dd 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 |
|||
dd 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 |
|||
dd 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 |
|||
dd 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 |
|||
dd 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 |
|||
dd 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 |
|||
dd 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 |
|||
dd 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 |
|||
dd 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 |
|||
|
|||
; pshufb control used by COPY_XMM_AND_BSWAP: reverses the bytes of each dword
PSHUFFLE_BYTE_FLIP_MASK: ddq 0x0c0d0e0f08090a0b0405060700010203 |
|||
|
|||
; shuffle xBxA -> 00BA |
|||
_SHUF_00BA: ddq 0xFFFFFFFFFFFFFFFF0b0a090803020100 |
|||
|
|||
; shuffle xDxC -> DC00 |
|||
_SHUF_DC00: ddq 0x0b0a090803020100FFFFFFFFFFFFFFFF |
@ -0,0 +1 @@ |
|||
../../licenses/LGPL-2.1 |
@ -0,0 +1,116 @@ |
|||
#include "config.h" |
|||
#include <string.h> |
|||
#include <stdio.h> |
|||
|
|||
/** |
|||
* htable - hash table routines |
|||
* |
|||
* A hash table is an efficient structure for looking up keys. This version |
|||
* grows with usage and allows efficient deletion. |
|||
* |
|||
* Example: |
|||
* #include <ccan/htable/htable.h> |
|||
* #include <ccan/hash/hash.h> |
|||
* #include <stdio.h> |
|||
* #include <err.h> |
|||
* #include <string.h> |
|||
* |
|||
* struct name_to_digit { |
|||
* const char *name; |
|||
* unsigned int val; |
|||
* }; |
|||
* |
|||
* static struct name_to_digit map[] = { |
|||
* { "zero", 0}, |
|||
* { "one", 1 }, |
|||
* { "two", 2 }, |
|||
* { "three", 3 }, |
|||
* { "four", 4 }, |
|||
* { "five", 5 }, |
|||
* { "six", 6 }, |
|||
* { "seven", 7 }, |
|||
* { "eight", 8 }, |
|||
* { "nine", 9 } |
|||
* }; |
|||
* |
|||
* // Wrapper for rehash function pointer. |
|||
* static size_t rehash(const void *e, void *unused) |
|||
* { |
|||
* return hash_string(((struct name_to_digit *)e)->name); |
|||
* } |
|||
* |
|||
* // Comparison function. |
|||
* static bool streq(const void *e, void *string) |
|||
* { |
|||
* return strcmp(((struct name_to_digit *)e)->name, string) == 0; |
|||
* } |
|||
* |
|||
* // We let them add their own aliases, eg. --alias=v=5 |
|||
* static void add_alias(struct htable *ht, const char *alias) |
|||
* { |
|||
* char *eq; |
|||
* struct name_to_digit *n; |
|||
* |
|||
* n = malloc(sizeof(*n)); |
|||
* n->name = strdup(alias); |
|||
* |
|||
* eq = strchr(n->name, '='); |
|||
* if (!eq || ((n->val = atoi(eq+1)) == 0 && strcmp(eq+1, "0") != 0)) |
|||
* errx(1, "Usage: --alias=<name>=<value>"); |
|||
* *eq = '\0'; |
|||
* htable_add(ht, hash_string(n->name), n); |
|||
* } |
|||
* |
|||
* int main(int argc, char *argv[]) |
|||
* { |
|||
* struct htable ht; |
|||
* unsigned int i; |
|||
* unsigned long val; |
|||
* |
|||
* if (argc < 2) |
|||
* errx(1, "Usage: %s [--alias=<name>=<val>]... <str>...", |
|||
* argv[0]); |
|||
* |
|||
* // Create and populate hash table. |
|||
* htable_init(&ht, rehash, NULL); |
|||
* for (i = 0; i < sizeof(map)/sizeof(map[0]); i++) |
|||
* htable_add(&ht, hash_string(map[i].name), &map[i]); |
|||
* |
|||
* // Add any aliases to the hash table. |
|||
* for (i = 1; i < argc; i++) { |
|||
* if (!strncmp(argv[i], "--alias=", strlen("--alias="))) |
|||
* add_alias(&ht, argv[i] + strlen("--alias=")); |
|||
* else |
|||
* break; |
|||
* } |
|||
* |
|||
* // Find the other args in the hash table. |
|||
* for (val = 0; i < argc; i++) { |
|||
* struct name_to_digit *n; |
|||
* n = htable_get(&ht, hash_string(argv[i]), |
|||
* streq, argv[i]); |
|||
* if (!n) |
|||
* errx(1, "Invalid digit name %s", argv[i]); |
|||
* // Append it to the value we are building up. |
|||
* val *= 10; |
|||
* val += n->val; |
|||
* } |
|||
* printf("%lu\n", val); |
|||
* return 0; |
|||
* } |
|||
* |
|||
* License: LGPL (v2.1 or any later version) |
|||
* Author: Rusty Russell <rusty@rustcorp.com.au> |
|||
*/ |
|||
/* ccan _info entry point: prints this module's dependency list when invoked
 * with the single argument "depends"; every other invocation exits with 1. */
int main(int argc, char *argv[])
{
	/* Exactly one argument is expected, and only "depends" is understood. */
	if (argc == 2 && strcmp(argv[1], "depends") == 0) {
		printf("ccan/compiler\n");
		return 0;
	}

	return 1;
}
@ -0,0 +1,296 @@ |
|||
/* Licensed under LGPLv2+ - see LICENSE file for details */ |
|||
#include <ccan/htable/htable.h> |
|||
#include <ccan/compiler/compiler.h> |
|||
#include <stdlib.h> |
|||
#include <limits.h> |
|||
#include <stdbool.h> |
|||
#include <assert.h> |
|||
|
|||
/* We use 0x1 as deleted marker. */ |
|||
#define HTABLE_DELETED (0x1) |
|||
|
|||
/* We clear out the bits which are always the same, and put metadata there. */ |
|||
static inline uintptr_t get_extra_ptr_bits(const struct htable *ht, |
|||
uintptr_t e) |
|||
{ |
|||
return e & ht->common_mask; |
|||
} |
|||
|
|||
static inline void *get_raw_ptr(const struct htable *ht, uintptr_t e) |
|||
{ |
|||
return (void *)((e & ~ht->common_mask) | ht->common_bits); |
|||
} |
|||
|
|||
static inline uintptr_t make_hval(const struct htable *ht, |
|||
const void *p, uintptr_t bits) |
|||
{ |
|||
return ((uintptr_t)p & ~ht->common_mask) | bits; |
|||
} |
|||
|
|||
static inline bool entry_is_valid(uintptr_t e) |
|||
{ |
|||
return e > HTABLE_DELETED; |
|||
} |
|||
|
|||
static inline uintptr_t get_hash_ptr_bits(const struct htable *ht, |
|||
size_t hash) |
|||
{ |
|||
/* Shuffling the extra bits (as specified in mask) down the
|
|||
* end is quite expensive. But the lower bits are redundant, so |
|||
* we fold the value first. */ |
|||
return (hash ^ (hash >> ht->bits)) |
|||
& ht->common_mask & ~ht->perfect_bit; |
|||
} |
|||
|
|||
void htable_init(struct htable *ht, |
|||
size_t (*rehash)(const void *elem, void *priv), void *priv) |
|||
{ |
|||
struct htable empty = HTABLE_INITIALIZER(empty, NULL, NULL); |
|||
*ht = empty; |
|||
ht->rehash = rehash; |
|||
ht->priv = priv; |
|||
ht->table = &ht->perfect_bit; |
|||
} |
|||
|
|||
bool htable_init_sized(struct htable *ht, |
|||
size_t (*rehash)(const void *, void *), |
|||
void *priv, size_t expect) |
|||
{ |
|||
htable_init(ht, rehash, priv); |
|||
|
|||
/* Don't go insane with sizing. */ |
|||
for (ht->bits = 1; ((size_t)3 << ht->bits) / 4 < expect; ht->bits++) { |
|||
if (ht->bits == 30) |
|||
break; |
|||
} |
|||
|
|||
ht->table = calloc(1 << ht->bits, sizeof(size_t)); |
|||
if (!ht->table) { |
|||
ht->table = &ht->perfect_bit; |
|||
return false; |
|||
} |
|||
ht->max = ((size_t)3 << ht->bits) / 4; |
|||
ht->max_with_deleted = ((size_t)9 << ht->bits) / 10; |
|||
|
|||
return true; |
|||
} |
|||
|
|||
void htable_clear(struct htable *ht) |
|||
{ |
|||
if (ht->table != &ht->perfect_bit) |
|||
free((void *)ht->table); |
|||
htable_init(ht, ht->rehash, ht->priv); |
|||
} |
|||
|
|||
static size_t hash_bucket(const struct htable *ht, size_t h) |
|||
{ |
|||
return h & ((1 << ht->bits)-1); |
|||
} |
|||
|
|||
static void *htable_val(const struct htable *ht, |
|||
struct htable_iter *i, size_t hash, uintptr_t perfect) |
|||
{ |
|||
uintptr_t h2 = get_hash_ptr_bits(ht, hash) | perfect; |
|||
|
|||
while (ht->table[i->off]) { |
|||
if (ht->table[i->off] != HTABLE_DELETED) { |
|||
if (get_extra_ptr_bits(ht, ht->table[i->off]) == h2) |
|||
return get_raw_ptr(ht, ht->table[i->off]); |
|||
} |
|||
i->off = (i->off + 1) & ((1 << ht->bits)-1); |
|||
h2 &= ~perfect; |
|||
} |
|||
return NULL; |
|||
} |
|||
|
|||
void *htable_firstval(const struct htable *ht, |
|||
struct htable_iter *i, size_t hash) |
|||
{ |
|||
i->off = hash_bucket(ht, hash); |
|||
return htable_val(ht, i, hash, ht->perfect_bit); |
|||
} |
|||
|
|||
void *htable_nextval(const struct htable *ht, |
|||
struct htable_iter *i, size_t hash) |
|||
{ |
|||
i->off = (i->off + 1) & ((1 << ht->bits)-1); |
|||
return htable_val(ht, i, hash, 0); |
|||
} |
|||
|
|||
void *htable_first(const struct htable *ht, struct htable_iter *i) |
|||
{ |
|||
for (i->off = 0; i->off < (size_t)1 << ht->bits; i->off++) { |
|||
if (entry_is_valid(ht->table[i->off])) |
|||
return get_raw_ptr(ht, ht->table[i->off]); |
|||
} |
|||
return NULL; |
|||
} |
|||
|
|||
void *htable_next(const struct htable *ht, struct htable_iter *i) |
|||
{ |
|||
for (i->off++; i->off < (size_t)1 << ht->bits; i->off++) { |
|||
if (entry_is_valid(ht->table[i->off])) |
|||
return get_raw_ptr(ht, ht->table[i->off]); |
|||
} |
|||
return NULL; |
|||
} |
|||
|
|||
/* This does not expand the hash table, that's up to caller. */ |
|||
static void ht_add(struct htable *ht, const void *new, size_t h) |
|||
{ |
|||
size_t i; |
|||
uintptr_t perfect = ht->perfect_bit; |
|||
|
|||
i = hash_bucket(ht, h); |
|||
|
|||
while (entry_is_valid(ht->table[i])) { |
|||
perfect = 0; |
|||
i = (i + 1) & ((1 << ht->bits)-1); |
|||
} |
|||
ht->table[i] = make_hval(ht, new, get_hash_ptr_bits(ht, h)|perfect); |
|||
} |
|||
|
|||
static COLD bool double_table(struct htable *ht) |
|||
{ |
|||
unsigned int i; |
|||
size_t oldnum = (size_t)1 << ht->bits; |
|||
uintptr_t *oldtable, e; |
|||
|
|||
oldtable = ht->table; |
|||
ht->table = calloc(1 << (ht->bits+1), sizeof(size_t)); |
|||
if (!ht->table) { |
|||
ht->table = oldtable; |
|||
return false; |
|||
} |
|||
ht->bits++; |
|||
ht->max = ((size_t)3 << ht->bits) / 4; |
|||
ht->max_with_deleted = ((size_t)9 << ht->bits) / 10; |
|||
|
|||
/* If we lost our "perfect bit", get it back now. */ |
|||
if (!ht->perfect_bit && ht->common_mask) { |
|||
for (i = 0; i < sizeof(ht->common_mask) * CHAR_BIT; i++) { |
|||
if (ht->common_mask & ((size_t)1 << i)) { |
|||
ht->perfect_bit = (size_t)1 << i; |
|||
break; |
|||
} |
|||
} |
|||
} |
|||
|
|||
if (oldtable != &ht->perfect_bit) { |
|||
for (i = 0; i < oldnum; i++) { |
|||
if (entry_is_valid(e = oldtable[i])) { |
|||
void *p = get_raw_ptr(ht, e); |
|||
ht_add(ht, p, ht->rehash(p, ht->priv)); |
|||
} |
|||
} |
|||
free(oldtable); |
|||
} |
|||
ht->deleted = 0; |
|||
return true; |
|||
} |
|||
|
|||
static COLD void rehash_table(struct htable *ht) |
|||
{ |
|||
size_t start, i; |
|||
uintptr_t e; |
|||
|
|||
/* Beware wrap cases: we need to start from first empty bucket. */ |
|||
for (start = 0; ht->table[start]; start++); |
|||
|
|||
for (i = 0; i < (size_t)1 << ht->bits; i++) { |
|||
size_t h = (i + start) & ((1 << ht->bits)-1); |
|||
e = ht->table[h]; |
|||
if (!e) |
|||
continue; |
|||
if (e == HTABLE_DELETED) |
|||
ht->table[h] = 0; |
|||
else if (!(e & ht->perfect_bit)) { |
|||
void *p = get_raw_ptr(ht, e); |
|||
ht->table[h] = 0; |
|||
ht_add(ht, p, ht->rehash(p, ht->priv)); |
|||
} |
|||
} |
|||
ht->deleted = 0; |
|||
} |
|||
|
|||
/* We stole some bits, now we need to put them back... */ |
|||
static COLD void update_common(struct htable *ht, const void *p) |
|||
{ |
|||
unsigned int i; |
|||
uintptr_t maskdiff, bitsdiff; |
|||
|
|||
if (ht->elems == 0) { |
|||
/* Always reveal one bit of the pointer in the bucket,
|
|||
* so it's not zero or HTABLE_DELETED (1), even if |
|||
* hash happens to be 0. Assumes (void *)1 is not a |
|||
* valid pointer. */ |
|||
for (i = sizeof(uintptr_t)*CHAR_BIT - 1; i > 0; i--) { |
|||
if ((uintptr_t)p & ((uintptr_t)1 << i)) |
|||
break; |
|||
} |
|||
|
|||
ht->common_mask = ~((uintptr_t)1 << i); |
|||
ht->common_bits = ((uintptr_t)p & ht->common_mask); |
|||
ht->perfect_bit = 1; |
|||
return; |
|||
} |
|||
|
|||
/* Find bits which are unequal to old common set. */ |
|||
maskdiff = ht->common_bits ^ ((uintptr_t)p & ht->common_mask); |
|||
|
|||
/* These are the bits which go there in existing entries. */ |
|||
bitsdiff = ht->common_bits & maskdiff; |
|||
|
|||
for (i = 0; i < (size_t)1 << ht->bits; i++) { |
|||
if (!entry_is_valid(ht->table[i])) |
|||
continue; |
|||
/* Clear the bits no longer in the mask, set them as
|
|||
* expected. */ |
|||
ht->table[i] &= ~maskdiff; |
|||
ht->table[i] |= bitsdiff; |
|||
} |
|||
|
|||
/* Take away those bits from our mask, bits and perfect bit. */ |
|||
ht->common_mask &= ~maskdiff; |
|||
ht->common_bits &= ~maskdiff; |
|||
ht->perfect_bit &= ~maskdiff; |
|||
} |
|||
|
|||
bool htable_add(struct htable *ht, size_t hash, const void *p) |
|||
{ |
|||
if (ht->elems+1 > ht->max && !double_table(ht)) |
|||
return false; |
|||
if (ht->elems+1 + ht->deleted > ht->max_with_deleted) |
|||
rehash_table(ht); |
|||
assert(p); |
|||
if (((uintptr_t)p & ht->common_mask) != ht->common_bits) |
|||
update_common(ht, p); |
|||
|
|||
ht_add(ht, p, hash); |
|||
ht->elems++; |
|||
return true; |
|||
} |
|||
|
|||
bool htable_del(struct htable *ht, size_t h, const void *p) |
|||
{ |
|||
struct htable_iter i; |
|||
void *c; |
|||
|
|||
for (c = htable_firstval(ht,&i,h); c; c = htable_nextval(ht,&i,h)) { |
|||
if (c == p) { |
|||
htable_delval(ht, &i); |
|||
return true; |
|||
} |
|||
} |
|||
return false; |
|||
} |
|||
|
|||
void htable_delval(struct htable *ht, struct htable_iter *i) |
|||
{ |
|||
assert(i->off < (size_t)1 << ht->bits); |
|||
assert(entry_is_valid(ht->table[i->off])); |
|||
|
|||
ht->elems--; |
|||
ht->table[i->off] = HTABLE_DELETED; |
|||
ht->deleted++; |
|||
} |
@ -0,0 +1,191 @@ |
|||
/* Licensed under LGPLv2+ - see LICENSE file for details */ |
|||
#ifndef CCAN_HTABLE_H |
|||
#define CCAN_HTABLE_H |
|||
#include "config.h" |
|||
#include <stdint.h> |
|||
#include <stdbool.h> |
|||
#include <stdlib.h> |
|||
|
|||
/**
|
|||
* struct htable - private definition of a htable. |
|||
* |
|||
* It's exposed here so you can put it in your structures and so we can |
|||
* supply inline functions. |
|||
*/ |
|||
struct htable {
	/* Callback that recomputes an element's hash, and its private arg. */
	size_t (*rehash)(const void *elem, void *priv);
	void *priv;
	/* The table has (1 << bits) buckets. */
	unsigned int bits;
	/* Number of live elements. */
	size_t elems;
	/* Number of slots holding a deleted marker. */
	size_t deleted;
	/* Element count above which the table is doubled. */
	size_t max;
	/* elems + deleted count above which the table is rehashed. */
	size_t max_with_deleted;
	/* These are the bits which are the same in all pointers. */
	uintptr_t common_mask;
	uintptr_t common_bits;
	/* Flag bit set in entries that sit in their home bucket. */
	uintptr_t perfect_bit;
	/* Bucket array; points at perfect_bit while nothing is allocated. */
	uintptr_t *table;
};
|||
|
|||
/**
|
|||
* HTABLE_INITIALIZER - static initialization for a hash table. |
|||
* @name: name of this htable. |
|||
* @rehash: hash function to use for rehashing. |
|||
* @priv: private argument to @rehash function. |
|||
* |
|||
* This is useful for setting up static and global hash tables. |
|||
* |
|||
* Example: |
|||
* // For simplicity's sake, say hash value is contents of elem.
|
|||
* static size_t rehash(const void *elem, void *unused) |
|||
* { |
|||
* return *(size_t *)elem; |
|||
* } |
|||
* static struct htable ht = HTABLE_INITIALIZER(ht, rehash, NULL); |
|||
*/ |
|||
/* Positional initializer; values follow struct htable's field order:
 * rehash, priv, bits, elems, deleted, max, max_with_deleted,
 * common_mask (-1: all bits set), common_bits, perfect_bit, table.
 * The table initially points at the structure's own perfect_bit word.
 * Keep this in sync with the field order of struct htable. */
#define HTABLE_INITIALIZER(name, rehash, priv) \ |
|||
{ rehash, priv, 0, 0, 0, 0, 0, -1, 0, 0, &name.perfect_bit } |
|||
|
|||
/**
|
|||
* htable_init - initialize an empty hash table. |
|||
* @ht: the hash table to initialize |
|||
* @rehash: hash function to use for rehashing. |
|||
* @priv: private argument to @rehash function. |
|||
*/ |
|||
void htable_init(struct htable *ht, |
|||
size_t (*rehash)(const void *elem, void *priv), void *priv); |
|||
|
|||
/**
|
|||
* htable_init_sized - initialize an empty hash table of given size. |
|||
* @ht: the hash table to initialize |
|||
* @rehash: hash function to use for rehashing. |
|||
* @priv: private argument to @rehash function. |
|||
* @size: the number of elements. |
|||
* |
|||
* If this returns false, @ht is still usable, but may need to do reallocation |
|||
* upon an add. If this returns true, it will not need to reallocate within |
|||
* @size htable_adds. |
|||
*/ |
|||
bool htable_init_sized(struct htable *ht, |
|||
size_t (*rehash)(const void *elem, void *priv), |
|||
void *priv, size_t size); |
|||
|
|||
/**
|
|||
* htable_clear - empty a hash table. |
|||
* @ht: the hash table to clear |
|||
* |
|||
* This doesn't do anything to any pointers left in it. |
|||
*/ |
|||
void htable_clear(struct htable *ht); |
|||
|
|||
/**
|
|||
* htable_rehash - use a hash table's rehash function |
|||
* @elem: the argument to rehash() |
|||
* |
|||
*/ |
|||
size_t htable_rehash(const void *elem); |
|||
|
|||
/**
|
|||
* htable_add - add a pointer into a hash table. |
|||
* @ht: the htable |
|||
* @hash: the hash value of the object |
|||
* @p: the non-NULL pointer |
|||
* |
|||
* Also note that this can only fail due to allocation failure. Otherwise, it |
|||
* returns true. |
|||
*/ |
|||
bool htable_add(struct htable *ht, size_t hash, const void *p); |
|||
|
|||
/**
|
|||
* htable_del - remove a pointer from a hash table |
|||
* @ht: the htable |
|||
* @hash: the hash value of the object |
|||
* @p: the pointer |
|||
* |
|||
* Returns true if the pointer was found (and deleted). |
|||
*/ |
|||
bool htable_del(struct htable *ht, size_t hash, const void *p); |
|||
|
|||
/**
|
|||
* struct htable_iter - iterator for htable_first, htable_firstval etc. |
|||
* |
|||
* This refers to a location inside the hashtable: @off is the index of |
|||
* the table slot the iterator currently points at. |
|||
*/ |
|||
struct htable_iter { |
|||
size_t off; |
|||
}; |
|||
|
|||
/**
|
|||
* htable_firstval - find a candidate for a given hash value |
|||
* @htable: the hashtable |
|||
* @i: the struct htable_iter to initialize |
|||
* @hash: the hash value |
|||
* |
|||
* You'll need to check the value is what you want; returns NULL if none. |
|||
* See Also: |
|||
* htable_delval() |
|||
*/ |
|||
void *htable_firstval(const struct htable *htable, |
|||
struct htable_iter *i, size_t hash); |
|||
|
|||
/**
|
|||
* htable_nextval - find another candidate for a given hash value |
|||
* @htable: the hashtable |
|||
* @i: the struct htable_iter initialized by htable_firstval |
|||
* @hash: the hash value |
|||
* |
|||
* You'll need to check the value is what you want; returns NULL if no more. |
|||
*/ |
|||
void *htable_nextval(const struct htable *htable, |
|||
struct htable_iter *i, size_t hash); |
|||
|
|||
/**
|
|||
* htable_get - find an entry in the hash table |
|||
* @ht: the hashtable |
|||
* @h: the hash value of the entry |
|||
* @cmp: the comparison function |
|||
* @ptr: the pointer to hand to the comparison function. |
|||
* |
|||
* Convenient inline wrapper for htable_firstval/htable_nextval loop. |
|||
*/ |
|||
static inline void *htable_get(const struct htable *ht, |
|||
size_t h, |
|||
bool (*cmp)(const void *candidate, void *ptr), |
|||
const void *ptr) |
|||
{ |
|||
struct htable_iter i; |
|||
void *c; |
|||
|
|||
for (c = htable_firstval(ht,&i,h); c; c = htable_nextval(ht,&i,h)) { |
|||
if (cmp(c, (void *)ptr)) |
|||
return c; |
|||
} |
|||
return NULL; |
|||
} |
|||
|
|||
/**
|
|||
* htable_first - find an entry in the hash table |
|||
* @htable: the hashtable |
|||
* @i: the struct htable_iter to initialize |
|||
* |
|||
* Get an entry in the hashtable; NULL if empty. |
|||
*/ |
|||
void *htable_first(const struct htable *htable, struct htable_iter *i); |
|||
|
|||
/**
|
|||
* htable_next - find another entry in the hash table |
|||
* @htable: the hashtable |
|||
* @i: the struct htable_iter to use |
|||
* |
|||
* Get another entry in the hashtable; NULL if all done. |
|||
* This is usually used after htable_first or prior non-NULL htable_next. |
|||
*/ |
|||
void *htable_next(const struct htable *htable, struct htable_iter *i); |
|||
|
|||
/**
|
|||
* htable_delval - remove an iterated pointer from a hash table |
|||
* @ht: the htable |
|||
* @i: the htable_iter |
|||
* |
|||
* Usually used to delete a hash entry after it has been found with |
|||
* htable_firstval etc. |
|||
*/ |
|||
void htable_delval(struct htable *ht, struct htable_iter *i); |
|||
|
|||
#endif /* CCAN_HTABLE_H */ |
@ -0,0 +1,108 @@ |
|||
/* Licensed under LGPLv2+ - see LICENSE file for details */ |
|||
#ifndef CCAN_HTABLE_TYPE_H |
|||
#define CCAN_HTABLE_TYPE_H |
|||
#include <ccan/htable/htable.h> |
|||
#include "config.h" |
|||
|
|||
/**
 * HTABLE_DEFINE_TYPE - create a set of htable ops for a type
 * @type: a type whose pointers will be values in the hash.
 * @keyof: a function/macro to extract a key: <keytype> @keyof(const type *elem)
 * @hashfn: a hash function for a @key: size_t @hashfn(const <keytype> *)
 * @eqfn: an equality function keys: bool @eqfn(const type *, const <keytype> *)
 * @name: a prefix for all the functions to define (of form <name>_*)
 *
 * NULL values may not be placed into the hash table.
 *
 * This defines the hashtable type and an iterator type:
 *	struct <name>;
 *	struct <name>_iter;
 *
 * It also defines initialization and freeing functions:
 *	void <name>_init(struct <name> *);
 *	bool <name>_init_sized(struct <name> *, size_t);
 *	void <name>_clear(struct <name> *);
 *
 * <name>_init_sized hands back the result of htable_init_sized(): if it
 * is false the table is still usable, but may need to reallocate on add.
 *
 * Add function only fails if we run out of memory:
 *	bool <name>_add(struct <name> *ht, const <type> *e);
 *
 * Delete and delete-by key return true if it was in the set:
 *	bool <name>_del(struct <name> *ht, const <type> *e);
 *	bool <name>_delkey(struct <name> *ht, const <keytype> *k);
 *
 * Find function return the matching element, or NULL:
 *	type *<name>_get(const struct <name> *ht, const <keytype> *k);
 *
 * Iteration over hashtable is also supported:
 *	type *<name>_first(const struct <name> *ht, struct <name>_iter *i);
 *	type *<name>_next(const struct <name> *ht, struct <name>_iter *i);
 *
 * It's currently safe to iterate over a changing hashtable, but you might
 * miss an element.  Iteration isn't very efficient, either.
 *
 * You can use HTABLE_INITIALIZER like so:
 *	struct <name> ht = { HTABLE_INITIALIZER(ht.raw, <name>_hash, NULL) };
 */
#define HTABLE_DEFINE_TYPE(type, keyof, hashfn, eqfn, name)		\
	struct name { struct htable raw; };				\
	struct name##_iter { struct htable_iter i; };			\
	static inline size_t name##_hash(const void *elem, void *priv)	\
	{								\
		return hashfn(keyof((const type *)elem));		\
	}								\
	static inline void name##_init(struct name *ht)			\
	{								\
		htable_init(&ht->raw, name##_hash, NULL);		\
	}								\
	static inline bool name##_init_sized(struct name *ht, size_t s)	\
	{								\
		/* Pass the pre-allocation result on to the caller. */	\
		return htable_init_sized(&ht->raw, name##_hash, NULL, s); \
	}								\
	static inline void name##_clear(struct name *ht)		\
	{								\
		htable_clear(&ht->raw);					\
	}								\
	static inline bool name##_add(struct name *ht, const type *elem) \
	{								\
		return htable_add(&ht->raw, hashfn(keyof(elem)), elem);	\
	}								\
	static inline bool name##_del(struct name *ht, const type *elem) \
	{								\
		return htable_del(&ht->raw, hashfn(keyof(elem)), elem);	\
	}								\
	static inline type *name##_get(const struct name *ht,		\
				       const HTABLE_KTYPE(keyof) k)	\
	{								\
		/* Typecheck for eqfn */				\
		(void)sizeof(eqfn((const type *)NULL,			\
				  keyof((const type *)NULL)));		\
		return htable_get(&ht->raw,				\
				  hashfn(k),				\
				  (bool (*)(const void *, void *))(eqfn), \
				  k);					\
	}								\
	static inline bool name##_delkey(struct name *ht,		\
					 const HTABLE_KTYPE(keyof) k)	\
	{								\
		type *elem = name##_get(ht, k);				\
		if (elem)						\
			return name##_del(ht, elem);			\
		return false;						\
	}								\
	static inline type *name##_first(const struct name *ht,	\
					 struct name##_iter *iter)	\
	{								\
		return htable_first(&ht->raw, &iter->i);		\
	}								\
	static inline type *name##_next(const struct name *ht,		\
					struct name##_iter *iter)	\
	{								\
		return htable_next(&ht->raw, &iter->i);			\
	}
|||
|
|||
/* Key type taken by <name>_get() and <name>_delkey(): whatever keyof()
 * yields when typeof is available, an untyped pointer otherwise. */
#if !HAVE_TYPEOF
#define HTABLE_KTYPE(keyof)	void *
#else
#define HTABLE_KTYPE(keyof)	typeof(keyof(NULL))
#endif
|||
#endif /* CCAN_HTABLE_TYPE_H */ |
@ -0,0 +1,36 @@ |
|||
#include <ccan/htable/htable.h> |
|||
#include <ccan/htable/htable.c> |
|||
#include <ccan/tap/tap.h> |
|||
#include <stdbool.h> |
|||
#include <string.h> |
|||
|
|||
#define NUM_VALS 512 |
|||
|
|||
/* The hash is the stored number halved, so neighbouring values collide
   in pairs. */
static size_t hash(const void *elem, void *unused)
{
	const uint64_t *num = elem;

	return *num / 2;
}
|||
|
|||
int main(int argc, char *argv[]) |
|||
{ |
|||
struct htable ht; |
|||
uint64_t val[NUM_VALS]; |
|||
unsigned int i; |
|||
|
|||
plan_tests((NUM_VALS) * 2); |
|||
for (i = 0; i < NUM_VALS; i++) |
|||
val[i] = i; |
|||
|
|||
htable_init(&ht, hash, NULL); |
|||
for (i = 0; i < NUM_VALS; i++) { |
|||
ok1(ht.max >= i); |
|||
ok1(ht.max <= i * 2); |
|||
htable_add(&ht, hash(&val[i], NULL), &val[i]); |
|||
} |
|||
htable_clear(&ht); |
|||
|
|||
return exit_status(); |
|||
} |
@ -0,0 +1,175 @@ |
|||
#include <ccan/htable/htable_type.h> |
|||
#include <ccan/htable/htable.c> |
|||
#include <ccan/tap/tap.h> |
|||
#include <stdbool.h> |
|||
#include <string.h> |
|||
|
|||
#define NUM_BITS 7 |
|||
#define NUM_VALS (1 << NUM_BITS) |
|||
|
|||
/* Test element stored in the typed hash table; key is what gets hashed. */
struct obj { |
|||
/* Makes sure we don't try to treat an obj as a key or vice versa */ |
|||
unsigned char unused; |
|||
unsigned int key; |
|||
}; |
|||
|
|||
static const unsigned int *objkey(const struct obj *obj) |
|||
{ |
|||
return &obj->key; |
|||
} |
|||
|
|||
/* We use the number divided by two as the hash (for lots of
|
|||
collisions), plus set all the higher bits so we can detect if they |
|||
don't get masked out. */ |
|||
static size_t objhash(const unsigned int *key) |
|||
{ |
|||
size_t h = *key / 2; |
|||
h |= -1UL << NUM_BITS; |
|||
return h; |
|||
} |
|||
|
|||
static bool cmp(const struct obj *obj, const unsigned int *key) |
|||
{ |
|||
return obj->key == *key; |
|||
} |
|||
|
|||
HTABLE_DEFINE_TYPE(struct obj, objkey, objhash, cmp, htable_obj); |
|||
|
|||
static void add_vals(struct htable_obj *ht, |
|||
struct obj val[], unsigned int num) |
|||
{ |
|||
unsigned int i; |
|||
|
|||
for (i = 0; i < num; i++) { |
|||
if (htable_obj_get(ht, &i)) { |
|||
fail("%u already in hash", i); |
|||
return; |
|||
} |
|||
htable_obj_add(ht, &val[i]); |
|||
if (htable_obj_get(ht, &i) != &val[i]) { |
|||
fail("%u not added to hash", i); |
|||
return; |
|||
} |
|||
} |
|||
pass("Added %u numbers to hash", i); |
|||
} |
|||
|
|||
static void find_vals(const struct htable_obj *ht, |
|||
const struct obj val[], unsigned int num) |
|||
{ |
|||
unsigned int i; |
|||
|
|||
for (i = 0; i < num; i++) { |
|||
if (htable_obj_get(ht, &i) != &val[i]) { |
|||
fail("%u not found in hash", i); |
|||
return; |
|||
} |
|||
} |
|||
pass("Found %u numbers in hash", i); |
|||
} |
|||
|
|||
static void del_vals(struct htable_obj *ht, |
|||
const struct obj val[], unsigned int num) |
|||
{ |
|||
unsigned int i; |
|||
|
|||
for (i = 0; i < num; i++) { |
|||
if (!htable_obj_delkey(ht, &val[i].key)) { |
|||
fail("%u not deleted from hash", i); |
|||
return; |
|||
} |
|||
} |
|||
pass("Deleted %u numbers in hash", i); |
|||
} |
|||
|
|||
static void del_vals_bykey(struct htable_obj *ht, |
|||
const struct obj val[], unsigned int num) |
|||
{ |
|||
unsigned int i; |
|||
|
|||
for (i = 0; i < num; i++) { |
|||
if (!htable_obj_delkey(ht, &i)) { |
|||
fail("%u not deleted by key from hash", i); |
|||
return; |
|||
} |
|||
} |
|||
pass("Deleted %u numbers by key from hash", i); |
|||
} |
|||
|
|||
static bool check_mask(struct htable *ht, const struct obj val[], unsigned num) |
|||
{ |
|||
uint64_t i; |
|||
|
|||
for (i = 0; i < num; i++) { |
|||
if (((uintptr_t)&val[i] & ht->common_mask) != ht->common_bits) |
|||
return false; |
|||
} |
|||
return true; |
|||
} |
|||
|
|||
int main(int argc, char *argv[]) |
|||
{ |
|||
unsigned int i; |
|||
struct htable_obj ht; |
|||
struct obj val[NUM_VALS]; |
|||
unsigned int dne; |
|||
void *p; |
|||
struct htable_obj_iter iter; |
|||
|
|||
plan_tests(20); |
|||
for (i = 0; i < NUM_VALS; i++) |
|||
val[i].key = i; |
|||
dne = i; |
|||
|
|||
htable_obj_init(&ht); |
|||
ok1(ht.raw.max == 0); |
|||
ok1(ht.raw.bits == 0); |
|||
|
|||
/* We cannot find an entry which doesn't exist. */ |
|||
ok1(!htable_obj_get(&ht, &dne)); |
|||
|
|||
/* Fill it, it should increase in size. */ |
|||
add_vals(&ht, val, NUM_VALS); |
|||
ok1(ht.raw.bits == NUM_BITS + 1); |
|||
ok1(ht.raw.max < (1 << ht.raw.bits)); |
|||
|
|||
/* Mask should be set. */ |
|||
ok1(ht.raw.common_mask != 0); |
|||
ok1(ht.raw.common_mask != -1); |
|||
ok1(check_mask(&ht.raw, val, NUM_VALS)); |
|||
|
|||
/* Find all. */ |
|||
find_vals(&ht, val, NUM_VALS); |
|||
ok1(!htable_obj_get(&ht, &dne)); |
|||
|
|||
/* Walk once, should get them all. */ |
|||
i = 0; |
|||
for (p = htable_obj_first(&ht,&iter); p; p = htable_obj_next(&ht, &iter)) |
|||
i++; |
|||
ok1(i == NUM_VALS); |
|||
|
|||
/* Delete all. */ |
|||
del_vals(&ht, val, NUM_VALS); |
|||
ok1(!htable_obj_get(&ht, &val[0].key)); |
|||
|
|||
/* Worst case, a "pointer" which doesn't have any matching bits. */ |
|||
htable_add(&ht.raw, 0, (void *)~(uintptr_t)&val[NUM_VALS-1]); |
|||
htable_obj_add(&ht, &val[NUM_VALS-1]); |
|||
ok1(ht.raw.common_mask == 0); |
|||
ok1(ht.raw.common_bits == 0); |
|||
/* Delete the bogus one before we trip over it. */ |
|||
htable_del(&ht.raw, 0, (void *)~(uintptr_t)&val[NUM_VALS-1]); |
|||
|
|||
/* Add the rest. */ |
|||
add_vals(&ht, val, NUM_VALS-1); |
|||
|
|||
/* Check we can find them all. */ |
|||
find_vals(&ht, val, NUM_VALS); |
|||
ok1(!htable_obj_get(&ht, &dne)); |
|||
|
|||
/* Delete them all by key. */ |
|||
del_vals_bykey(&ht, val, NUM_VALS); |
|||
htable_obj_clear(&ht); |
|||
|
|||
return exit_status(); |
|||
} |
@ -0,0 +1,61 @@ |
|||
#include <ccan/htable/htable.h> |
|||
#include <ccan/htable/htable.c> |
|||
#include <ccan/tap/tap.h> |
|||
#include <stdbool.h> |
|||
|
|||
/* Test element: the hash() and eq() callbacks below only look at key. */
struct data { |
|||
size_t key; |
|||
}; |
|||
|
|||
/* Hash is simply key itself. */ |
|||
static size_t hash(const void *e, void *unused) |
|||
{ |
|||
struct data *d = (struct data *)e; |
|||
|
|||
return d->key; |
|||
} |
|||
|
|||
static bool eq(const void *e, void *k) |
|||
{ |
|||
struct data *d = (struct data *)e; |
|||
size_t *key = (size_t *)k; |
|||
|
|||
return (d->key == *key); |
|||
} |
|||
|
|||
int main(void) |
|||
{ |
|||
struct htable table; |
|||
struct data *d0, *d1; |
|||
|
|||
plan_tests(6); |
|||
|
|||
d1 = malloc(sizeof(struct data)); |
|||
d1->key = 1; |
|||
d0 = malloc(sizeof(struct data)); |
|||
d0->key = 0; |
|||
|
|||
htable_init(&table, hash, NULL); |
|||
|
|||
htable_add(&table, d0->key, d0); |
|||
htable_add(&table, d1->key, d1); |
|||
|
|||
ok1(table.elems == 2); |
|||
ok1(htable_get(&table, 1, eq, &d1->key) == d1); |
|||
ok1(htable_get(&table, 0, eq, &d0->key) == d0); |
|||
htable_clear(&table); |
|||
|
|||
/* Now add in reverse order, should still be OK. */ |
|||
htable_add(&table, d1->key, d1); |
|||
htable_add(&table, d0->key, d0); |
|||
|
|||
ok1(table.elems == 2); |
|||
ok1(htable_get(&table, 1, eq, &d1->key) == d1); |
|||
ok1(htable_get(&table, 0, eq, &d0->key) == d0); |
|||
htable_clear(&table); |
|||
|
|||
free(d0); |
|||
free(d1); |
|||
return exit_status(); |
|||
} |
|||
|
@ -0,0 +1,207 @@ |
|||
#include <ccan/htable/htable.h> |
|||
#include <ccan/htable/htable.c> |
|||
#include <ccan/tap/tap.h> |
|||
#include <stdbool.h> |
|||
#include <string.h> |
|||
|
|||
#define NUM_BITS 7 |
|||
#define NUM_VALS (1 << NUM_BITS) |
|||
|
|||
/* We use the number divided by two as the hash (for lots of
|
|||
collisions), plus set all the higher bits so we can detect if they |
|||
don't get masked out. */ |
|||
static size_t hash(const void *elem, void *unused) |
|||
{ |
|||
size_t h = *(uint64_t *)elem / 2; |
|||
h |= -1UL << NUM_BITS; |
|||
return h; |
|||
} |
|||
|
|||
/* htable_get() comparison callback: both arguments point at uint64_t
 * values, which are compared for equality. */
static bool objcmp(const void *htelem, void *cmpdata)
{
	const uint64_t *stored = htelem;
	const uint64_t *wanted = cmpdata;

	return *stored == *wanted;
}
|||
|
|||
static void add_vals(struct htable *ht, |
|||
const uint64_t val[], |
|||
unsigned int off, unsigned int num) |
|||
{ |
|||
uint64_t i; |
|||
|
|||
for (i = off; i < off+num; i++) { |
|||
if (htable_get(ht, hash(&i, NULL), objcmp, &i)) { |
|||
fail("%llu already in hash", (long long)i); |
|||
return; |
|||
} |
|||
htable_add(ht, hash(&val[i], NULL), &val[i]); |
|||
if (htable_get(ht, hash(&i, NULL), objcmp, &i) != &val[i]) { |
|||
fail("%llu not added to hash", (long long)i); |
|||
return; |
|||
} |
|||
} |
|||
pass("Added %llu numbers to hash", (long long)i); |
|||
} |
|||
|
|||
#if 0 |
|||
/* Compiled out.  Adds every val[i], i in 0..num-1, whose value is not
 * already found in the table. */
static void refill_vals(struct htable *ht, |
|||
const uint64_t val[], unsigned int num) |
|||
{ |
|||
uint64_t i; |
|||
|
|||
for (i = 0; i < num; i++) { |
|||
if (htable_get(ht, hash(&i, NULL), objcmp, &i)) |
|||
continue; |
|||
htable_add(ht, hash(&val[i], NULL), &val[i]); |
|||
} |
|||
} |
|||
#endif |
|||
|
|||
static void find_vals(struct htable *ht, |
|||
const uint64_t val[], unsigned int num) |
|||
{ |
|||
uint64_t i; |
|||
|
|||
for (i = 0; i < num; i++) { |
|||
if (htable_get(ht, hash(&i, NULL), objcmp, &i) != &val[i]) { |
|||
fail("%llu not found in hash", (long long)i); |
|||
return; |
|||
} |
|||
} |
|||
pass("Found %llu numbers in hash", (long long)i); |
|||
} |
|||
|
|||
/* Delete val[0..num) by pointer; fail on the first element htable_del()
 * does not find, pass otherwise.  The counter is printed with %llu, so
 * it is cast to the matching unsigned type. */
static void del_vals(struct htable *ht,
		     const uint64_t val[], unsigned int num)
{
	uint64_t i;

	for (i = 0; i < num; i++) {
		if (!htable_del(ht, hash(&val[i], NULL), &val[i])) {
			fail("%llu not deleted from hash",
			     (unsigned long long)i);
			return;
		}
	}
	pass("Deleted %llu numbers in hash", (unsigned long long)i);
}
|||
|
|||
static bool check_mask(struct htable *ht, uint64_t val[], unsigned num) |
|||
{ |
|||
uint64_t i; |
|||
|
|||
for (i = 0; i < num; i++) { |
|||
if (((uintptr_t)&val[i] & ht->common_mask) != ht->common_bits) |
|||
return false; |
|||
} |
|||
return true; |
|||
} |
|||
|
|||
int main(int argc, char *argv[]) |
|||
{ |
|||
unsigned int i, weight; |
|||
uintptr_t perfect_bit; |
|||
struct htable ht; |
|||
uint64_t val[NUM_VALS]; |
|||
uint64_t dne; |
|||
void *p; |
|||
struct htable_iter iter; |
|||
|
|||
plan_tests(35); |
|||
for (i = 0; i < NUM_VALS; i++) |
|||
val[i] = i; |
|||
dne = i; |
|||
|
|||
htable_init(&ht, hash, NULL); |
|||
ok1(ht.max == 0); |
|||
ok1(ht.bits == 0); |
|||
|
|||
/* We cannot find an entry which doesn't exist. */ |
|||
ok1(!htable_get(&ht, hash(&dne, NULL), objcmp, &dne)); |
|||
|
|||
/* This should increase it once. */ |
|||
add_vals(&ht, val, 0, 1); |
|||
ok1(ht.bits == 1); |
|||
ok1(ht.max == 1); |
|||
weight = 0; |
|||
for (i = 0; i < sizeof(ht.common_mask) * CHAR_BIT; i++) { |
|||
if (ht.common_mask & ((uintptr_t)1 << i)) { |
|||
weight++; |
|||
} |
|||
} |
|||
/* Only one bit should be clear. */ |
|||
ok1(weight == i-1); |
|||
|
|||
/* Mask should be set. */ |
|||
ok1(check_mask(&ht, val, 1)); |
|||
|
|||
/* This should increase it again. */ |
|||
add_vals(&ht, val, 1, 1); |
|||
ok1(ht.bits == 2); |
|||
ok1(ht.max == 3); |
|||
|
|||
/* Mask should be set. */ |
|||
ok1(ht.common_mask != 0); |
|||
ok1(ht.common_mask != -1); |
|||
ok1(check_mask(&ht, val, 2)); |
|||
|
|||
/* Now do the rest. */ |
|||
add_vals(&ht, val, 2, NUM_VALS - 2); |
|||
|
|||
/* Find all. */ |
|||
find_vals(&ht, val, NUM_VALS); |
|||
ok1(!htable_get(&ht, hash(&dne, NULL), objcmp, &dne)); |
|||
|
|||
/* Walk once, should get them all. */ |
|||
i = 0; |
|||
for (p = htable_first(&ht,&iter); p; p = htable_next(&ht, &iter)) |
|||
i++; |
|||
ok1(i == NUM_VALS); |
|||
|
|||
/* Delete all. */ |
|||
del_vals(&ht, val, NUM_VALS); |
|||
ok1(!htable_get(&ht, hash(&val[0], NULL), objcmp, &val[0])); |
|||
|
|||
/* Worst case, a "pointer" which doesn't have any matching bits. */ |
|||
htable_add(&ht, 0, (void *)~(uintptr_t)&val[NUM_VALS-1]); |
|||
htable_add(&ht, hash(&val[NUM_VALS-1], NULL), &val[NUM_VALS-1]); |
|||
ok1(ht.common_mask == 0); |
|||
ok1(ht.common_bits == 0); |
|||
/* Get rid of bogus pointer before we trip over it! */ |
|||
htable_del(&ht, 0, (void *)~(uintptr_t)&val[NUM_VALS-1]); |
|||
|
|||
/* Add the rest. */ |
|||
add_vals(&ht, val, 0, NUM_VALS-1); |
|||
|
|||
/* Check we can find them all. */ |
|||
find_vals(&ht, val, NUM_VALS); |
|||
ok1(!htable_get(&ht, hash(&dne, NULL), objcmp, &dne)); |
|||
|
|||
/* Corner cases: wipe out the perfect bit using bogus pointer. */ |
|||
htable_clear(&ht); |
|||
htable_add(&ht, 0, (void *)((uintptr_t)&val[NUM_VALS-1])); |
|||
ok1(ht.perfect_bit); |
|||
perfect_bit = ht.perfect_bit; |
|||
htable_add(&ht, 0, (void *)((uintptr_t)&val[NUM_VALS-1] |
|||
| perfect_bit)); |
|||
ok1(ht.perfect_bit == 0); |
|||
htable_del(&ht, 0, (void *)((uintptr_t)&val[NUM_VALS-1] | perfect_bit)); |
|||
|
|||
/* Enlarging should restore it... */ |
|||
add_vals(&ht, val, 0, NUM_VALS-1); |
|||
|
|||
ok1(ht.perfect_bit != 0); |
|||
htable_clear(&ht); |
|||
|
|||
ok1(htable_init_sized(&ht, hash, NULL, 1024)); |
|||
ok1(ht.max >= 1024); |
|||
htable_clear(&ht); |
|||
|
|||
ok1(htable_init_sized(&ht, hash, NULL, 1023)); |
|||
ok1(ht.max >= 1023); |
|||
htable_clear(&ht); |
|||
|
|||
ok1(htable_init_sized(&ht, hash, NULL, 1025)); |
|||
ok1(ht.max >= 1025); |
|||
htable_clear(&ht); |
|||
|
|||
return exit_status(); |
|||
} |
@ -0,0 +1,40 @@ |
|||
# Builds the htable benchmark tools: speed, stringspeed and hsearchspeed.
CCANDIR=../../..
CFLAGS=-Wall -Werror -O3 -I$(CCANDIR)
#CFLAGS=-Wall -Werror -g -I$(CCANDIR)

CCAN_OBJS:=ccan-tal.o ccan-tal-str.o ccan-tal-grab_file.o ccan-take.o ccan-time.o ccan-str.o ccan-noerr.o ccan-list.o

# Neither target names a file, so never let a stray file shadow them.
.PHONY: all clean

all: speed stringspeed hsearchspeed

speed: speed.o hash.o $(CCAN_OBJS)

speed.o: speed.c ../htable.h ../htable.c

hash.o: ../../hash/hash.c
	$(CC) $(CFLAGS) -c -o $@ $<

stringspeed: stringspeed.o hash.o $(CCAN_OBJS)

# Each tool's object depends on its own source plus the htable code.
stringspeed.o: stringspeed.c ../htable.h ../htable.c

hsearchspeed: hsearchspeed.o $(CCAN_OBJS)

hsearchspeed.o: hsearchspeed.c ../htable.h ../htable.c

clean:
	rm -f stringspeed speed hsearchspeed *.o

ccan-tal.o: $(CCANDIR)/ccan/tal/tal.c
	$(CC) $(CFLAGS) -c -o $@ $<
ccan-tal-str.o: $(CCANDIR)/ccan/tal/str/str.c
	$(CC) $(CFLAGS) -c -o $@ $<
ccan-take.o: $(CCANDIR)/ccan/take/take.c
	$(CC) $(CFLAGS) -c -o $@ $<
ccan-tal-grab_file.o: $(CCANDIR)/ccan/tal/grab_file/grab_file.c
	$(CC) $(CFLAGS) -c -o $@ $<
ccan-time.o: $(CCANDIR)/ccan/time/time.c
	$(CC) $(CFLAGS) -c -o $@ $<
ccan-list.o: $(CCANDIR)/ccan/list/list.c
	$(CC) $(CFLAGS) -c -o $@ $<
ccan-str.o: $(CCANDIR)/ccan/str/str.c
	$(CC) $(CFLAGS) -c -o $@ $<
ccan-noerr.o: $(CCANDIR)/ccan/noerr/noerr.c
	$(CC) $(CFLAGS) -c -o $@ $<
@ -0,0 +1,95 @@ |
|||
/* Simple speed tests for a hash of strings using hsearch */ |
|||
#include <ccan/htable/htable_type.h> |
|||
#include <ccan/htable/htable.c> |
|||
#include <ccan/tal/str/str.h> |
|||
#include <ccan/tal/grab_file/grab_file.h> |
|||
#include <ccan/tal/tal.h> |
|||
#include <ccan/hash/hash.h> |
|||
#include <ccan/time/time.h> |
|||
#include <stdio.h> |
|||
#include <stdlib.h> |
|||
#include <string.h> |
|||
#include <time.h> |
|||
#include <unistd.h> |
|||
#include <sys/time.h> |
|||
#include <search.h> |
|||
|
|||
/* Nanoseconds per operation */ |
|||
static size_t normalize(const struct timeabs *start, |
|||
const struct timeabs *stop, |
|||
unsigned int num) |
|||
{ |
|||
return time_to_nsec(time_divide(time_between(*stop, *start), num)); |
|||
} |
|||
|
|||
int main(int argc, char *argv[]) |
|||
{ |
|||
size_t i, j, num; |
|||
struct timeabs start, stop; |
|||
char **w; |
|||
ENTRY *words, *misswords; |
|||
|
|||
w = tal_strsplit(NULL, grab_file(NULL, |
|||
argv[1] ? argv[1] : "/usr/share/dict/words"), "\n", STR_NO_EMPTY); |
|||
num = tal_count(w) - 1; |
|||
printf("%zu words\n", num); |
|||
|
|||
hcreate(num+num/3); |
|||
|
|||
words = tal_arr(w, ENTRY, num); |
|||
for (i = 0; i < num; i++) { |
|||
words[i].key = w[i]; |
|||
words[i].data = words[i].key; |
|||
} |
|||
|
|||
/* Append and prepend last char for miss testing. */ |
|||
misswords = tal_arr(w, ENTRY, num); |
|||
for (i = 0; i < num; i++) { |
|||
char lastc; |
|||
if (strlen(w[i])) |
|||
lastc = w[i][strlen(w[i])-1]; |
|||
else |
|||
lastc = 'z'; |
|||
misswords[i].key = tal_fmt(misswords, "%c%s%c%c", |
|||
lastc, w[i], lastc, lastc); |
|||
} |
|||
|
|||
printf("#01: Initial insert: "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i++) |
|||
hsearch(words[i], ENTER); |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
printf("#02: Initial lookup (match): "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i++) |
|||
if (hsearch(words[i], FIND)->data != words[i].data) |
|||
abort(); |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
printf("#03: Initial lookup (miss): "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i++) { |
|||
if (hsearch(misswords[i], FIND)) |
|||
abort(); |
|||
} |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
/* Lookups in order are very cache-friendly for judy; try random */ |
|||
printf("#04: Initial lookup (random): "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num) |
|||
if (hsearch(words[i], FIND)->data != words[i].data) |
|||
abort(); |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
return 0; |
|||
} |
@ -0,0 +1,370 @@ |
|||
/* Simple speed tests for hashtables. */ |
|||
#include <ccan/htable/htable_type.h> |
|||
#include <ccan/htable/htable.c> |
|||
#include <ccan/hash/hash.h> |
|||
#include <ccan/time/time.h> |
|||
#include <stdio.h> |
|||
#include <stdlib.h> |
|||
#include <string.h> |
|||
#include <unistd.h> |
|||
|
|||
static size_t hashcount; |
|||
/* Benchmark element stored in the hash table. */
struct object { |
|||
/* The key. */ |
|||
unsigned int key; |
|||
|
|||
/* Some contents. Doubles as consistency check: main() points it at |
|||
the object itself. */ |
|||
struct object *self; |
|||
}; |
|||
|
|||
static const unsigned int *objkey(const struct object *obj) |
|||
{ |
|||
return &obj->key; |
|||
} |
|||
|
|||
static size_t hash_obj(const unsigned int *key) |
|||
{ |
|||
hashcount++; |
|||
return hashl(key, 1, 0); |
|||
} |
|||
|
|||
static bool cmp(const struct object *object, const unsigned int *key) |
|||
{ |
|||
return object->key == *key; |
|||
} |
|||
|
|||
HTABLE_DEFINE_TYPE(struct object, objkey, hash_obj, cmp, htable_obj); |
|||
|
|||
/* Number of set bits in val.  Uses the compiler builtin when config.h
 * says it exists; otherwise clears the lowest set bit until none are
 * left, which is correct for any width of unsigned long. */
static unsigned int popcount(unsigned long val)
{
#if HAVE_BUILTIN_POPCOUNTL
	return __builtin_popcountl(val);
#else
	unsigned int count = 0;

	while (val) {
		/* val & (val - 1) drops exactly the lowest set bit. */
		val &= val - 1;
		count++;
	}
	return count;
#endif
}
|||
|
|||
static size_t perfect(const struct htable *ht) |
|||
{ |
|||
size_t i, placed_perfect = 0; |
|||
|
|||
for (i = 0; i < ((size_t)1 << ht->bits); i++) { |
|||
if (!entry_is_valid(ht->table[i])) |
|||
continue; |
|||
if (hash_bucket(ht, ht->rehash(get_raw_ptr(ht, ht->table[i]), |
|||
ht->priv)) == i) { |
|||
assert((ht->table[i] & ht->perfect_bit) |
|||
== ht->perfect_bit); |
|||
placed_perfect++; |
|||
} |
|||
} |
|||
return placed_perfect; |
|||
} |
|||
|
|||
static size_t count_deleted(const struct htable *ht) |
|||
{ |
|||
size_t i, delete_markers = 0; |
|||
|
|||
for (i = 0; i < ((size_t)1 << ht->bits); i++) { |
|||
if (ht->table[i] == HTABLE_DELETED) |
|||
delete_markers++; |
|||
} |
|||
return delete_markers; |
|||
} |
|||
|
|||
/* Nanoseconds per operation */ |
|||
static size_t normalize(const struct timeabs *start, |
|||
const struct timeabs *stop, |
|||
unsigned int num) |
|||
{ |
|||
return time_to_nsec(time_divide(time_between(*stop, *start), num)); |
|||
} |
|||
|
|||
static size_t worst_run(struct htable *ht, size_t *deleted) |
|||
{ |
|||
size_t longest = 0, len = 0, this_del = 0, i; |
|||
|
|||
*deleted = 0; |
|||
/* This doesn't take into account end-wrap, but gives an idea. */ |
|||
for (i = 0; i < ((size_t)1 << ht->bits); i++) { |
|||
if (ht->table[i]) { |
|||
len++; |
|||
if (ht->table[i] == HTABLE_DELETED) |
|||
this_del++; |
|||
} else { |
|||
if (len > longest) { |
|||
longest = len; |
|||
*deleted = this_del; |
|||
} |
|||
len = 0; |
|||
this_del = 0; |
|||
} |
|||
} |
|||
return longest; |
|||
} |
|||
|
|||
int main(int argc, char *argv[]) |
|||
{ |
|||
struct object *objs; |
|||
unsigned int i, j; |
|||
size_t num, deleted; |
|||
struct timeabs start, stop; |
|||
struct htable_obj ht; |
|||
bool make_dumb = false; |
|||
|
|||
if (argv[1] && strcmp(argv[1], "--dumb") == 0) { |
|||
argv++; |
|||
make_dumb = true; |
|||
} |
|||
num = argv[1] ? atoi(argv[1]) : 1000000; |
|||
objs = calloc(num, sizeof(objs[0])); |
|||
|
|||
for (i = 0; i < num; i++) { |
|||
objs[i].key = i; |
|||
objs[i].self = &objs[i]; |
|||
} |
|||
|
|||
htable_obj_init(&ht); |
|||
|
|||
printf("Initial insert: "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i++) |
|||
htable_obj_add(&ht, objs[i].self); |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
printf("Details: hash size %u, mask bits %u, perfect %.0f%%\n", |
|||
1U << ht.raw.bits, popcount(ht.raw.common_mask), |
|||
perfect(&ht.raw) * 100.0 / ht.raw.elems); |
|||
|
|||
if (make_dumb) { |
|||
/* Screw with mask, to hobble us. */ |
|||
update_common(&ht.raw, (void *)~ht.raw.common_bits); |
|||
printf("Details: DUMB MODE: mask bits %u\n", |
|||
popcount(ht.raw.common_mask)); |
|||
} |
|||
|
|||
printf("Initial lookup (match): "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i++) |
|||
if (htable_obj_get(&ht, &i)->self != objs[i].self) |
|||
abort(); |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
printf("Initial lookup (miss): "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i++) { |
|||
unsigned int n = i + num; |
|||
if (htable_obj_get(&ht, &n)) |
|||
abort(); |
|||
} |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
/* Lookups in order are very cache-friendly for judy; try random */ |
|||
printf("Initial lookup (random): "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num) |
|||
if (htable_obj_get(&ht, &j)->self != &objs[j]) |
|||
abort(); |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
hashcount = 0; |
|||
printf("Initial delete all: "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i++) |
|||
if (!htable_obj_del(&ht, objs[i].self)) |
|||
abort(); |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
printf("Details: rehashes %zu\n", hashcount); |
|||
|
|||
printf("Initial re-inserting: "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i++) |
|||
htable_obj_add(&ht, objs[i].self); |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
hashcount = 0; |
|||
printf("Deleting first half: "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i+=2) |
|||
if (!htable_obj_del(&ht, objs[i].self)) |
|||
abort(); |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
printf("Details: rehashes %zu, delete markers %zu\n", |
|||
hashcount, count_deleted(&ht.raw)); |
|||
|
|||
printf("Adding (a different) half: "); |
|||
fflush(stdout); |
|||
|
|||
for (i = 0; i < num; i+=2) |
|||
objs[i].key = num+i; |
|||
|
|||
start = time_now(); |
|||
for (i = 0; i < num; i+=2) |
|||
htable_obj_add(&ht, objs[i].self); |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
printf("Details: delete markers %zu, perfect %.0f%%\n", |
|||
count_deleted(&ht.raw), perfect(&ht.raw) * 100.0 / ht.raw.elems); |
|||
|
|||
printf("Lookup after half-change (match): "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 1; i < num; i+=2) |
|||
if (htable_obj_get(&ht, &i)->self != objs[i].self) |
|||
abort(); |
|||
for (i = 0; i < num; i+=2) { |
|||
unsigned int n = i + num; |
|||
if (htable_obj_get(&ht, &n)->self != objs[i].self) |
|||
abort(); |
|||
} |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
printf("Lookup after half-change (miss): "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i++) { |
|||
unsigned int n = i + num * 2; |
|||
if (htable_obj_get(&ht, &n)) |
|||
abort(); |
|||
} |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
/* Hashtables with delete markers can fill with markers over time.
|
|||
* so do some changes to see how it operates in long-term. */ |
|||
for (i = 0; i < 5; i++) { |
|||
if (i == 0) { |
|||
/* We don't measure this: jmap is different. */ |
|||
printf("Details: initial churn\n"); |
|||
} else { |
|||
printf("Churning %s time: ", |
|||
i == 1 ? "second" |
|||
: i == 2 ? "third" |
|||
: i == 3 ? "fourth" |
|||
: "fifth"); |
|||
fflush(stdout); |
|||
} |
|||
start = time_now(); |
|||
for (j = 0; j < num; j++) { |
|||
if (!htable_obj_del(&ht, &objs[j])) |
|||
abort(); |
|||
objs[j].key = num*i+j; |
|||
if (!htable_obj_add(&ht, &objs[j])) |
|||
abort(); |
|||
} |
|||
stop = time_now(); |
|||
if (i != 0) |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
} |
|||
|
|||
/* Spread out the keys more to try to make it harder. */ |
|||
printf("Details: reinserting with spread\n"); |
|||
for (i = 0; i < num; i++) { |
|||
if (!htable_obj_del(&ht, objs[i].self)) |
|||
abort(); |
|||
objs[i].key = num * 5 + i * 9; |
|||
if (!htable_obj_add(&ht, objs[i].self)) |
|||
abort(); |
|||
} |
|||
printf("Details: delete markers %zu, perfect %.0f%%\n", |
|||
count_deleted(&ht.raw), perfect(&ht.raw) * 100.0 / ht.raw.elems); |
|||
i = worst_run(&ht.raw, &deleted); |
|||
printf("Details: worst run %u (%zu deleted)\n", i, deleted); |
|||
|
|||
printf("Lookup after churn & spread (match): "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i++) { |
|||
unsigned int n = num * 5 + i * 9; |
|||
if (htable_obj_get(&ht, &n)->self != objs[i].self) |
|||
abort(); |
|||
} |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
printf("Lookup after churn & spread (miss): "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i++) { |
|||
unsigned int n = num * (5 + 9) + i * 9; |
|||
if (htable_obj_get(&ht, &n)) |
|||
abort(); |
|||
} |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
printf("Lookup after churn & spread (random): "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num) { |
|||
unsigned int n = num * 5 + j * 9; |
|||
if (htable_obj_get(&ht, &n)->self != &objs[j]) |
|||
abort(); |
|||
} |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
hashcount = 0; |
|||
printf("Deleting half after churn & spread: "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i+=2) |
|||
if (!htable_obj_del(&ht, objs[i].self)) |
|||
abort(); |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
printf("Adding (a different) half after churn & spread: "); |
|||
fflush(stdout); |
|||
|
|||
for (i = 0; i < num; i+=2) |
|||
objs[i].key = num*6+i*9; |
|||
|
|||
start = time_now(); |
|||
for (i = 0; i < num; i+=2) |
|||
htable_obj_add(&ht, objs[i].self); |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
printf("Details: delete markers %zu, perfect %.0f%%\n", |
|||
count_deleted(&ht.raw), perfect(&ht.raw) * 100.0 / ht.raw.elems); |
|||
|
|||
return 0; |
|||
} |
@ -0,0 +1,240 @@ |
|||
/* Simple speed tests for a hash of strings. */ |
|||
#include <ccan/htable/htable_type.h> |
|||
#include <ccan/htable/htable.c> |
|||
#include <ccan/tal/str/str.h> |
|||
#include <ccan/tal/grab_file/grab_file.h> |
|||
#include <ccan/tal/tal.h> |
|||
#include <ccan/hash/hash.h> |
|||
#include <ccan/time/time.h> |
|||
#include <stdio.h> |
|||
#include <stdlib.h> |
|||
#include <string.h> |
|||
#include <time.h> |
|||
#include <unistd.h> |
|||
#include <sys/time.h> |
|||
|
|||
/* Incremented by every hash_str() call; main() zeroes it before some phases. */
static size_t hashcount; |
|||
|
|||
/* Key extractor for the string table: a stored string is its own key. */
static const char *strkey(const char *str)
{
	const char *key = str;

	return key;
}
|||
|
|||
static size_t hash_str(const char *key) |
|||
{ |
|||
hashcount++; |
|||
return hash(key, strlen(key), 0); |
|||
} |
|||
|
|||
/* Equality callback for the string table: true when obj and key hold the
 * same characters. */
static bool cmp(const char *obj, const char *key)
{
	if (strcmp(obj, key) != 0)
		return false;

	return true;
}
|||
|
|||
/*
 * Instantiate the typed table used by main() (struct htable_str and the
 * htable_str_init/add/get/del calls): objects are char strings, keyed via
 * strkey, hashed by hash_str and matched by cmp.
 */
HTABLE_DEFINE_TYPE(char, strkey, hash_str, cmp, htable_str); |
|||
|
|||
/* Nanoseconds per operation */ |
|||
static size_t normalize(const struct timeabs *start, |
|||
const struct timeabs *stop, |
|||
unsigned int num) |
|||
{ |
|||
return time_to_nsec(time_divide(time_between(*stop, *start), num)); |
|||
} |
|||
|
|||
int main(int argc, char *argv[]) |
|||
{ |
|||
size_t i, j, num; |
|||
struct timeabs start, stop; |
|||
struct htable_str ht; |
|||
char **words, **misswords; |
|||
|
|||
words = tal_strsplit(NULL, grab_file(NULL, |
|||
argv[1] ? argv[1] : "/usr/share/dict/words"), "\n", |
|||
STR_NO_EMPTY); |
|||
htable_str_init(&ht); |
|||
num = tal_count(words) - 1; |
|||
/* Note that on my system, num is just > 98304, where we double! */ |
|||
printf("%zu words\n", num); |
|||
|
|||
/* Append and prepend last char for miss testing. */ |
|||
misswords = tal_arr(words, char *, num); |
|||
for (i = 0; i < num; i++) { |
|||
char lastc; |
|||
if (strlen(words[i])) |
|||
lastc = words[i][strlen(words[i])-1]; |
|||
else |
|||
lastc = 'z'; |
|||
misswords[i] = tal_fmt(misswords, "%c%s%c%c", |
|||
lastc, words[i], lastc, lastc); |
|||
} |
|||
|
|||
printf("#01: Initial insert: "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i++) |
|||
htable_str_add(&ht, words[i]); |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
printf("Bytes allocated: %zu\n", |
|||
sizeof(ht.raw.table[0]) << ht.raw.bits); |
|||
|
|||
printf("#02: Initial lookup (match): "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i++) |
|||
if (htable_str_get(&ht, words[i]) != words[i]) |
|||
abort(); |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
printf("#03: Initial lookup (miss): "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i++) { |
|||
if (htable_str_get(&ht, misswords[i])) |
|||
abort(); |
|||
} |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
/* Lookups in order are very cache-friendly for judy; try random */ |
|||
printf("#04: Initial lookup (random): "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num) |
|||
if (htable_str_get(&ht, words[j]) != words[j]) |
|||
abort(); |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
hashcount = 0; |
|||
printf("#05: Initial delete all: "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i++) |
|||
if (!htable_str_del(&ht, words[i])) |
|||
abort(); |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
printf("#06: Initial re-inserting: "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i++) |
|||
htable_str_add(&ht, words[i]); |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
hashcount = 0; |
|||
printf("#07: Deleting first half: "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i+=2) |
|||
if (!htable_str_del(&ht, words[i])) |
|||
abort(); |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
printf("#08: Adding (a different) half: "); |
|||
fflush(stdout); |
|||
|
|||
start = time_now(); |
|||
for (i = 0; i < num; i+=2) |
|||
htable_str_add(&ht, misswords[i]); |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
printf("#09: Lookup after half-change (match): "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 1; i < num; i+=2) |
|||
if (htable_str_get(&ht, words[i]) != words[i]) |
|||
abort(); |
|||
for (i = 0; i < num; i+=2) { |
|||
if (htable_str_get(&ht, misswords[i]) != misswords[i]) |
|||
abort(); |
|||
} |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
printf("#10: Lookup after half-change (miss): "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i+=2) |
|||
if (htable_str_get(&ht, words[i])) |
|||
abort(); |
|||
for (i = 1; i < num; i+=2) { |
|||
if (htable_str_get(&ht, misswords[i])) |
|||
abort(); |
|||
} |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
/* Hashtables with delete markers can fill with markers over time.
|
|||
* so do some changes to see how it operates in long-term. */ |
|||
printf("#11: Churn 1: "); |
|||
start = time_now(); |
|||
for (j = 0; j < num; j+=2) { |
|||
if (!htable_str_del(&ht, misswords[j])) |
|||
abort(); |
|||
if (!htable_str_add(&ht, words[j])) |
|||
abort(); |
|||
} |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
printf("#12: Churn 2: "); |
|||
start = time_now(); |
|||
for (j = 1; j < num; j+=2) { |
|||
if (!htable_str_del(&ht, words[j])) |
|||
abort(); |
|||
if (!htable_str_add(&ht, misswords[j])) |
|||
abort(); |
|||
} |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
printf("#13: Churn 3: "); |
|||
start = time_now(); |
|||
for (j = 1; j < num; j+=2) { |
|||
if (!htable_str_del(&ht, misswords[j])) |
|||
abort(); |
|||
if (!htable_str_add(&ht, words[j])) |
|||
abort(); |
|||
} |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
/* Now it's back to normal... */ |
|||
printf("#14: Post-Churn lookup (match): "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i++) |
|||
if (htable_str_get(&ht, words[i]) != words[i]) |
|||
abort(); |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
printf("#15: Post-Churn lookup (miss): "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0; i < num; i++) { |
|||
if (htable_str_get(&ht, misswords[i])) |
|||
abort(); |
|||
} |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
/* Lookups in order are very cache-friendly for judy; try random */ |
|||
printf("#16: Post-Churn lookup (random): "); |
|||
fflush(stdout); |
|||
start = time_now(); |
|||
for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num) |
|||
if (htable_str_get(&ht, words[j]) != words[j]) |
|||
abort(); |
|||
stop = time_now(); |
|||
printf(" %zu ns\n", normalize(&start, &stop, num)); |
|||
|
|||
return 0; |
|||
} |
@ -0,0 +1 @@ |
|||
../../licenses/CC0 |
@ -0,0 +1,33 @@ |
|||
#include "config.h" |
|||
#include <stdio.h> |
|||
#include <string.h> |
|||
|
|||
/** |
|||
* order - Simple, common value comparison functions |
|||
* |
|||
* This implements a number of commonly useful comparison functions in |
|||
* a form which can be used with qsort() and bsearch() in the standard |
|||
* library, or asort() and asearch() in ccan amongst other places. |
|||
* |
|||
* License: CC0 |
|||
* Author: David Gibson <david@gibson.dropbear.id.au> |
|||
*/ |
|||
/*
 * Module metadata query: with the single argument "depends" or
 * "testdepends", print the corresponding module list (one per line) and
 * return 0; any other invocation returns 1.
 */
int main(int argc, char *argv[])
{
	const char *query;

	/* Expect exactly one argument */
	if (argc != 2)
		return 1;

	query = argv[1];

	if (!strcmp(query, "depends")) {
		printf("ccan/typesafe_cb\n");
		printf("ccan/ptrint\n");
		return 0;
	}

	if (!strcmp(query, "testdepends")) {
		printf("ccan/array_size\n");
		printf("ccan/asort\n");
		return 0;
	}

	return 1;
}
@ -0,0 +1,70 @@ |
|||
/* CC0 license (public domain) - see LICENSE file for details */ |
|||
|
|||
#include <ccan/order/order.h> |
|||
|
|||
/*
 * SCALAR_ORDER(_oname, _type) - define the comparison family for one
 * scalar type:
 *
 *   _order_<oname>(a, b, ctx)          compare the _type values found
 *                                      ptr2int(ctx) bytes into a and b
 *   order_<oname>(a, b, ctx)           compare *a with *b; ctx is ignored
 *   order_<oname>_noctx(a, b)          the same, without a context argument
 *   _order_<oname>_reverse(...),
 *   order_<oname>_reverse(...),
 *   order_<oname>_reverse_noctx(...)   as above with the result negated
 *
 * The forward functions return -1, 0 or 1.  For floating-point types a NaN
 * operand satisfies none of <, > or ==, so the assert() fires unless
 * NDEBUG is defined.
 *
 * The offset arithmetic is done through const char * so the const
 * qualifier of a and b is never cast away.
 */
#define SCALAR_ORDER(_oname, _type) \
	int _order_##_oname(const void *a, \
			    const void *b, \
			    void *ctx) \
	{ \
		ptrdiff_t offset = ptr2int(ctx); \
		const _type *aa = (const _type *)((const char *)a + offset); \
		const _type *bb = (const _type *)((const char *)b + offset); \
		\
		if (*aa < *bb) { \
			return -1; \
		} else if (*aa > *bb) { \
			return 1; \
		} else { \
			assert(*aa == *bb); \
			return 0; \
		} \
	} \
	int order_##_oname(const _type *a, \
			   const _type *b, \
			   void *ctx) \
	{ \
		return _order_##_oname(a, b, int2ptr(0)); \
	} \
	int _order_##_oname##_reverse(const void *a, \
				      const void *b, \
				      void *ctx) \
	{ \
		return -_order_##_oname(a, b, ctx); \
	} \
	int order_##_oname##_reverse(const _type *a, \
				     const _type *b, \
				     void *ctx) \
	{ \
		return _order_##_oname##_reverse(a, b, int2ptr(0)); \
	} \
	int order_##_oname##_noctx(const void *a, \
				   const void *b) \
	{ \
		return _order_##_oname(a, b, int2ptr(0)); \
	} \
	int order_##_oname##_reverse_noctx(const void *a, \
					   const void *b) \
	{ \
		return _order_##_oname##_reverse(a, b, int2ptr(0)); \
	}

/* Fixed-width signed integers */
SCALAR_ORDER(s8, int8_t)
SCALAR_ORDER(s16, int16_t)
SCALAR_ORDER(s32, int32_t)
SCALAR_ORDER(s64, int64_t)

/* Fixed-width unsigned integers */
SCALAR_ORDER(u8, uint8_t)
SCALAR_ORDER(u16, uint16_t)
SCALAR_ORDER(u32, uint32_t)
SCALAR_ORDER(u64, uint64_t)

/* Native integer types */
SCALAR_ORDER(int, int)
SCALAR_ORDER(uint, unsigned int)
SCALAR_ORDER(long, long)
SCALAR_ORDER(ulong, unsigned long)
SCALAR_ORDER(size, size_t)
SCALAR_ORDER(ptrdiff, ptrdiff_t)

/* Floating point */
SCALAR_ORDER(float, float)
SCALAR_ORDER(double, double)
@ -0,0 +1,73 @@ |
|||
/* CC0 license (public domain) - see LICENSE file for details */ |
|||
#ifndef CCAN_ORDER_H |
|||
#define CCAN_ORDER_H |
|||
|
|||
#include <stdint.h> |
|||
#include <assert.h> |
|||
|
|||
#include <ccan/typesafe_cb/typesafe_cb.h> |
|||
#include <ccan/ptrint/ptrint.h> |
|||
|
|||
/*
 * Untyped comparison callback signatures.  _total_order_cb receives the two
 * items plus a context pointer; total_order_noctx_cb receives only the two
 * items (the signature qsort() and bsearch() take).
 */
typedef int (*_total_order_cb)(const void *, const void *, void *); |
|||
typedef int (*total_order_noctx_cb)(const void *, const void *); |
|||
|
|||
/*
 * total_order_cb - declare a typed comparison-callback pointer.
 * Expands to a declarator making _name a pointer to a function that takes
 * two `const typeof(_item) *` arguments and a `typeof(_ctx)` context and
 * returns int.  _name may be left empty to form just the pointer type.
 */
#define total_order_cb(_name, _item, _ctx) \ |
|||
int (*_name)(const __typeof__(_item) *, \ |
|||
const __typeof__(_item) *, \ |
|||
__typeof__(_ctx)) |
|||
|
|||
/*
 * total_order_cast - convert a typed comparison function to _total_order_cb.
 * Hands cmp to typesafe_cb_cast together with the typed signature built
 * from item and ctx; presumably a cmp of any other type is rejected at
 * compile time (see ccan/typesafe_cb) -- TODO confirm.
 */
#define total_order_cast(cmp, item, ctx) \ |
|||
typesafe_cb_cast(_total_order_cb, total_order_cb(, item, ctx), \ |
|||
(cmp)) |
|||
|
|||
/* Untyped pairing of a comparison callback with the context to pass to it. */
struct _total_order { |
|||
_total_order_cb cb; |
|||
void *ctx; |
|||
}; |
|||
|
|||
/*
 * total_order - declare a typed (callback, context) pair called _name.
 * Expands to an anonymous struct with a total_order_cb-typed member `cb`
 * and a member `ctx` of type _ctx, so the use can be followed directly by
 * an initializer such as `= { fn, ctx }`.
 */
#define total_order(_name, _item, _ctx) \ |
|||
struct { \ |
|||
total_order_cb(cb, _item, _ctx); \ |
|||
_ctx ctx; \ |
|||
} _name |
|||
|
|||
/*
 * Prototype generators, used only for the list below and #undef'd after it.
 * _DECL_ONAME declares _order_<oname>, order_<oname> and
 * order_<oname>_noctx for one item type.
 */
#define _DECL_ONAME(_oname, _itype) \ |
|||
extern int _order_##_oname(const void *, const void *, void *); \ |
|||
extern int order_##_oname(const _itype *, const _itype *, void *); \ |
|||
extern int order_##_oname##_noctx(const void *, const void *); |
|||
|
|||
/* Declares both the forward trio and the matching <oname>_reverse trio. */
#define _DECL_ONAME_BIDIR(_oname, _itype) \ |
|||
_DECL_ONAME(_oname, _itype) \ |
|||
_DECL_ONAME(_oname##_reverse, _itype) |
|||
|
|||
/* Fixed-width signed integers */
_DECL_ONAME_BIDIR(s8, int8_t) |
|||
_DECL_ONAME_BIDIR(s16, int16_t) |
|||
_DECL_ONAME_BIDIR(s32, int32_t) |
|||
_DECL_ONAME_BIDIR(s64, int64_t) |
|||
|
|||
/* Fixed-width unsigned integers */
_DECL_ONAME_BIDIR(u8, uint8_t) |
|||
_DECL_ONAME_BIDIR(u16, uint16_t) |
|||
_DECL_ONAME_BIDIR(u32, uint32_t) |
|||
_DECL_ONAME_BIDIR(u64, uint64_t) |
|||
|
|||
/* Native integer types */
_DECL_ONAME_BIDIR(int, int) |
|||
_DECL_ONAME_BIDIR(uint, unsigned int) |
|||
_DECL_ONAME_BIDIR(long, long) |
|||
_DECL_ONAME_BIDIR(ulong, unsigned long) |
|||
_DECL_ONAME_BIDIR(size, size_t) |
|||
_DECL_ONAME_BIDIR(ptrdiff, ptrdiff_t) |
|||
|
|||
/* Floating point */
_DECL_ONAME_BIDIR(float, float) |
|||
_DECL_ONAME_BIDIR(double, double) |
|||
|
|||
/* The generators are internal to this header. */
#undef _DECL_ONAME |
|||
#undef _DECL_ONAME_BIDIR |
|||
|
|||
/*
 * total_order_by_field - declare a total_order called _name that compares
 * _itype objects by one scalar member.
 * The callback is _order_<_oname> cast to the typed signature, and the
 * context is the byte offset of _field within _itype, encoded as a pointer
 * with int2ptr().
 */
#define total_order_by_field(_name, _oname, _itype, _field) \ |
|||
total_order(_name, _itype, ptrint_t *) = { \ |
|||
(total_order_cb(, _itype, \ |
|||
ptrint_t *))(_order_##_oname), \ |
|||
int2ptr(offsetof(_itype, _field)), \ |
|||
} |
|||
|
|||
#endif /* CCAN_ORDER_H */ |
@ -0,0 +1,138 @@ |
|||
#include "config.h" |
|||
|
|||
#include <string.h> |
|||
#include <stdlib.h> |
|||
#include <limits.h> |
|||
#include <float.h> |
|||
#include <math.h> |
|||
|
|||
#include <ccan/array_size/array_size.h> |
|||
|
|||
#include <ccan/order/order.h> |
|||
#include <ccan/tap/tap.h> |
|||
|
|||
#include <ccan/asort/asort.h> |
|||
|
|||
/*
 * QSORT_SCALAR - check order_<oname>_noctx and order_<oname>_reverse_noctx
 * as qsort() comparators.
 * The variadic arguments are values of type t listed in ascending order.
 * A descending copy is sorted ascending and must match the list; it is then
 * sorted descending and must match the descending copy.  Two ok() checks.
 */
#define QSORT_SCALAR(t, oname, ...) \
	{ \
		t sorted[] = { __VA_ARGS__ }; \
		const int num = ARRAY_SIZE(sorted); \
		t reversed[num], work[num]; \
		int k; \
		\
		/* Build the descending copy of the reference data */ \
		for (k = num - 1; k >= 0; k--) \
			reversed[num - 1 - k] = sorted[k]; \
		\
		memcpy(work, reversed, sizeof(reversed)); \
		qsort(work, num, sizeof(t), order_##oname##_noctx); \
		ok(memcmp(work, sorted, sizeof(sorted)) == 0, \
		   "qsort order_%s_noctx", #oname); \
		\
		qsort(work, num, sizeof(t), order_##oname##_reverse_noctx); \
		ok(memcmp(work, reversed, sizeof(reversed)) == 0, \
		   "qsort order_%s_reverse_noctx", #oname); \
	}
|||
|
|||
/*
 * ASORT_SCALAR - check order_<oname> and order_<oname>_reverse as asort()
 * comparators, with a NULL context.
 * The variadic arguments are values of type t listed in ascending order.
 * A descending copy is sorted ascending and must match the list; it is then
 * sorted descending and must match the descending copy.  Two ok() checks.
 */
#define ASORT_SCALAR(t, oname, ...) \
	{ \
		t sorted[] = { __VA_ARGS__ }; \
		const int num = ARRAY_SIZE(sorted); \
		t reversed[num], work[num]; \
		int k; \
		\
		/* Build the descending copy of the reference data */ \
		for (k = num - 1; k >= 0; k--) \
			reversed[num - 1 - k] = sorted[k]; \
		\
		memcpy(work, reversed, sizeof(reversed)); \
		asort(work, num, order_##oname, NULL); \
		ok(memcmp(work, sorted, sizeof(sorted)) == 0, \
		   "asort order_%s", #oname); \
		\
		asort(work, num, order_##oname##_reverse, NULL); \
		ok(memcmp(work, reversed, sizeof(reversed)) == 0, \
		   "asort order_%s_reverse", #oname); \
	}
|||
|
|||
/*
 * ASORT_STRUCT_BY_SCALAR - check total_order_by_field() orders for a
 * member of type t embedded in a structure.
 * The variadic arguments are values of type t listed in ascending order.
 * Each value is placed in the `val` member of a struct tstruct, surrounded
 * by dummy members so that val sits at a non-zero offset.  A descending
 * copy of the array is sorted with the forward order and must match the
 * ascending array, then sorted with the reverse order and must match the
 * descending copy.  Two ok() checks.
 *
 * The arrays are compared with memcmp(), which also looks at padding
 * bytes.  The descending copy is therefore made with memcpy() rather than
 * struct assignment, since assignment leaves padding bytes unspecified.
 */
#define ASORT_STRUCT_BY_SCALAR(t, oname, ...) \
	{ \
		t arrbase[] = { __VA_ARGS__ }; \
		struct tstruct { \
			char dummy0[5]; \
			t val; \
			long dummy1; \
		}; \
		const int num = ARRAY_SIZE(arrbase); \
		struct tstruct arr0[num], arr1[num], arr2[num]; \
		int i; \
		total_order_by_field(order, oname, struct tstruct, val); \
		total_order_by_field(rorder, oname##_reverse, \
				     struct tstruct, val); \
		\
		/* Set up dummy structures */ \
		memset(arr0, 0, sizeof(arr0)); \
		for (i = 0; i < num; i++) { \
			arr0[i].dummy1 = i; \
			strcpy(arr0[i].dummy0, "abc"); \
			arr0[i].val = arrbase[i]; \
		} \
		\
		/* Initialize arr1 in reverse order, padding included */ \
		for (i = 0; i < num; i++) \
			memcpy(&arr1[i], &arr0[num-i-1], sizeof(arr1[i])); \
		\
		memcpy(arr2, arr1, sizeof(arr1)); \
		asort(arr2, num, order.cb, order.ctx); \
		ok(memcmp(arr2, arr0, sizeof(arr0)) == 0, \
		   "asort by field %s", #oname); \
		\
		asort(arr2, num, rorder.cb, rorder.ctx); \
		ok(memcmp(arr2, arr1, sizeof(arr1)) == 0, \
		   "asort by field %s_reverse", #oname); \
	}
|||
|
|||
/*
 * TEST_SCALAR - run all three checks for one scalar type: qsort() with the
 * _noctx comparators, asort() with the typed comparators, and asort() of
 * structures ordered by a field.  The variadic arguments are forwarded
 * unchanged to each check.
 */
#define TEST_SCALAR(t, oname, ...) \ |
|||
{ \ |
|||
QSORT_SCALAR(t, oname, __VA_ARGS__); \ |
|||
ASORT_SCALAR(t, oname, __VA_ARGS__); \ |
|||
ASORT_STRUCT_BY_SCALAR(t, oname, __VA_ARGS__); \ |
|||
} |
|||
|
|||
int main(void) |
|||
{ |
|||
/* This is how many tests you plan to run */ |
|||
plan_tests(84); |
|||
|
|||
TEST_SCALAR(int8_t, s8, -128, -4, 0, 1, 2, 88, 126, 127); |
|||
TEST_SCALAR(int16_t, s16, -32768, -4, 0, 1, 2, 88, 126, 32767); |
|||
TEST_SCALAR(int32_t, s32, -2000000000, -4, 0, 1, 2, 88, 126, |
|||
2000000000); |
|||
TEST_SCALAR(int64_t, s64, -999999999999999999LL, -2000000000, -4, 0, |
|||
1, 2, 88, 126, 2000000000, 999999999999999999LL); |
|||
|
|||
TEST_SCALAR(uint8_t, u8, 0, 1, 2, 88, 126, 127, -10, -1); |
|||
TEST_SCALAR(uint16_t, u16, 0, 1, 2, 88, 126, 32767, -10, -1); |
|||
TEST_SCALAR(uint32_t, u32, 0, 1, 2, 88, 126, 2000000000, -10, -1); |
|||
TEST_SCALAR(uint64_t, u64, 0, 1, 2, 88, 126, 2000000000, |
|||
999999999999999999LL, -10, -1); |
|||
|
|||
TEST_SCALAR(int, int, INT_MIN, -10, -1, 0, 1, 10, INT_MAX); |
|||
TEST_SCALAR(unsigned, uint, 0, 1, 10, INT_MAX, (unsigned)INT_MAX+1, |
|||
-10, -1); |
|||
|
|||
TEST_SCALAR(long, long, LONG_MIN, INT_MIN, -10, -1, 0, 1, 10, INT_MAX, |
|||
LONG_MAX); |
|||
TEST_SCALAR(unsigned long, ulong, 0, 1, 10, INT_MAX, |
|||
(unsigned long)INT_MAX+1, LONG_MAX, |
|||
(unsigned long)LONG_MAX+1, -10, -1); |
|||
|
|||
TEST_SCALAR(float, float, -INFINITY, -FLT_MAX, -1.0, 0.0, FLT_MIN, |
|||
0.1, M_E, M_PI, 5.79, FLT_MAX, INFINITY); |
|||
TEST_SCALAR(double, double, -INFINITY, -DBL_MAX, -FLT_MAX, -1.0, 0.0, |
|||
DBL_MIN, FLT_MIN, 0.1, M_E, M_PI, 5.79, FLT_MAX, DBL_MAX, |
|||
INFINITY); |
|||
|
|||
/* This exits depending on whether all tests passed */ |
|||
return exit_status(); |
|||
} |
@ -0,0 +1,24 @@ |
|||
#include <stdlib.h> |
|||
#include <string.h> |
|||
#include <stdio.h> |
|||
|
|||
#include <ccan/order/order.h> |
|||
|
|||
#include "fancy_cmp.h" |
|||
|
|||
/*
 * With FAIL defined the item type handed to total_order_cast() is int
 * instead of struct item, so it no longer matches fancy_cmp's parameters;
 * presumably the build is then expected to fail -- TODO confirm against
 * the test harness.
 */
#ifdef FAIL |
|||
typedef int item_t; |
|||
#else |
|||
typedef struct item item_t; |
|||
#endif |
|||
|
|||
/* Takes fancy_cmp both as a typed callback and through total_order_cast()
 * with item_t, then prints both pointers so neither is unused. */
int main(int argc, char *argv[]) |
|||
{ |
|||
total_order_cb(cb0, struct item, struct cmp_info *) = fancy_cmp; |
|||
_total_order_cb cb1 = total_order_cast(fancy_cmp, |
|||
item_t, struct cmp_info *); |
|||
|
|||
printf("%p %p\n", cb0, cb1); |
|||
|
|||
exit(0); |
|||
} |
@ -0,0 +1,25 @@ |
|||
#include <stdlib.h> |
|||
#include <string.h> |
|||
#include <stdio.h> |
|||
|
|||
#include <ccan/order/order.h> |
|||
|
|||
#include "fancy_cmp.h" |
|||
|
|||
/*
 * With FAIL defined the context type handed to total_order_cast() is int *
 * instead of struct cmp_info *, so it no longer matches fancy_cmp's third
 * parameter; presumably the build is then expected to fail -- TODO confirm
 * against the test harness.
 */
#ifdef FAIL |
|||
typedef int ctx_t; |
|||
#else |
|||
typedef struct cmp_info ctx_t; |
|||
#endif |
|||
|
|||
/* Takes fancy_cmp both as a typed callback and through total_order_cast()
 * with ctx_t *, then prints both pointers so neither is unused. */
int main(int argc, char *argv[]) |
|||
{ |
|||
total_order_cb(cb0, struct item, struct cmp_info *) = fancy_cmp; |
|||
_total_order_cb cb1 = total_order_cast(fancy_cmp, struct item, |
|||
ctx_t *); |
|||
|
|||
printf("%p %p\n", cb0, cb1); |
|||
|
|||
exit(0); |
|||
|
|||
} |
@ -0,0 +1,19 @@ |
|||
#include <stdlib.h> |
|||
#include <string.h> |
|||
#include <stdio.h> |
|||
|
|||
#include <ccan/order/order.h> |
|||
|
|||
#include "fancy_cmp.h" |
|||
|
|||
/*
 * Exercises the three callback forms with matching types: a typed
 * total_order_cb, the untyped _total_order_cb obtained via
 * total_order_cast(), and a total_order_noctx_cb.  The pointers are printed
 * so none of them is unused.
 */
int main(int argc, char *argv[]) |
|||
{ |
|||
total_order_cb(cb0, struct item, struct cmp_info *) = fancy_cmp; |
|||
_total_order_cb cb1 = total_order_cast(fancy_cmp, |
|||
struct item, struct cmp_info *); |
|||
total_order_noctx_cb cb_noctx = fancy_cmp_noctx; |
|||
|
|||
printf("%p %p %p\n", cb0, cb1, cb_noctx); |
|||
|
|||
exit(0); |
|||
} |
@ -0,0 +1,47 @@ |
|||
#ifndef _FANCY_CMP_H |
|||
#define _FANCY_CMP_H |
|||
|
|||
/* Comparison context: xcode is XORed into each item's value before the
 * values are compared; offset is how many leading characters of each
 * item's string to skip. */
struct cmp_info {
	unsigned xcode;
	int offset;
};

/* Item under comparison: a number plus a string. */
struct item {
	unsigned value;
	char *str;
};

/*
 * Order two items: first by value XOR ctx->xcode (returning -1 or 1 when
 * they differ), then by strcmp() of the strings starting ctx->offset
 * characters in.
 */
static inline int fancy_cmp(const struct item *a, const struct item *b,
			    struct cmp_info *ctx)
{
	const unsigned lhs = a->value ^ ctx->xcode;
	const unsigned rhs = b->value ^ ctx->xcode;

	if (lhs != rhs)
		return lhs < rhs ? -1 : 1;

	return strcmp(a->str + ctx->offset, b->str + ctx->offset);
}
|||
|
|||
static inline int fancy_cmp_noctx(const void *av, const void *bv) |
|||
{ |
|||
const struct item *a = (const struct item *)av; |
|||
const struct item *b = (const struct item *)bv; |
|||
struct cmp_info ctx_default = { |
|||
.xcode = 0x1234, |
|||
.offset = 3, |
|||
}; |
|||
total_order(default_order, struct item, struct cmp_info *) = { |
|||
fancy_cmp, &ctx_default, |
|||
}; |
|||
|
|||
return default_order.cb(a, b, default_order.ctx); |
|||
} |
|||
|
|||
#endif /* _FANCY_CMP_H */ |
@ -0,0 +1 @@ |
|||
../../licenses/CC0 |
@ -0,0 +1,59 @@ |
|||
#include "config.h" |
|||
#include <stdio.h> |
|||
#include <string.h> |
|||
|
|||
/** |
|||
* ptrint - Encoding integers in pointer values |
|||
* |
|||
* Library (standard or ccan) functions which take user supplied |
|||
* callbacks usually have the callback supplied with a void * context |
|||
* pointer. For simple cases, it's sometimes sufficient to pass a |
|||
* simple integer cast into a void *, rather than having to allocate a |
|||
* context structure. This module provides some helper macros to do |
|||
* this relatively safely and portably. |
|||
* |
|||
* The key characteristics of these functions are: |
|||
* ptr2int(int2ptr(val)) == val |
|||
* and |
|||
* !int2ptr(val) == !val |
|||
* (i.e. the transformation preserves truth value). |
|||
* |
|||
* Example: |
|||
* #include <ccan/ptrint/ptrint.h> |
|||
* |
|||
* static void callback(void *opaque) |
|||
* { |
|||
* int val = ptr2int(opaque); |
|||
* printf("Value is %d\n", val); |
|||
* } |
|||
* |
|||
* void (*cb)(void *opaque) = callback; |
|||
* |
|||
* int main(int argc, char *argv[]) |
|||
* { |
|||
* int val = 17; |
|||
* |
|||
* (*cb)(int2ptr(val)); |
|||
* exit(0); |
|||
* } |
|||
* |
|||
* License: CC0 (Public domain) |
|||
* Author: David Gibson <david@gibson.dropbear.id.au> |
|||
*/ |
|||
/*
 * Module metadata query: with the single argument "depends" or
 * "testdepends", print the corresponding module list (one per line) and
 * return 0; any other invocation returns 1.
 */
int main(int argc, char *argv[])
{
	const char *query;

	/* Expect exactly one argument */
	if (argc != 2)
		return 1;

	query = argv[1];

	if (!strcmp(query, "depends")) {
		printf("ccan/build_assert\n");
		return 0;
	}

	if (!strcmp(query, "testdepends")) {
		printf("ccan/array_size\n");
		return 0;
	}

	return 1;
}
@ -0,0 +1,34 @@ |
|||
/* CC0 (Public domain) - see LICENSE file for details */ |
|||
#ifndef CCAN_PTRINT_H |
|||
#define CCAN_PTRINT_H |
|||
|
|||
#include "config.h" |
|||
|
|||
#include <stddef.h> |
|||
|
|||
#include <ccan/build_assert/build_assert.h> |
|||
|
|||
/*
|
|||
* This is a deliberately incomplete type, because it should never be |
|||
* dereferenced - instead it marks pointer values which are actually |
|||
* encoding integers |
|||
*/ |
|||
typedef struct ptrint ptrint_t;

/*
 * ptr2int - recover the integer encoded in a ptrint_t pointer.
 * @p: a pointer previously produced by int2ptr()
 *
 * Returns the value originally given to int2ptr().
 *
 * Uses a direct pointer-to-integer cast: arithmetic on a null pointer is
 * undefined behaviour, whereas the cast is merely implementation-defined
 * and, on the flat-address implementations this code targets, round-trips
 * and maps 0 to and from the null pointer.
 */
static inline ptrdiff_t ptr2int(const ptrint_t *p)
{
	/*
	 * ptrdiff_t is the right size by definition, but to avoid
	 * surprises we want a warning if the user can't fit at least
	 * a regular int in there
	 */
	BUILD_ASSERT(sizeof(int) <= sizeof(ptrdiff_t));
	return (ptrdiff_t)p;
}

/*
 * int2ptr - encode an integer as a ptrint_t pointer.
 * @i: the value to encode
 *
 * The result must never be dereferenced; pass it to ptr2int() to get @i
 * back.
 */
static inline ptrint_t *int2ptr(ptrdiff_t i)
{
	return (ptrint_t *)i;
}
|||
|
|||
#endif /* CCAN_PTRINT_H */ |
@ -0,0 +1,29 @@ |
|||
#include <limits.h> |
|||
|
|||
#include <ccan/array_size/array_size.h> |
|||
|
|||
#include <ccan/ptrint/ptrint.h> |
|||
#include <ccan/tap/tap.h> |
|||
|
|||
/* Values to round-trip: both signs, zero, and the extremes of int. */
static ptrdiff_t testvals[] = { |
|||
-INT_MAX, -1, 0, 1, 2, 17, INT_MAX, |
|||
}; |
|||
|
|||
/*
 * For every entry of testvals, checks that int2ptr() followed by ptr2int()
 * gives the value back, and that the pointer is null exactly when the
 * value is zero.  Returns the tap exit status.
 */
int main(void) |
|||
{ |
|||
int i; |
|||
|
|||
/* This is how many tests you plan to run */ |
|||
plan_tests(2 * ARRAY_SIZE(testvals)); |
|||
|
|||
for (i = 0; i < ARRAY_SIZE(testvals); i++) { |
|||
ptrdiff_t val = testvals[i]; |
|||
void *ptr = int2ptr(val); |
|||
|
|||
/* Round trip preserves the value */
ok1(ptr2int(ptr) == val); |
|||
/* Truth value is preserved */
ok1(!val == !ptr); |
|||
} |
|||
|
|||
/* This exits depending on whether all tests passed */ |
|||
return exit_status(); |
|||
} |
@ -0,0 +1,26 @@ |
|||
CFLAGS=-O3 -Wall -flto -I../../.. |
|||
#CFLAGS=-O3 -Wall -I../../..
|
|||
#CFLAGS=-g -Wall -I../../..
|
|||
LDFLAGS=-O3 -flto |
|||
LDLIBS=-lrt |
|||
|
|||
all: speed samba-allocs |
|||
|
|||
speed: speed.o tal.o talloc.o time.o list.o take.o str.o |
|||
samba-allocs: samba-allocs.o tal.o talloc.o time.o list.o take.o |
|||
|
|||
tal.o: ../tal.c |
|||
$(CC) $(CFLAGS) -c -o $@ $< |
|||
str.o: ../str/str.c |
|||
$(CC) $(CFLAGS) -c -o $@ $< |
|||
talloc.o: ../../talloc/talloc.c |
|||
$(CC) $(CFLAGS) -c -o $@ $< |
|||
time.o: ../../time/time.c |
|||
$(CC) $(CFLAGS) -c -o $@ $< |
|||
list.o: ../../list/list.c |
|||
$(CC) $(CFLAGS) -c -o $@ $< |
|||
take.o: ../../take/take.c |
|||
$(CC) $(CFLAGS) -c -o $@ $< |
|||
|
|||
clean: |
|||
rm -f speed samba-allocs *.o |
Loading…
Reference in new issue