1 // Copyright 2026 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "textflag.h"
6
7 // Hash functions using AES hardware instructions (AESENC).
8
9 // func memHash32AES(k uint32, h uintptr) uintptr
// memHash32AES hashes the 32-bit key k together with the seed h.
// Frame layout ($0-12, no locals): k at 0(FP), h at 4(FP), result at 8(FP).
// X0 is built with the seed in 32-bit lane 0 and the key in lane 1, then run
// through three AESENC rounds whose round keys are the first three 16-byte
// entries of ·aeskeysched. The low 32 bits of X0 are the result.
// Clobbers X0 only.
10 TEXT ·memHash32AES(SB),NOSPLIT,$0-12
11 MOVL h+4(FP), X0 // seed goes into lane 0 of X0
12 PINSRD $1, k+0(FP), X0 // data goes into lane 1 of X0
13 AESENC ·aeskeysched+0(SB), X0 // round 1, round key = aeskeysched[0:16]
14 AESENC ·aeskeysched+16(SB), X0 // round 2, round key = aeskeysched[16:32]
15 AESENC ·aeskeysched+32(SB), X0 // round 3, round key = aeskeysched[32:48]
16 MOVL X0, ret+8(FP) // return the low 32 bits of the scrambled state
17 RET
18
19 // func memHash64AES(k uint64, h uintptr) uintptr
// memHash64AES hashes the 64-bit key k together with the seed h.
// Frame layout ($0-16, no locals): k at 0(FP) (8 bytes), h at 8(FP),
// result at 12(FP).
// X0 is built with the key in its low 8 bytes (32-bit lanes 0 and 1) and the
// seed in lane 2, then run through three AESENC rounds whose round keys are
// the first three 16-byte entries of ·aeskeysched. The low 32 bits of X0 are
// the result.
// Clobbers X0 only.
20 TEXT ·memHash64AES(SB),NOSPLIT,$0-16
21 MOVQ k+0(FP), X0 // data goes into the low 8 bytes of X0
22 PINSRD $2, h+8(FP), X0 // seed goes into lane 2 of X0
23 AESENC ·aeskeysched+0(SB), X0 // round 1, round key = aeskeysched[0:16]
24 AESENC ·aeskeysched+16(SB), X0 // round 2, round key = aeskeysched[16:32]
25 AESENC ·aeskeysched+32(SB), X0 // round 3, round key = aeskeysched[32:48]
26 MOVL X0, ret+12(FP) // return the low 32 bits of the scrambled state
27 RET
28
29 // func memHashAES(p unsafe.Pointer, h, size uintptr) uintptr
// memHashAES hashes the size bytes starting at p, mixed with the seed h and
// the per-process key material in ·aeskeysched, and stores the low 32 bits of
// the final state in the result slot.
// Frame layout ($0-16, no locals): p at 0(FP), h at 4(FP), size at 8(FP),
// result at 12(FP).
// The work is dispatched on size: 0, 1-15, 16, 17-32, 33-64 and 65 or more
// bytes. Every path ends by storing through DX and returning.
// Clobbers AX, BX, DX, X0-X7 and the flags.
// NOTE(review): the signature comment above names the third parameter "size"
// but the code refers to it as s+8(FP). go vet's asmdecl check compares such
// names against the Go declaration -- confirm the two agree.
30 TEXT ·memHashAES(SB),NOSPLIT,$0-16
31 // AX: data
32 // BX: size
33 // DX: address to put return value
34 MOVL p+0(FP), AX
35 MOVL s+8(FP), BX
36 LEAL ret+12(FP), DX
37
// Build the initial seed. X0 becomes the scrambled seed used by every path;
// X1 keeps the unscrambled copy so the longer paths can derive extra seeds.
38 MOVL h+4(FP), X0 // 32 bits of per-table hash seed
39 PINSRW $4, BX, X0 // 16 bits of length
40 PSHUFHW $0, X0, X0 // replace size with its low 2 bytes repeated 4 times
41 MOVO X0, X1 // save unscrambled seed
42 PXOR ·aeskeysched(SB), X0 // xor in per-process seed
43 AESENC X0, X0 // scramble seed
44
// Dispatch on size (unsigned compares).
45 CMPL BX, $16
46 JB aes0to15
47 JE aes16
48 CMPL BX, $32
49 JBE aes17to32
50 CMPL BX, $64
51 JBE aes33to64
52 JMP aes65plus
53
54 aes0to15:
55 TESTL BX, BX
56 JE aes0
57
// 1 <= size <= 15 here. AX is advanced to p+16; the test below is zero
// exactly when bits 4-11 of p+16 are all zero, i.e. when p lies in the last
// 16 bytes of a 4096-byte-aligned region, where a 16-byte load starting at p
// could run past the end of that region.
58 ADDL $16, AX
59 TESTW $0xff0, AX
60 JE endofpage
61
62 // 16 bytes loaded at this address won't cross
63 // a page boundary, so we can load it directly.
64 MOVOU -16(AX), X1 // load 16 bytes starting at p (AX is p+16)
65 ADDL BX, BX // BX = 2*size, so BX*8 below is size*16
66 PAND masks<>(SB)(BX*8), X1 // keep the low size bytes, zero the rest
67
// final1: X1 holds up to 16 bytes of data (zero-padded), X0 the scrambled seed.
68 final1:
69 PXOR X0, X1 // xor data with seed
70 AESENC X1, X1 // scramble combo 3 times
71 AESENC X1, X1
72 AESENC X1, X1
73 MOVL X1, (DX)
74 RET
75
76 endofpage:
77 // address ends in 1111xxxx. Might be up against
78 // a page boundary, so load ending at last byte.
79 // Then shift bytes down using pshufb.
80 MOVOU -32(AX)(BX*1), X1 // AX is p+16: loads the 16 bytes ending at p+size
81 ADDL BX, BX // BX = 2*size, so BX*8 below is size*16
82 PSHUFB shifts<>(SB)(BX*8), X1 // move the top size bytes down, zero the rest
83 JMP final1
84
85 aes0:
86 // Return scrambled input seed
87 AESENC X0, X0
88 MOVL X0, (DX)
89 RET
90
// size == 16: a single full 16-byte load needs no masking.
91 aes16:
92 MOVOU (AX), X1
93 JMP final1
94
// 17 <= size <= 32: hash the first 16 and the last 16 bytes (they overlap
// when size < 32) in two independent lanes, then combine.
95 aes17to32:
96 // make second starting seed
97 PXOR ·aeskeysched+16(SB), X1
98 AESENC X1, X1
99
100 // load data to be hashed
101 MOVOU (AX), X2
102 MOVOU -16(AX)(BX*1), X3
103
104 // xor with seed
105 PXOR X0, X2
106 PXOR X1, X3
107
108 // scramble 3 times
109 AESENC X2, X2
110 AESENC X3, X3
111 AESENC X2, X2
112 AESENC X3, X3
113 AESENC X2, X2
114 AESENC X3, X3
115
116 // combine results
117 PXOR X3, X2
118 MOVL X2, (DX)
119 RET
120
// 33 <= size <= 64: hash the first 32 and the last 32 bytes (they overlap
// when size < 64) in four independent lanes, then combine.
121 aes33to64:
122 // make 3 more starting seeds
123 MOVO X1, X2
124 MOVO X1, X3
125 PXOR ·aeskeysched+16(SB), X1
126 PXOR ·aeskeysched+32(SB), X2
127 PXOR ·aeskeysched+48(SB), X3
128 AESENC X1, X1
129 AESENC X2, X2
130 AESENC X3, X3
131
// load the first 32 bytes (X4, X5) and the last 32 bytes (X6, X7)
132 MOVOU (AX), X4
133 MOVOU 16(AX), X5
134 MOVOU -32(AX)(BX*1), X6
135 MOVOU -16(AX)(BX*1), X7
136
// xor each lane with its own seed
137 PXOR X0, X4
138 PXOR X1, X5
139 PXOR X2, X6
140 PXOR X3, X7
141
// scramble each lane 3 times
142 AESENC X4, X4
143 AESENC X5, X5
144 AESENC X6, X6
145 AESENC X7, X7
146
147 AESENC X4, X4
148 AESENC X5, X5
149 AESENC X6, X6
150 AESENC X7, X7
151
152 AESENC X4, X4
153 AESENC X5, X5
154 AESENC X6, X6
155 AESENC X7, X7
156
// combine the four lanes into X4
157 PXOR X6, X4
158 PXOR X7, X5
159 PXOR X5, X4
160 MOVL X4, (DX)
161 RET
162
// size >= 65: the state (X4-X7) starts from the last 64 bytes, then every
// 64-byte block from the front is folded in by the loop.
163 aes65plus:
164 // make 3 more starting seeds
165 MOVO X1, X2
166 MOVO X1, X3
167 PXOR ·aeskeysched+16(SB), X1
168 PXOR ·aeskeysched+32(SB), X2
169 PXOR ·aeskeysched+48(SB), X3
170 AESENC X1, X1
171 AESENC X2, X2
172 AESENC X3, X3
173
174 // start with last (possibly overlapping) block
175 MOVOU -64(AX)(BX*1), X4
176 MOVOU -48(AX)(BX*1), X5
177 MOVOU -32(AX)(BX*1), X6
178 MOVOU -16(AX)(BX*1), X7
179
180 // scramble state once
// (the data in X4-X7 is the AES state; the seeds in X0-X3 are the round keys)
181 AESENC X0, X4
182 AESENC X1, X5
183 AESENC X2, X6
184 AESENC X3, X7
185
186 // compute number of remaining 64-byte blocks
// BX = (size-1)/64, which is at least 1 because size >= 65, so the
// bottom-tested loop below always has at least one block to process.
187 DECL BX
188 SHRL $6, BX
189
190 aesloop:
191 // scramble state, xor in a block
// X0-X3 are no longer needed as seeds and are reused for the data block,
// which is applied as the AESENC round key.
192 MOVOU (AX), X0
193 MOVOU 16(AX), X1
194 MOVOU 32(AX), X2
195 MOVOU 48(AX), X3
196 AESENC X0, X4
197 AESENC X1, X5
198 AESENC X2, X6
199 AESENC X3, X7
200
201 // scramble state
202 AESENC X4, X4
203 AESENC X5, X5
204 AESENC X6, X6
205 AESENC X7, X7
206
// advance to the next 64-byte block; loop until the block count reaches zero
207 ADDL $64, AX
208 DECL BX
209 JNE aesloop
210
211 // 3 more scrambles to finish
212 AESENC X4, X4
213 AESENC X5, X5
214 AESENC X6, X6
215 AESENC X7, X7
216
217 AESENC X4, X4
218 AESENC X5, X5
219 AESENC X6, X6
220 AESENC X7, X7
221
222 AESENC X4, X4
223 AESENC X5, X5
224 AESENC X6, X6
225 AESENC X7, X7
226
// combine the four lanes into X4
227 PXOR X6, X4
228 PXOR X7, X5
229 PXOR X5, X4
230 MOVL X4, (DX)
231 RET
232
233 // masks<> is a table of byte masks that clear the high part of a register.
// It holds 16 entries of 16 bytes each (256 bytes, read-only). Entry i, at
// byte offset i*16, has its low i bytes set to 0xff and the remaining bytes
// zero, so ANDing a 16-byte register with entry i keeps only its low i bytes.
// memHashAES indexes the table with size*16 for inputs of 1 to 15 bytes.
// entry 0: keep no bytes
234 DATA masks<>+0x00(SB)/4, $0x00000000
235 DATA masks<>+0x04(SB)/4, $0x00000000
236 DATA masks<>+0x08(SB)/4, $0x00000000
237 DATA masks<>+0x0c(SB)/4, $0x00000000
238
// entry 1: keep the low 1 byte
239 DATA masks<>+0x10(SB)/4, $0x000000ff
240 DATA masks<>+0x14(SB)/4, $0x00000000
241 DATA masks<>+0x18(SB)/4, $0x00000000
242 DATA masks<>+0x1c(SB)/4, $0x00000000
243
// entry 2
244 DATA masks<>+0x20(SB)/4, $0x0000ffff
245 DATA masks<>+0x24(SB)/4, $0x00000000
246 DATA masks<>+0x28(SB)/4, $0x00000000
247 DATA masks<>+0x2c(SB)/4, $0x00000000
248
// entry 3
249 DATA masks<>+0x30(SB)/4, $0x00ffffff
250 DATA masks<>+0x34(SB)/4, $0x00000000
251 DATA masks<>+0x38(SB)/4, $0x00000000
252 DATA masks<>+0x3c(SB)/4, $0x00000000
253
// entry 4
254 DATA masks<>+0x40(SB)/4, $0xffffffff
255 DATA masks<>+0x44(SB)/4, $0x00000000
256 DATA masks<>+0x48(SB)/4, $0x00000000
257 DATA masks<>+0x4c(SB)/4, $0x00000000
258
// entry 5
259 DATA masks<>+0x50(SB)/4, $0xffffffff
260 DATA masks<>+0x54(SB)/4, $0x000000ff
261 DATA masks<>+0x58(SB)/4, $0x00000000
262 DATA masks<>+0x5c(SB)/4, $0x00000000
263
// entry 6
264 DATA masks<>+0x60(SB)/4, $0xffffffff
265 DATA masks<>+0x64(SB)/4, $0x0000ffff
266 DATA masks<>+0x68(SB)/4, $0x00000000
267 DATA masks<>+0x6c(SB)/4, $0x00000000
268
// entry 7
269 DATA masks<>+0x70(SB)/4, $0xffffffff
270 DATA masks<>+0x74(SB)/4, $0x00ffffff
271 DATA masks<>+0x78(SB)/4, $0x00000000
272 DATA masks<>+0x7c(SB)/4, $0x00000000
273
// entry 8
274 DATA masks<>+0x80(SB)/4, $0xffffffff
275 DATA masks<>+0x84(SB)/4, $0xffffffff
276 DATA masks<>+0x88(SB)/4, $0x00000000
277 DATA masks<>+0x8c(SB)/4, $0x00000000
278
// entry 9
279 DATA masks<>+0x90(SB)/4, $0xffffffff
280 DATA masks<>+0x94(SB)/4, $0xffffffff
281 DATA masks<>+0x98(SB)/4, $0x000000ff
282 DATA masks<>+0x9c(SB)/4, $0x00000000
283
// entry 10
284 DATA masks<>+0xa0(SB)/4, $0xffffffff
285 DATA masks<>+0xa4(SB)/4, $0xffffffff
286 DATA masks<>+0xa8(SB)/4, $0x0000ffff
287 DATA masks<>+0xac(SB)/4, $0x00000000
288
// entry 11
289 DATA masks<>+0xb0(SB)/4, $0xffffffff
290 DATA masks<>+0xb4(SB)/4, $0xffffffff
291 DATA masks<>+0xb8(SB)/4, $0x00ffffff
292 DATA masks<>+0xbc(SB)/4, $0x00000000
293
// entry 12
294 DATA masks<>+0xc0(SB)/4, $0xffffffff
295 DATA masks<>+0xc4(SB)/4, $0xffffffff
296 DATA masks<>+0xc8(SB)/4, $0xffffffff
297 DATA masks<>+0xcc(SB)/4, $0x00000000
298
// entry 13
299 DATA masks<>+0xd0(SB)/4, $0xffffffff
300 DATA masks<>+0xd4(SB)/4, $0xffffffff
301 DATA masks<>+0xd8(SB)/4, $0xffffffff
302 DATA masks<>+0xdc(SB)/4, $0x000000ff
303
// entry 14
304 DATA masks<>+0xe0(SB)/4, $0xffffffff
305 DATA masks<>+0xe4(SB)/4, $0xffffffff
306 DATA masks<>+0xe8(SB)/4, $0xffffffff
307 DATA masks<>+0xec(SB)/4, $0x0000ffff
308
// entry 15: keep the low 15 bytes
309 DATA masks<>+0xf0(SB)/4, $0xffffffff
310 DATA masks<>+0xf4(SB)/4, $0xffffffff
311 DATA masks<>+0xf8(SB)/4, $0xffffffff
312 DATA masks<>+0xfc(SB)/4, $0x00ffffff
313
314 GLOBL masks<>(SB),RODATA,$256
315
316 // shifts<> is a table of arguments to pshufb. They move data down from
317 // the high bytes of the register to the low bytes of the register.
318 // The index is how many bytes to move.
// It holds 16 entries of 16 bytes each (256 bytes, read-only). Entry i, at
// byte offset i*16, selects source bytes 16-i through 15 into destination
// bytes 0 through i-1; the remaining control bytes are 0xff, which makes
// pshufb write zero to those destination bytes.
// memHashAES indexes the table with size*16 for inputs of 1 to 15 bytes;
// entry 0 is all zeros and is not reached from there because size 0 is
// handled before the table lookup.
// entry 0: unused placeholder
319 DATA shifts<>+0x00(SB)/4, $0x00000000
320 DATA shifts<>+0x04(SB)/4, $0x00000000
321 DATA shifts<>+0x08(SB)/4, $0x00000000
322 DATA shifts<>+0x0c(SB)/4, $0x00000000
323
// entry 1: byte 15 -> byte 0
324 DATA shifts<>+0x10(SB)/4, $0xffffff0f
325 DATA shifts<>+0x14(SB)/4, $0xffffffff
326 DATA shifts<>+0x18(SB)/4, $0xffffffff
327 DATA shifts<>+0x1c(SB)/4, $0xffffffff
328
// entry 2: bytes 14-15 -> bytes 0-1
329 DATA shifts<>+0x20(SB)/4, $0xffff0f0e
330 DATA shifts<>+0x24(SB)/4, $0xffffffff
331 DATA shifts<>+0x28(SB)/4, $0xffffffff
332 DATA shifts<>+0x2c(SB)/4, $0xffffffff
333
// entry 3
334 DATA shifts<>+0x30(SB)/4, $0xff0f0e0d
335 DATA shifts<>+0x34(SB)/4, $0xffffffff
336 DATA shifts<>+0x38(SB)/4, $0xffffffff
337 DATA shifts<>+0x3c(SB)/4, $0xffffffff
338
// entry 4
339 DATA shifts<>+0x40(SB)/4, $0x0f0e0d0c
340 DATA shifts<>+0x44(SB)/4, $0xffffffff
341 DATA shifts<>+0x48(SB)/4, $0xffffffff
342 DATA shifts<>+0x4c(SB)/4, $0xffffffff
343
// entry 5
344 DATA shifts<>+0x50(SB)/4, $0x0e0d0c0b
345 DATA shifts<>+0x54(SB)/4, $0xffffff0f
346 DATA shifts<>+0x58(SB)/4, $0xffffffff
347 DATA shifts<>+0x5c(SB)/4, $0xffffffff
348
// entry 6
349 DATA shifts<>+0x60(SB)/4, $0x0d0c0b0a
350 DATA shifts<>+0x64(SB)/4, $0xffff0f0e
351 DATA shifts<>+0x68(SB)/4, $0xffffffff
352 DATA shifts<>+0x6c(SB)/4, $0xffffffff
353
// entry 7
354 DATA shifts<>+0x70(SB)/4, $0x0c0b0a09
355 DATA shifts<>+0x74(SB)/4, $0xff0f0e0d
356 DATA shifts<>+0x78(SB)/4, $0xffffffff
357 DATA shifts<>+0x7c(SB)/4, $0xffffffff
358
// entry 8
359 DATA shifts<>+0x80(SB)/4, $0x0b0a0908
360 DATA shifts<>+0x84(SB)/4, $0x0f0e0d0c
361 DATA shifts<>+0x88(SB)/4, $0xffffffff
362 DATA shifts<>+0x8c(SB)/4, $0xffffffff
363
// entry 9
364 DATA shifts<>+0x90(SB)/4, $0x0a090807
365 DATA shifts<>+0x94(SB)/4, $0x0e0d0c0b
366 DATA shifts<>+0x98(SB)/4, $0xffffff0f
367 DATA shifts<>+0x9c(SB)/4, $0xffffffff
368
// entry 10
369 DATA shifts<>+0xa0(SB)/4, $0x09080706
370 DATA shifts<>+0xa4(SB)/4, $0x0d0c0b0a
371 DATA shifts<>+0xa8(SB)/4, $0xffff0f0e
372 DATA shifts<>+0xac(SB)/4, $0xffffffff
373
// entry 11
374 DATA shifts<>+0xb0(SB)/4, $0x08070605
375 DATA shifts<>+0xb4(SB)/4, $0x0c0b0a09
376 DATA shifts<>+0xb8(SB)/4, $0xff0f0e0d
377 DATA shifts<>+0xbc(SB)/4, $0xffffffff
378
// entry 12
379 DATA shifts<>+0xc0(SB)/4, $0x07060504
380 DATA shifts<>+0xc4(SB)/4, $0x0b0a0908
381 DATA shifts<>+0xc8(SB)/4, $0x0f0e0d0c
382 DATA shifts<>+0xcc(SB)/4, $0xffffffff
383
// entry 13
384 DATA shifts<>+0xd0(SB)/4, $0x06050403
385 DATA shifts<>+0xd4(SB)/4, $0x0a090807
386 DATA shifts<>+0xd8(SB)/4, $0x0e0d0c0b
387 DATA shifts<>+0xdc(SB)/4, $0xffffff0f
388
// entry 14
389 DATA shifts<>+0xe0(SB)/4, $0x05040302
390 DATA shifts<>+0xe4(SB)/4, $0x09080706
391 DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a
392 DATA shifts<>+0xec(SB)/4, $0xffff0f0e
393
// entry 15: bytes 1-15 -> bytes 0-14
394 DATA shifts<>+0xf0(SB)/4, $0x04030201
395 DATA shifts<>+0xf4(SB)/4, $0x08070605
396 DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09
397 DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d
398
399 GLOBL shifts<>(SB),RODATA,$256
400
// checkMasksAndShiftsAlignment stores 1 in its one-byte result when the
// addresses of both masks<> and shifts<> are multiples of 16, and 0 otherwise.
// memHashAES uses the two tables directly as memory operands of PAND and
// PSHUFB, and the SSE forms of those instructions require a 16-byte-aligned
// memory operand.
// NOTE(review): the frame ($0-1) implies no arguments and a one-byte result,
// presumably a Go func() bool -- confirm against the Go declaration.
// Clobbers AX, BX and the flags.
401 TEXT ·checkMasksAndShiftsAlignment(SB),NOSPLIT,$0-1
402 // check that masks<>(SB) and shifts<>(SB) are aligned to 16-byte
403 MOVL $masks<>(SB), AX
404 MOVL $shifts<>(SB), BX
405 ORL BX, AX // a low bit set in either address survives the OR
406 TESTL $15, AX // zero iff the low 4 bits of both addresses are clear
407 SETEQ ret+0(FP)
408 RET
409
View as plain text