1
2
3
4
5
6
7 package simd_test
8
9 import (
10 "reflect"
11 "simd/archsimd"
12 "slices"
13 "testing"
14 )
15
16 var sink any
17
18 func TestType(t *testing.T) {
19
20
21
22
23
24
25
26 type alias = archsimd.Int32x4
27 type maskT archsimd.Mask32x4
28 type myStruct struct {
29 x alias
30 y *archsimd.Int32x4
31 z maskT
32 }
33 vals := [4]int32{1, 2, 3, 4}
34 v := myStruct{x: archsimd.LoadInt32x4(&vals)}
35
36 want := []int32{2, 4, 0, 0}
37 y := archsimd.LoadInt32x4(&vals)
38 v.y = &y
39 sink = y
40
41 if !archsimd.X86.AVX512GFNI() {
42 t.Skip("Test requires X86.AVX512, not available on this hardware")
43 return
44 }
45 v.z = maskT(archsimd.Mask32x4FromBits(0b0011))
46 *v.y = v.y.Add(v.x).Masked(archsimd.Mask32x4(v.z))
47
48 got := [4]int32{}
49 v.y.Store(&got)
50 checkSlices(t, got[:], want)
51 }
52
53 func TestUncomparable(t *testing.T) {
54
55 var x, y any = archsimd.LoadUint32x4(&[4]uint32{1, 2, 3, 4}), archsimd.LoadUint32x4(&[4]uint32{5, 6, 7, 8})
56 shouldPanic := func(fn func()) {
57 defer func() {
58 if recover() == nil {
59 panic("did not panic")
60 }
61 }()
62 fn()
63 }
64 shouldPanic(func() { _ = x == y })
65 }
66
67 func TestFuncValue(t *testing.T) {
68
69 xv := [4]int32{1, 2, 3, 4}
70 yv := [4]int32{5, 6, 7, 8}
71 want := []int32{6, 8, 10, 12}
72 x := archsimd.LoadInt32x4(&xv)
73 y := archsimd.LoadInt32x4(&yv)
74 fn := archsimd.Int32x4.Add
75 sink = fn
76 x = fn(x, y)
77 got := [4]int32{}
78 x.Store(&got)
79 checkSlices(t, got[:], want)
80 }
81
82 func TestReflectMethod(t *testing.T) {
83
84
85 xv := [4]int32{1, 2, 3, 4}
86 yv := [4]int32{5, 6, 7, 8}
87 want := []int32{6, 8, 10, 12}
88 x := archsimd.LoadInt32x4(&xv)
89 y := archsimd.LoadInt32x4(&yv)
90 m, ok := reflect.TypeOf(x).MethodByName("Add")
91 if !ok {
92 t.Fatal("Add method not found")
93 }
94 fn := m.Func.Interface().(func(x, y archsimd.Int32x4) archsimd.Int32x4)
95 x = fn(x, y)
96 got := [4]int32{}
97 x.Store(&got)
98 checkSlices(t, got[:], want)
99 }
100
101 func TestVectorConversion(t *testing.T) {
102 if !archsimd.X86.AVX512GFNI() {
103 t.Skip("Test requires X86.AVX512, not available on this hardware")
104 return
105 }
106 xv := [4]int32{1, 2, 3, 4}
107 x := archsimd.LoadInt32x4(&xv)
108 xPromoted := x.AsInt64x2()
109 xPromotedDemoted := xPromoted.AsInt32x4()
110 got := [4]int32{}
111 xPromotedDemoted.Store(&got)
112 for i := range 4 {
113 if xv[i] != got[i] {
114 t.Errorf("Result at %d incorrect: want %d, got %d", i, xv[i], got[i])
115 }
116 }
117 }
118
119 func TestMaskConversion(t *testing.T) {
120 if !archsimd.X86.AVX512GFNI() {
121 t.Skip("Test requires X86.AVX512, not available on this hardware")
122 return
123 }
124 x := archsimd.LoadInt32x4Slice([]int32{5, 0, 7, 0})
125 mask := archsimd.Int32x4{}.Sub(x).ToMask()
126 y := archsimd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).Add(x).Masked(mask)
127 want := [4]int32{6, 0, 10, 0}
128 got := make([]int32, 4)
129 y.StoreSlice(got)
130 checkSlices(t, got[:], want[:])
131 }
132
133 func TestPermute(t *testing.T) {
134 if !archsimd.X86.AVX512() {
135 t.Skip("Test requires X86.AVX512, not available on this hardware")
136 return
137 }
138 x := []int64{1, 2, 3, 4, 5, 6, 7, 8}
139 indices := []uint64{7, 6, 5, 4, 3, 2, 1, 0}
140 want := []int64{8, 7, 6, 5, 4, 3, 2, 1}
141 got := make([]int64, 8)
142 archsimd.LoadInt64x8Slice(x).Permute(archsimd.LoadUint64x8Slice(indices)).StoreSlice(got)
143 checkSlices(t, got, want)
144 }
145
146 func TestPermuteOrZero(t *testing.T) {
147 x := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
148 indices := []int8{7, 6, 5, 4, 3, 2, 1, 0, -1, 8, -1, 9, -1, 10, -1, 11}
149 want := []uint8{8, 7, 6, 5, 4, 3, 2, 1, 0, 9, 0, 10, 0, 11, 0, 12}
150 got := make([]uint8, len(x))
151 archsimd.LoadUint8x16Slice(x).PermuteOrZero(archsimd.LoadInt8x16Slice(indices)).StoreSlice(got)
152 checkSlices(t, got, want)
153 }
154
155 func TestConcatPermute(t *testing.T) {
156 if !archsimd.X86.AVX512() {
157 t.Skip("Test requires X86.AVX512, not available on this hardware")
158 return
159 }
160 x := []int64{1, 2, 3, 4, 5, 6, 7, 8}
161 y := []int64{-1, -2, -3, -4, -5, -6, -7, -8}
162 indices := []uint64{7 + 8, 6, 5 + 8, 4, 3 + 8, 2, 1 + 8, 0}
163 want := []int64{-8, 7, -6, 5, -4, 3, -2, 1}
164 got := make([]int64, 8)
165 archsimd.LoadInt64x8Slice(x).ConcatPermute(archsimd.LoadInt64x8Slice(y), archsimd.LoadUint64x8Slice(indices)).StoreSlice(got)
166 checkSlices(t, got, want)
167 }
168
169 func TestCompress(t *testing.T) {
170 if !archsimd.X86.AVX512() {
171 t.Skip("Test requires X86.AVX512, not available on this hardware")
172 return
173 }
174 v1234 := archsimd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
175 v2400 := v1234.Compress(archsimd.Mask32x4FromBits(0b1010))
176 got := make([]int32, 4)
177 v2400.StoreSlice(got)
178 want := []int32{2, 4, 0, 0}
179 if !slices.Equal(got, want) {
180 t.Errorf("want and got differ, want=%v, got=%v", want, got)
181 }
182 }
183
184 func TestExpand(t *testing.T) {
185 if !archsimd.X86.AVX512() {
186 t.Skip("Test requires X86.AVX512, not available on this hardware")
187 return
188 }
189 v3400 := archsimd.LoadInt32x4Slice([]int32{3, 4, 0, 0})
190 v2400 := v3400.Expand(archsimd.Mask32x4FromBits(0b1010))
191 got := make([]int32, 4)
192 v2400.StoreSlice(got)
193 want := []int32{0, 3, 0, 4}
194 if !slices.Equal(got, want) {
195 t.Errorf("want and got differ, want=%v, got=%v", want, got)
196 }
197 }
198
199 var testShiftAllVal uint64 = 3
200
201 func TestShiftAll(t *testing.T) {
202 got := make([]int32, 4)
203 archsimd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(2).StoreSlice(got)
204 for _, v := range got {
205 if v != 0b1100 {
206 t.Errorf("expect 0b1100, got %b", v)
207 }
208 }
209 archsimd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(testShiftAllVal).StoreSlice(got)
210 for _, v := range got {
211 if v != 0b11000 {
212 t.Errorf("expect 0b11000, got %b", v)
213 }
214 }
215 }
216
217 func TestSlicesInt8(t *testing.T) {
218 a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
219 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
220 v := archsimd.LoadInt8x32Slice(a)
221 b := make([]int8, 32, 32)
222 v.StoreSlice(b)
223 checkSlices(t, a, b)
224 }
225
226 func TestSlicesInt8SetElem(t *testing.T) {
227 a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
228 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
229 v := archsimd.LoadInt8x16Slice(a)
230
231 v = v.SetElem(3, 13)
232 a[3] = 13
233
234 b := make([]int8, 16, 16)
235 v.StoreSlice(b)
236 checkSlices(t, a, b)
237 }
238
239 func TestSlicesInt8GetElem(t *testing.T) {
240 a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
241 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
242 v := archsimd.LoadInt8x16Slice(a)
243 e := v.GetElem(2)
244 if e != a[2] {
245 t.Errorf("GetElem(2) = %d != a[2] = %d", e, a[2])
246 }
247
248 }
249
250 func TestSlicesInt8TooShortLoad(t *testing.T) {
251 defer func() {
252 if r := recover(); r != nil {
253 t.Logf("Saw EXPECTED panic %v", r)
254 } else {
255 t.Errorf("Did not see expected panic")
256 }
257 }()
258 a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
259 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}
260 v := archsimd.LoadInt8x32Slice(a)
261 b := make([]int8, 32, 32)
262 v.StoreSlice(b)
263 checkSlices(t, a, b)
264 }
265
266 func TestSlicesInt8TooShortStore(t *testing.T) {
267 defer func() {
268 if r := recover(); r != nil {
269 t.Logf("Saw EXPECTED panic %v", r)
270 } else {
271 t.Errorf("Did not see expected panic")
272 }
273 }()
274 a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
275 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
276 v := archsimd.LoadInt8x32Slice(a)
277 b := make([]int8, 31)
278 v.StoreSlice(b)
279 checkSlices(t, a, b)
280 }
281
282 func TestSlicesFloat64(t *testing.T) {
283 a := []float64{1, 2, 3, 4, 5, 6, 7, 8}
284 v := archsimd.LoadFloat64x4Slice(a)
285 b := make([]float64, 4, 4)
286 v.StoreSlice(b)
287 for i := range b {
288 if a[i] != b[i] {
289 t.Errorf("a and b differ at index %d, a=%f, b=%f", i, a[i], b[i])
290 }
291 }
292 }
293
294
295 func TestMergeLocals(t *testing.T) {
296 testMergeLocalswrapper(t, archsimd.Int64x4.Add)
297 }
298
299
// forceSpill is an intentionally empty function that callers invoke between
// creating vector values and using them. It is marked noinline so that the
// call survives compilation; without the directive the empty body would be
// inlined and the call would disappear entirely.
//
//go:noinline
func forceSpill() {}
301
302 func testMergeLocalswrapper(t *testing.T, op func(archsimd.Int64x4, archsimd.Int64x4) archsimd.Int64x4) {
303 t.Helper()
304 s0 := []int64{0, 1, 2, 3}
305 s1 := []int64{-1, 0, -1, 0}
306 want := []int64{-1, 1, 1, 3}
307 v := archsimd.LoadInt64x4Slice(s0)
308 m := archsimd.LoadInt64x4Slice(s1)
309 forceSpill()
310 got := make([]int64, 4)
311 gotv := op(v, m)
312 gotv.StoreSlice(got)
313 for i := range len(want) {
314 if !(got[i] == want[i]) {
315 t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i])
316 }
317 }
318 }
319
320 func TestBitMaskFromBits(t *testing.T) {
321 if !archsimd.X86.AVX512() {
322 t.Skip("Test requires X86.AVX512, not available on this hardware")
323 return
324 }
325 results := [2]int64{}
326 want := [2]int64{0, 6}
327 m := archsimd.Mask64x2FromBits(0b10)
328 archsimd.LoadInt64x2Slice([]int64{1, 2}).Add(archsimd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
329 for i := range 2 {
330 if results[i] != want[i] {
331 t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])
332 }
333 }
334 }
335
336 var maskForTestBitMaskFromBitsLoad = uint8(0b10)
337
338 func TestBitMaskFromBitsLoad(t *testing.T) {
339 if !archsimd.X86.AVX512() {
340 t.Skip("Test requires X86.AVX512, not available on this hardware")
341 return
342 }
343 results := [2]int64{}
344 want := [2]int64{0, 6}
345 m := archsimd.Mask64x2FromBits(maskForTestBitMaskFromBitsLoad)
346 archsimd.LoadInt64x2Slice([]int64{1, 2}).Add(archsimd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
347 for i := range 2 {
348 if results[i] != want[i] {
349 t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])
350 }
351 }
352 }
353
354 func TestBitMaskToBits(t *testing.T) {
355 if !archsimd.X86.AVX512() {
356 t.Skip("Test requires X86.AVX512, not available on this hardware")
357 return
358 }
359 if v := archsimd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits(); v != 0b101 {
360 t.Errorf("Want 0b101, got %b", v)
361 }
362 }
363
364 var maskForTestBitMaskFromBitsStore uint8
365
366 func TestBitMaskToBitsStore(t *testing.T) {
367 if !archsimd.X86.AVX512() {
368 t.Skip("Test requires X86.AVX512, not available on this hardware")
369 return
370 }
371 maskForTestBitMaskFromBitsStore = archsimd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits()
372 if maskForTestBitMaskFromBitsStore != 0b101 {
373 t.Errorf("Want 0b101, got %b", maskForTestBitMaskFromBitsStore)
374 }
375 }
376
377 func TestMergeFloat(t *testing.T) {
378 k := make([]int64, 4, 4)
379 s := make([]float64, 4, 4)
380
381 a := archsimd.LoadFloat64x4Slice([]float64{1, 2, 3, 4})
382 b := archsimd.LoadFloat64x4Slice([]float64{4, 2, 3, 1})
383 g := a.Greater(b)
384 g.ToInt64x4().StoreSlice(k)
385 c := a.Merge(b, g)
386
387 c.StoreSlice(s)
388
389 checkSlices[int64](t, k, []int64{0, 0, 0, -1})
390 checkSlices[float64](t, s, []float64{4, 2, 3, 4})
391 }
392
393 func TestMergeFloat512(t *testing.T) {
394 if !archsimd.X86.AVX512() {
395 t.Skip("Test requires X86.AVX512, not available on this hardware")
396 return
397 }
398
399 k := make([]int64, 8, 8)
400 s := make([]float64, 8, 8)
401
402 a := archsimd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8})
403 b := archsimd.LoadFloat64x8Slice([]float64{8, 7, 6, 5, 4, 2, 3, 1})
404 g := a.Greater(b)
405 g.ToInt64x8().StoreSlice(k)
406 c := a.Merge(b, g)
407 d := a.Masked(g)
408
409 checkSlices[int64](t, k, []int64{0, 0, 0, 0, -1, -1, -1, -1})
410
411 c.StoreSlice(s)
412 checkSlices[float64](t, s, []float64{8, 7, 6, 5, 5, 6, 7, 8})
413
414 d.StoreSlice(s)
415 checkSlices[float64](t, s, []float64{0, 0, 0, 0, 5, 6, 7, 8})
416 }
417
418 var ro uint8 = 2
419
420 func TestRotateAllVariable(t *testing.T) {
421 if !archsimd.X86.AVX512() {
422 t.Skip("Test requires X86.AVX512, not available on this hardware")
423 return
424 }
425 got := make([]int32, 4)
426 archsimd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).RotateAllLeft(ro).StoreSlice(got)
427 for _, v := range got {
428 if v != 0b1100 {
429 t.Errorf("Want 0b1100, got %b", v)
430 }
431 }
432 }
433
434 func TestBroadcastUint32x4(t *testing.T) {
435 s := make([]uint32, 4, 4)
436 archsimd.BroadcastUint32x4(123456789).StoreSlice(s)
437 checkSlices(t, s, []uint32{123456789, 123456789, 123456789, 123456789})
438 }
439
440 func TestBroadcastFloat32x8(t *testing.T) {
441 s := make([]float32, 8, 8)
442 archsimd.BroadcastFloat32x8(123456789).StoreSlice(s)
443 checkSlices(t, s, []float32{123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789})
444 }
445
446 func TestBroadcastFloat64x2(t *testing.T) {
447 s := make([]float64, 2, 2)
448 archsimd.BroadcastFloat64x2(123456789).StoreSlice(s)
449 checkSlices(t, s, []float64{123456789, 123456789})
450 }
451
452 func TestBroadcastUint64x2(t *testing.T) {
453 s := make([]uint64, 2, 2)
454 archsimd.BroadcastUint64x2(123456789).StoreSlice(s)
455 checkSlices(t, s, []uint64{123456789, 123456789})
456 }
457
458 func TestBroadcastUint16x8(t *testing.T) {
459 s := make([]uint16, 8, 8)
460 archsimd.BroadcastUint16x8(12345).StoreSlice(s)
461 checkSlices(t, s, []uint16{12345, 12345, 12345, 12345})
462 }
463
464 func TestBroadcastInt8x32(t *testing.T) {
465 s := make([]int8, 32, 32)
466 archsimd.BroadcastInt8x32(-123).StoreSlice(s)
467 checkSlices(t, s, []int8{-123, -123, -123, -123, -123, -123, -123, -123,
468 -123, -123, -123, -123, -123, -123, -123, -123,
469 -123, -123, -123, -123, -123, -123, -123, -123,
470 -123, -123, -123, -123, -123, -123, -123, -123,
471 })
472 }
473
474 func TestMaskOpt512(t *testing.T) {
475 if !archsimd.X86.AVX512() {
476 t.Skip("Test requires X86.AVX512, not available on this hardware")
477 return
478 }
479
480 k := make([]int64, 8, 8)
481 s := make([]float64, 8, 8)
482
483 a := archsimd.LoadFloat64x8Slice([]float64{2, 0, 2, 0, 2, 0, 2, 0})
484 b := archsimd.LoadFloat64x8Slice([]float64{1, 1, 1, 1, 1, 1, 1, 1})
485 c := archsimd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8})
486 d := archsimd.LoadFloat64x8Slice([]float64{2, 4, 6, 8, 10, 12, 14, 16})
487 g := a.Greater(b)
488 e := c.Add(d).Masked(g)
489 e.StoreSlice(s)
490 g.ToInt64x8().StoreSlice(k)
491 checkSlices[int64](t, k, []int64{-1, 0, -1, 0, -1, 0, -1, 0})
492 checkSlices[float64](t, s, []float64{3, 0, 9, 0, 15, 0, 21, 0})
493 }
494
495
496
497
498
499 func flattenedTranspose(x, y archsimd.Int32x4) (a, b archsimd.Int32x4) {
500 return x.InterleaveLo(y), x.InterleaveHi(y)
501 }
502
503 func TestFlattenedTranspose(t *testing.T) {
504 r := make([]int32, 4, 4)
505 s := make([]int32, 4, 4)
506
507 x := archsimd.LoadInt32x4Slice([]int32{0xA, 0xB, 0xC, 0xD})
508 y := archsimd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
509 a, b := flattenedTranspose(x, y)
510
511 a.StoreSlice(r)
512 b.StoreSlice(s)
513
514 checkSlices[int32](t, r, []int32{0xA, 1, 0xB, 2})
515 checkSlices[int32](t, s, []int32{0xC, 3, 0xD, 4})
516
517 }
518
519 func TestClearAVXUpperBits(t *testing.T) {
520
521
522 if !archsimd.X86.AVX2() {
523 t.Skip("Test requires X86.AVX2, not available on this hardware")
524 return
525 }
526
527 r := make([]int64, 4)
528 s := make([]int64, 4)
529
530 x := archsimd.LoadInt64x4Slice([]int64{10, 20, 30, 40})
531 y := archsimd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
532
533 x.Add(y).StoreSlice(r)
534 archsimd.ClearAVXUpperBits()
535 x.Sub(y).StoreSlice(s)
536
537 checkSlices[int64](t, r, []int64{11, 22, 33, 44})
538 checkSlices[int64](t, s, []int64{9, 18, 27, 36})
539 }
540
541 func TestLeadingZeros(t *testing.T) {
542 if !archsimd.X86.AVX512() {
543 t.Skip("Test requires X86.AVX512, not available on this hardware")
544 return
545 }
546
547 src := []uint64{0b1111, 0}
548 want := []uint64{60, 64}
549 got := make([]uint64, 2)
550 archsimd.LoadUint64x2Slice(src).LeadingZeros().StoreSlice(got)
551 for i := range 2 {
552 if want[i] != got[i] {
553 t.Errorf("Result incorrect at %d: want %d, got %d", i, want[i], got[i])
554 }
555 }
556 }
557
558 func TestIsZero(t *testing.T) {
559 v1 := archsimd.LoadUint64x2Slice([]uint64{0, 1})
560 v2 := archsimd.LoadUint64x2Slice([]uint64{0, 0})
561 if v1.IsZero() {
562 t.Errorf("Result incorrect, want false, got true")
563 }
564 if !v2.IsZero() {
565 t.Errorf("Result incorrect, want true, got false")
566 }
567 if !v1.And(v2).IsZero() {
568 t.Errorf("Result incorrect, want true, got false")
569 }
570 if v1.AndNot(v2).IsZero() {
571 t.Errorf("Result incorrect, want false, got true")
572 }
573 if !v2.And(v1).IsZero() {
574 t.Errorf("Result incorrect, want true, got false")
575 }
576 if !v2.AndNot(v1).IsZero() {
577 t.Errorf("Result incorrect, want true, got false")
578 }
579 }
580
581 func TestSelect4FromPairConst(t *testing.T) {
582 x := archsimd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
583 y := archsimd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
584
585 llll := x.SelectFromPair(0, 1, 2, 3, y)
586 hhhh := x.SelectFromPair(4, 5, 6, 7, y)
587 llhh := x.SelectFromPair(0, 1, 6, 7, y)
588 hhll := x.SelectFromPair(6, 7, 0, 1, y)
589
590 lllh := x.SelectFromPair(0, 1, 2, 7, y)
591 llhl := x.SelectFromPair(0, 1, 7, 2, y)
592 lhll := x.SelectFromPair(0, 7, 1, 2, y)
593 hlll := x.SelectFromPair(7, 0, 1, 2, y)
594
595 hhhl := x.SelectFromPair(4, 5, 6, 0, y)
596 hhlh := x.SelectFromPair(4, 5, 0, 6, y)
597 hlhh := x.SelectFromPair(4, 0, 5, 6, y)
598 lhhh := x.SelectFromPair(0, 4, 5, 6, y)
599
600 lhlh := x.SelectFromPair(0, 4, 1, 5, y)
601 hlhl := x.SelectFromPair(4, 0, 5, 1, y)
602 lhhl := x.SelectFromPair(0, 4, 5, 1, y)
603 hllh := x.SelectFromPair(4, 0, 1, 5, y)
604
605 r := make([]int32, 4, 4)
606
607 foo := func(v archsimd.Int32x4, a, b, c, d int32) {
608 v.StoreSlice(r)
609 checkSlices[int32](t, r, []int32{a, b, c, d})
610 }
611
612 foo(llll, 0, 1, 2, 3)
613 foo(hhhh, 4, 5, 6, 7)
614 foo(llhh, 0, 1, 6, 7)
615 foo(hhll, 6, 7, 0, 1)
616
617 foo(lllh, 0, 1, 2, 7)
618 foo(llhl, 0, 1, 7, 2)
619 foo(lhll, 0, 7, 1, 2)
620 foo(hlll, 7, 0, 1, 2)
621
622 foo(hhhl, 4, 5, 6, 0)
623 foo(hhlh, 4, 5, 0, 6)
624 foo(hlhh, 4, 0, 5, 6)
625 foo(lhhh, 0, 4, 5, 6)
626
627 foo(lhlh, 0, 4, 1, 5)
628 foo(hlhl, 4, 0, 5, 1)
629 foo(lhhl, 0, 4, 5, 1)
630 foo(hllh, 4, 0, 1, 5)
631 }
632
633
634 func selectFromPairInt32x4(x archsimd.Int32x4, a, b, c, d uint8, y archsimd.Int32x4) archsimd.Int32x4 {
635 return x.SelectFromPair(a, b, c, d, y)
636 }
637
638 func TestSelect4FromPairVar(t *testing.T) {
639 x := archsimd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
640 y := archsimd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
641
642 llll := selectFromPairInt32x4(x, 0, 1, 2, 3, y)
643 hhhh := selectFromPairInt32x4(x, 4, 5, 6, 7, y)
644 llhh := selectFromPairInt32x4(x, 0, 1, 6, 7, y)
645 hhll := selectFromPairInt32x4(x, 6, 7, 0, 1, y)
646
647 lllh := selectFromPairInt32x4(x, 0, 1, 2, 7, y)
648 llhl := selectFromPairInt32x4(x, 0, 1, 7, 2, y)
649 lhll := selectFromPairInt32x4(x, 0, 7, 1, 2, y)
650 hlll := selectFromPairInt32x4(x, 7, 0, 1, 2, y)
651
652 hhhl := selectFromPairInt32x4(x, 4, 5, 6, 0, y)
653 hhlh := selectFromPairInt32x4(x, 4, 5, 0, 6, y)
654 hlhh := selectFromPairInt32x4(x, 4, 0, 5, 6, y)
655 lhhh := selectFromPairInt32x4(x, 0, 4, 5, 6, y)
656
657 lhlh := selectFromPairInt32x4(x, 0, 4, 1, 5, y)
658 hlhl := selectFromPairInt32x4(x, 4, 0, 5, 1, y)
659 lhhl := selectFromPairInt32x4(x, 0, 4, 5, 1, y)
660 hllh := selectFromPairInt32x4(x, 4, 0, 1, 5, y)
661
662 r := make([]int32, 4, 4)
663
664 foo := func(v archsimd.Int32x4, a, b, c, d int32) {
665 v.StoreSlice(r)
666 checkSlices[int32](t, r, []int32{a, b, c, d})
667 }
668
669 foo(llll, 0, 1, 2, 3)
670 foo(hhhh, 4, 5, 6, 7)
671 foo(llhh, 0, 1, 6, 7)
672 foo(hhll, 6, 7, 0, 1)
673
674 foo(lllh, 0, 1, 2, 7)
675 foo(llhl, 0, 1, 7, 2)
676 foo(lhll, 0, 7, 1, 2)
677 foo(hlll, 7, 0, 1, 2)
678
679 foo(hhhl, 4, 5, 6, 0)
680 foo(hhlh, 4, 5, 0, 6)
681 foo(hlhh, 4, 0, 5, 6)
682 foo(lhhh, 0, 4, 5, 6)
683
684 foo(lhlh, 0, 4, 1, 5)
685 foo(hlhl, 4, 0, 5, 1)
686 foo(lhhl, 0, 4, 5, 1)
687 foo(hllh, 4, 0, 1, 5)
688 }
689
690 func TestSelect4FromPairConstGrouped(t *testing.T) {
691 x := archsimd.LoadFloat32x8Slice([]float32{0, 1, 2, 3, 10, 11, 12, 13})
692 y := archsimd.LoadFloat32x8Slice([]float32{4, 5, 6, 7, 14, 15, 16, 17})
693
694 llll := x.SelectFromPairGrouped(0, 1, 2, 3, y)
695 hhhh := x.SelectFromPairGrouped(4, 5, 6, 7, y)
696 llhh := x.SelectFromPairGrouped(0, 1, 6, 7, y)
697 hhll := x.SelectFromPairGrouped(6, 7, 0, 1, y)
698
699 lllh := x.SelectFromPairGrouped(0, 1, 2, 7, y)
700 llhl := x.SelectFromPairGrouped(0, 1, 7, 2, y)
701 lhll := x.SelectFromPairGrouped(0, 7, 1, 2, y)
702 hlll := x.SelectFromPairGrouped(7, 0, 1, 2, y)
703
704 hhhl := x.SelectFromPairGrouped(4, 5, 6, 0, y)
705 hhlh := x.SelectFromPairGrouped(4, 5, 0, 6, y)
706 hlhh := x.SelectFromPairGrouped(4, 0, 5, 6, y)
707 lhhh := x.SelectFromPairGrouped(0, 4, 5, 6, y)
708
709 lhlh := x.SelectFromPairGrouped(0, 4, 1, 5, y)
710 hlhl := x.SelectFromPairGrouped(4, 0, 5, 1, y)
711 lhhl := x.SelectFromPairGrouped(0, 4, 5, 1, y)
712 hllh := x.SelectFromPairGrouped(4, 0, 1, 5, y)
713
714 r := make([]float32, 8, 8)
715
716 foo := func(v archsimd.Float32x8, a, b, c, d float32) {
717 v.StoreSlice(r)
718 checkSlices[float32](t, r, []float32{a, b, c, d, 10 + a, 10 + b, 10 + c, 10 + d})
719 }
720
721 foo(llll, 0, 1, 2, 3)
722 foo(hhhh, 4, 5, 6, 7)
723 foo(llhh, 0, 1, 6, 7)
724 foo(hhll, 6, 7, 0, 1)
725
726 foo(lllh, 0, 1, 2, 7)
727 foo(llhl, 0, 1, 7, 2)
728 foo(lhll, 0, 7, 1, 2)
729 foo(hlll, 7, 0, 1, 2)
730
731 foo(hhhl, 4, 5, 6, 0)
732 foo(hhlh, 4, 5, 0, 6)
733 foo(hlhh, 4, 0, 5, 6)
734 foo(lhhh, 0, 4, 5, 6)
735
736 foo(lhlh, 0, 4, 1, 5)
737 foo(hlhl, 4, 0, 5, 1)
738 foo(lhhl, 0, 4, 5, 1)
739 foo(hllh, 4, 0, 1, 5)
740 }
741
742 func TestSelectFromPairConstGroupedUint32x16(t *testing.T) {
743 if !archsimd.X86.AVX512() {
744 t.Skip("Test requires X86.AVX512, not available on this hardware")
745 return
746 }
747 x := archsimd.LoadUint32x16Slice([]uint32{0, 1, 2, 3, 10, 11, 12, 13, 20, 21, 22, 23, 30, 31, 32, 33})
748 y := archsimd.LoadUint32x16Slice([]uint32{4, 5, 6, 7, 14, 15, 16, 17, 24, 25, 26, 27, 34, 35, 36, 37})
749
750 llll := x.SelectFromPairGrouped(0, 1, 2, 3, y)
751 hhhh := x.SelectFromPairGrouped(4, 5, 6, 7, y)
752 llhh := x.SelectFromPairGrouped(0, 1, 6, 7, y)
753 hhll := x.SelectFromPairGrouped(6, 7, 0, 1, y)
754
755 lllh := x.SelectFromPairGrouped(0, 1, 2, 7, y)
756 llhl := x.SelectFromPairGrouped(0, 1, 7, 2, y)
757 lhll := x.SelectFromPairGrouped(0, 7, 1, 2, y)
758 hlll := x.SelectFromPairGrouped(7, 0, 1, 2, y)
759
760 hhhl := x.SelectFromPairGrouped(4, 5, 6, 0, y)
761 hhlh := x.SelectFromPairGrouped(4, 5, 0, 6, y)
762 hlhh := x.SelectFromPairGrouped(4, 0, 5, 6, y)
763 lhhh := x.SelectFromPairGrouped(0, 4, 5, 6, y)
764
765 lhlh := x.SelectFromPairGrouped(0, 4, 1, 5, y)
766 hlhl := x.SelectFromPairGrouped(4, 0, 5, 1, y)
767 lhhl := x.SelectFromPairGrouped(0, 4, 5, 1, y)
768 hllh := x.SelectFromPairGrouped(4, 0, 1, 5, y)
769
770 r := make([]uint32, 16, 16)
771
772 foo := func(v archsimd.Uint32x16, a, b, c, d uint32) {
773 v.StoreSlice(r)
774 checkSlices[uint32](t, r, []uint32{a, b, c, d,
775 10 + a, 10 + b, 10 + c, 10 + d,
776 20 + a, 20 + b, 20 + c, 20 + d,
777 30 + a, 30 + b, 30 + c, 30 + d,
778 })
779 }
780
781 foo(llll, 0, 1, 2, 3)
782 foo(hhhh, 4, 5, 6, 7)
783 foo(llhh, 0, 1, 6, 7)
784 foo(hhll, 6, 7, 0, 1)
785
786 foo(lllh, 0, 1, 2, 7)
787 foo(llhl, 0, 1, 7, 2)
788 foo(lhll, 0, 7, 1, 2)
789 foo(hlll, 7, 0, 1, 2)
790
791 foo(hhhl, 4, 5, 6, 0)
792 foo(hhlh, 4, 5, 0, 6)
793 foo(hlhh, 4, 0, 5, 6)
794 foo(lhhh, 0, 4, 5, 6)
795
796 foo(lhlh, 0, 4, 1, 5)
797 foo(hlhl, 4, 0, 5, 1)
798 foo(lhhl, 0, 4, 5, 1)
799 foo(hllh, 4, 0, 1, 5)
800 }
801
802 func TestSelect128FromPair(t *testing.T) {
803 x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
804 y := archsimd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})
805
806 aa := x.Select128FromPair(0, 0, y)
807 ab := x.Select128FromPair(0, 1, y)
808 bc := x.Select128FromPair(1, 2, y)
809 cd := x.Select128FromPair(2, 3, y)
810 da := x.Select128FromPair(3, 0, y)
811 dc := x.Select128FromPair(3, 2, y)
812
813 r := make([]uint64, 4, 4)
814
815 foo := func(v archsimd.Uint64x4, a, b uint64) {
816 a, b = 2*a, 2*b
817 v.StoreSlice(r)
818 checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1})
819 }
820
821 foo(aa, 0, 0)
822 foo(ab, 0, 1)
823 foo(bc, 1, 2)
824 foo(cd, 2, 3)
825 foo(da, 3, 0)
826 foo(dc, 3, 2)
827 }
828
829 func TestSelect128FromPairError(t *testing.T) {
830 x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
831 y := archsimd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})
832
833 defer func() {
834 if r := recover(); r != nil {
835 t.Logf("Saw expected panic %v", r)
836 }
837 }()
838 _ = x.Select128FromPair(0, 4, y)
839
840 t.Errorf("Should have panicked")
841 }
842
843
844 func select128FromPair(x archsimd.Uint64x4, lo, hi uint8, y archsimd.Uint64x4) archsimd.Uint64x4 {
845 return x.Select128FromPair(lo, hi, y)
846 }
847
848 func TestSelect128FromPairVar(t *testing.T) {
849 x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 2, 3})
850 y := archsimd.LoadUint64x4Slice([]uint64{4, 5, 6, 7})
851
852 aa := select128FromPair(x, 0, 0, y)
853 ab := select128FromPair(x, 0, 1, y)
854 bc := select128FromPair(x, 1, 2, y)
855 cd := select128FromPair(x, 2, 3, y)
856 da := select128FromPair(x, 3, 0, y)
857 dc := select128FromPair(x, 3, 2, y)
858
859 r := make([]uint64, 4, 4)
860
861 foo := func(v archsimd.Uint64x4, a, b uint64) {
862 a, b = 2*a, 2*b
863 v.StoreSlice(r)
864 checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1})
865 }
866
867 foo(aa, 0, 0)
868 foo(ab, 0, 1)
869 foo(bc, 1, 2)
870 foo(cd, 2, 3)
871 foo(da, 3, 0)
872 foo(dc, 3, 2)
873 }
874
875 func TestSelect2FromPairConst(t *testing.T) {
876 x := archsimd.LoadUint64x2Slice([]uint64{0, 1})
877 y := archsimd.LoadUint64x2Slice([]uint64{2, 3})
878
879 ll := x.SelectFromPair(0, 1, y)
880 hh := x.SelectFromPair(3, 2, y)
881 lh := x.SelectFromPair(0, 3, y)
882 hl := x.SelectFromPair(2, 1, y)
883
884 r := make([]uint64, 2, 2)
885
886 foo := func(v archsimd.Uint64x2, a, b uint64) {
887 v.StoreSlice(r)
888 checkSlices[uint64](t, r, []uint64{a, b})
889 }
890
891 foo(ll, 0, 1)
892 foo(hh, 3, 2)
893 foo(lh, 0, 3)
894 foo(hl, 2, 1)
895 }
896
897 func TestSelect2FromPairConstGroupedUint(t *testing.T) {
898 x := archsimd.LoadUint64x4Slice([]uint64{0, 1, 10, 11})
899 y := archsimd.LoadUint64x4Slice([]uint64{2, 3, 12, 13})
900
901 ll := x.SelectFromPairGrouped(0, 1, y)
902 hh := x.SelectFromPairGrouped(3, 2, y)
903 lh := x.SelectFromPairGrouped(0, 3, y)
904 hl := x.SelectFromPairGrouped(2, 1, y)
905
906 r := make([]uint64, 4, 4)
907
908 foo := func(v archsimd.Uint64x4, a, b uint64) {
909 v.StoreSlice(r)
910 checkSlices[uint64](t, r, []uint64{a, b, a + 10, b + 10})
911 }
912
913 foo(ll, 0, 1)
914 foo(hh, 3, 2)
915 foo(lh, 0, 3)
916 foo(hl, 2, 1)
917 }
918
919 func TestSelect2FromPairConstGroupedFloat(t *testing.T) {
920 x := archsimd.LoadFloat64x4Slice([]float64{0, 1, 10, 11})
921 y := archsimd.LoadFloat64x4Slice([]float64{2, 3, 12, 13})
922
923 ll := x.SelectFromPairGrouped(0, 1, y)
924 hh := x.SelectFromPairGrouped(3, 2, y)
925 lh := x.SelectFromPairGrouped(0, 3, y)
926 hl := x.SelectFromPairGrouped(2, 1, y)
927
928 r := make([]float64, 4, 4)
929
930 foo := func(v archsimd.Float64x4, a, b float64) {
931 v.StoreSlice(r)
932 checkSlices[float64](t, r, []float64{a, b, a + 10, b + 10})
933 }
934
935 foo(ll, 0, 1)
936 foo(hh, 3, 2)
937 foo(lh, 0, 3)
938 foo(hl, 2, 1)
939 }
940
941 func TestSelect2FromPairConstGroupedInt(t *testing.T) {
942 x := archsimd.LoadInt64x4Slice([]int64{0, 1, 10, 11})
943 y := archsimd.LoadInt64x4Slice([]int64{2, 3, 12, 13})
944
945 ll := x.SelectFromPairGrouped(0, 1, y)
946 hh := x.SelectFromPairGrouped(3, 2, y)
947 lh := x.SelectFromPairGrouped(0, 3, y)
948 hl := x.SelectFromPairGrouped(2, 1, y)
949
950 r := make([]int64, 4, 4)
951
952 foo := func(v archsimd.Int64x4, a, b int64) {
953 v.StoreSlice(r)
954 checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10})
955 }
956
957 foo(ll, 0, 1)
958 foo(hh, 3, 2)
959 foo(lh, 0, 3)
960 foo(hl, 2, 1)
961 }
962
963 func TestSelect2FromPairConstGroupedInt512(t *testing.T) {
964 if !archsimd.X86.AVX512() {
965 t.Skip("Test requires X86.AVX512, not available on this hardware")
966 return
967 }
968
969 x := archsimd.LoadInt64x8Slice([]int64{0, 1, 10, 11, 20, 21, 30, 31})
970 y := archsimd.LoadInt64x8Slice([]int64{2, 3, 12, 13, 22, 23, 32, 33})
971
972 ll := x.SelectFromPairGrouped(0, 1, y)
973 hh := x.SelectFromPairGrouped(3, 2, y)
974 lh := x.SelectFromPairGrouped(0, 3, y)
975 hl := x.SelectFromPairGrouped(2, 1, y)
976
977 r := make([]int64, 8, 8)
978
979 foo := func(v archsimd.Int64x8, a, b int64) {
980 v.StoreSlice(r)
981 checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10, a + 20, b + 20, a + 30, b + 30})
982 }
983
984 foo(ll, 0, 1)
985 foo(hh, 3, 2)
986 foo(lh, 0, 3)
987 foo(hl, 2, 1)
988 }
989
990 func TestString(t *testing.T) {
991 x := archsimd.LoadUint32x4Slice([]uint32{0, 1, 2, 3})
992 y := archsimd.LoadInt64x4Slice([]int64{-4, -5, -6, -7})
993 z := archsimd.LoadFloat32x4Slice([]float32{0.5, 1.5, -2.5, 3.5e9})
994 w := archsimd.LoadFloat64x4Slice([]float64{0.5, 1.5, -2.5, 3.5e9})
995
996 sx := "{0,1,2,3}"
997 sy := "{-4,-5,-6,-7}"
998 sz := "{0.5,1.5,-2.5,3.5e+09}"
999 sw := sz
1000
1001 if x.String() != sx {
1002 t.Errorf("x=%s wanted %s", x, sx)
1003 }
1004 if y.String() != sy {
1005 t.Errorf("y=%s wanted %s", y, sy)
1006 }
1007 if z.String() != sz {
1008 t.Errorf("z=%s wanted %s", z, sz)
1009 }
1010 if w.String() != sw {
1011 t.Errorf("w=%s wanted %s", w, sw)
1012 }
1013 t.Logf("w=%s", w)
1014 t.Logf("x=%s", x)
1015 t.Logf("y=%s", y)
1016 t.Logf("z=%s", z)
1017 }
1018
1019
// a returns a new zero-filled []int32 whose length and capacity are both 16.
func a() []int32 {
	buf := make([]int32, 16)
	return buf
}
1023
1024
1025
1026 func applyTo3(x, y, z archsimd.Int32x16, f func(x, y, z int32) int32) []int32 {
1027 ax, ay, az := a(), a(), a()
1028 x.StoreSlice(ax)
1029 y.StoreSlice(ay)
1030 z.StoreSlice(az)
1031
1032 r := a()
1033 for i := range r {
1034 r[i] = f(ax[i], ay[i], az[i])
1035 }
1036 return r
1037 }
1038
1039
1040
1041 func applyTo4(x, y, z, w archsimd.Int32x16, f func(x, y, z, w int32) int32) []int32 {
1042 ax, ay, az, aw := a(), a(), a(), a()
1043 x.StoreSlice(ax)
1044 y.StoreSlice(ay)
1045 z.StoreSlice(az)
1046 w.StoreSlice(aw)
1047
1048 r := make([]int32, len(ax), len(ax))
1049 for i := range r {
1050 r[i] = f(ax[i], ay[i], az[i], aw[i])
1051 }
1052 return r
1053 }
1054
1055 func TestSelectTernOptInt32x16(t *testing.T) {
1056 if !archsimd.X86.AVX512() {
1057 t.Skip("Test requires X86.AVX512, not available on this hardware")
1058 return
1059 }
1060 ax := []int32{0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1}
1061 ay := []int32{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1}
1062 az := []int32{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}
1063 aw := []int32{0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1}
1064 am := []int32{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
1065
1066 x := archsimd.LoadInt32x16Slice(ax)
1067 y := archsimd.LoadInt32x16Slice(ay)
1068 z := archsimd.LoadInt32x16Slice(az)
1069 w := archsimd.LoadInt32x16Slice(aw)
1070 m := archsimd.LoadInt32x16Slice(am)
1071
1072 foo := func(v archsimd.Int32x16, s []int32) {
1073 r := make([]int32, 16, 16)
1074 v.StoreSlice(r)
1075 checkSlices[int32](t, r, s)
1076 }
1077
1078 t0 := w.Xor(y).Xor(z)
1079 ft0 := func(w, y, z int32) int32 {
1080 return w ^ y ^ z
1081 }
1082 foo(t0, applyTo3(w, y, z, ft0))
1083
1084 t1 := m.And(w.Xor(y).Xor(z.Not()))
1085 ft1 := func(m, w, y, z int32) int32 {
1086 return m & (w ^ y ^ ^z)
1087 }
1088 foo(t1, applyTo4(m, w, y, z, ft1))
1089
1090 t2 := x.Xor(y).Xor(z).And(x.Xor(y).Xor(z.Not()))
1091 ft2 := func(x, y, z int32) int32 {
1092 return (x ^ y ^ z) & (x ^ y ^ ^z)
1093 }
1094 foo(t2, applyTo3(x, y, z, ft2))
1095 }
1096
1097 func TestMaskedMerge(t *testing.T) {
1098 x := archsimd.LoadInt64x4Slice([]int64{1, 2, 3, 4})
1099 y := archsimd.LoadInt64x4Slice([]int64{5, 6, 1, 1})
1100 z := archsimd.LoadInt64x4Slice([]int64{-1, -2, -3, -4})
1101 res := make([]int64, 4)
1102 expected := []int64{6, 8, -3, -4}
1103 mask := x.Less(y)
1104 if archsimd.X86.AVX512() {
1105 x.Add(y).Merge(z, mask).StoreSlice(res)
1106 } else {
1107 x.Add(y).Merge(z, mask).StoreSlice(res)
1108 }
1109 for i := range 4 {
1110 if res[i] != expected[i] {
1111 t.Errorf("got %d wanted %d", res[i], expected[i])
1112 }
1113 }
1114 }
1115
1116 func TestDotProductQuadruple(t *testing.T) {
1117 if !archsimd.X86.AVXVNNI() {
1118 t.Skip("Test requires X86.AVXVNNI, not available on this hardware")
1119 return
1120 }
1121 xd := make([]int8, 16)
1122 yd := make([]uint8, 16)
1123 zd := make([]int32, 4)
1124 wanted1 := make([]int32, 4)
1125 wanted2 := make([]int32, 4)
1126 res1 := make([]int32, 4)
1127 res2 := make([]int32, 4)
1128 for i := range 4 {
1129 xd[i] = 5
1130 yd[i] = 6
1131 zd[i] = 3
1132 wanted1[i] = 30
1133 wanted2[i] = 30
1134 }
1135 x := archsimd.LoadInt8x16Slice(xd)
1136 y := archsimd.LoadUint8x16Slice(yd)
1137 z := archsimd.LoadInt32x4Slice(zd)
1138 x.DotProductQuadruple(y).StoreSlice(res1)
1139 x.DotProductQuadruple(y).Add(z).StoreSlice(res1)
1140 for i := range 4 {
1141 if res1[i] != wanted1[i] {
1142 t.Errorf("got %d wanted %d", res1[i], wanted1[i])
1143 }
1144 if res2[i] != wanted2[i] {
1145 t.Errorf("got %d wanted %d", res2[i], wanted2[i])
1146 }
1147 }
1148 }
1149
1150 func TestPermuteScalars(t *testing.T) {
1151 x := []int32{11, 12, 13, 14}
1152 want := []int32{12, 13, 14, 11}
1153 got := make([]int32, 4)
1154 archsimd.LoadInt32x4Slice(x).PermuteScalars(1, 2, 3, 0).StoreSlice(got)
1155 checkSlices(t, got, want)
1156 }
1157
1158 func TestPermuteScalarsGrouped(t *testing.T) {
1159 x := []int32{11, 12, 13, 14, 21, 22, 23, 24}
1160 want := []int32{12, 13, 14, 11, 22, 23, 24, 21}
1161 got := make([]int32, 8)
1162 archsimd.LoadInt32x8Slice(x).PermuteScalarsGrouped(1, 2, 3, 0).StoreSlice(got)
1163 checkSlices(t, got, want)
1164 }
1165
1166 func TestPermuteScalarsHi(t *testing.T) {
1167 x := []int16{-1, -2, -3, -4, 11, 12, 13, 14}
1168 want := []int16{-1, -2, -3, -4, 12, 13, 14, 11}
1169 got := make([]int16, len(x))
1170 archsimd.LoadInt16x8Slice(x).PermuteScalarsHi(1, 2, 3, 0).StoreSlice(got)
1171 checkSlices(t, got, want)
1172 }
1173
1174 func TestPermuteScalarsLo(t *testing.T) {
1175 x := []int16{11, 12, 13, 14, 4, 5, 6, 7}
1176 want := []int16{12, 13, 14, 11, 4, 5, 6, 7}
1177 got := make([]int16, len(x))
1178 archsimd.LoadInt16x8Slice(x).PermuteScalarsLo(1, 2, 3, 0).StoreSlice(got)
1179 checkSlices(t, got, want)
1180 }
1181
1182 func TestPermuteScalarsHiGrouped(t *testing.T) {
1183 x := []int16{-1, -2, -3, -4, 11, 12, 13, 14, -11, -12, -13, -14, 111, 112, 113, 114}
1184 want := []int16{-1, -2, -3, -4, 12, 13, 14, 11, -11, -12, -13, -14, 112, 113, 114, 111}
1185 got := make([]int16, len(x))
1186 archsimd.LoadInt16x16Slice(x).PermuteScalarsHiGrouped(1, 2, 3, 0).StoreSlice(got)
1187 checkSlices(t, got, want)
1188 }
1189
1190 func TestPermuteScalarsLoGrouped(t *testing.T) {
1191 x := []int16{11, 12, 13, 14, 4, 5, 6, 7, 111, 112, 113, 114, 14, 15, 16, 17}
1192 want := []int16{12, 13, 14, 11, 4, 5, 6, 7, 112, 113, 114, 111, 14, 15, 16, 17}
1193 got := make([]int16, len(x))
1194 archsimd.LoadInt16x16Slice(x).PermuteScalarsLoGrouped(1, 2, 3, 0).StoreSlice(got)
1195 checkSlices(t, got, want)
1196 }
1197
1198 func TestClMul(t *testing.T) {
1199 var x = archsimd.LoadUint64x2Slice([]uint64{1, 5})
1200 var y = archsimd.LoadUint64x2Slice([]uint64{3, 9})
1201
1202 foo := func(v archsimd.Uint64x2, s []uint64) {
1203 r := make([]uint64, 2, 2)
1204 v.StoreSlice(r)
1205 checkSlices[uint64](t, r, s)
1206 }
1207
1208 foo(x.CarrylessMultiply(0, 0, y), []uint64{3, 0})
1209 foo(x.CarrylessMultiply(0, 1, y), []uint64{9, 0})
1210 foo(x.CarrylessMultiply(1, 0, y), []uint64{15, 0})
1211 foo(x.CarrylessMultiply(1, 1, y), []uint64{45, 0})
1212 foo(y.CarrylessMultiply(0, 0, y), []uint64{5, 0})
1213
1214 }
1215
View as plain text