Text file
src/math/big/arith_s390x.s
1 // Copyright 2025 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.
6
7 //go:build !math_big_pure_go
8
9 #include "textflag.h"
10
11 // func addVV(z, x, y []Word) (c Word): z[i] = x[i] + y[i] for i < len(z), with the carry chained from word to word; c returns the final carry (0 or 1).
12 TEXT ·addVV(SB), NOSPLIT, $0
13 MOVB ·hasVX(SB), R1 // load the hasVX flag byte
14 CMPBEQ R1, $0, novec // flag is zero: use the scalar code below
15 JMP ·addVVvec(SB) // flag is nonzero: tail-jump to addVVvec (presumably the vector variant; defined elsewhere)
16 novec:
17 MOVD $0, R0 // R0 = 0, used as a zero operand below
18 MOVD z_len+8(FP), R1 // R1 = len(z); the lengths of x and y are not read
19 MOVD x_base+24(FP), R2 // R2 = &x[0]
20 MOVD y_base+48(FP), R3 // R3 = &y[0]
21 MOVD z_base+0(FP), R4 // R4 = &z[0]
22 // compute unrolled loop lengths
23 MOVD R1, R5
24 AND $3, R5 // R5 = len(z) % 4: words handled by the 1X loop
25 SRD $2, R1 // R1 = len(z) / 4: iterations of the 4X loop
26 ADDC R0, R1 // clear carry (adding zero cannot carry out)
27 loop1:
28 CMPBEQ R5, $0, loop1done // no leftover words: skip the 1X loop
29 loop1cont:
30 // unroll 1X (pointer/counter updates use LAY and the loop test uses CMPBNE so that the carry left by ADDE reaches the next ADDE)
31 MOVD 0(R2), R6 // R6 = x[i]
32 MOVD 0(R3), R7 // R7 = y[i]
33 ADDE R7, R6 // R6 = x[i] + y[i] + carry
34 MOVD R6, 0(R4) // z[i] = R6
35 LAY 8(R2), R2 // ADD $8, R2
36 LAY 8(R3), R3 // ADD $8, R3
37 LAY 8(R4), R4 // ADD $8, R4
38 LAY -1(R5), R5 // ADD $-1, R5
39 CMPBNE R5, $0, loop1cont
40 loop1done:
41 loop4:
42 CMPBEQ R1, $0, loop4done // no full groups of four: go straight to the carry store
43 loop4cont:
44 // unroll 4X in batches of 2 (R5 is reused as a data register here; its counter role ended with the 1X loop)
45 MOVD 0(R2), R5 // x[i]
46 MOVD 8(R2), R6 // x[i+1]
47 MOVD 0(R3), R7 // y[i]
48 MOVD 8(R3), R8 // y[i+1]
49 ADDE R7, R5 // the ADDEs must stay in word order: each consumes the previous carry
50 ADDE R8, R6
51 MOVD R5, 0(R4) // z[i]
52 MOVD R6, 8(R4) // z[i+1]
53 MOVD 16(R2), R5 // x[i+2]
54 MOVD 24(R2), R6 // x[i+3]
55 MOVD 16(R3), R7 // y[i+2]
56 MOVD 24(R3), R8 // y[i+3]
57 ADDE R7, R5
58 ADDE R8, R6
59 MOVD R5, 16(R4) // z[i+2]
60 MOVD R6, 24(R4) // z[i+3]
61 LAY 32(R2), R2 // ADD $32, R2
62 LAY 32(R3), R3 // ADD $32, R3
63 LAY 32(R4), R4 // ADD $32, R4
64 LAY -1(R1), R1 // ADD $-1, R1
65 CMPBNE R1, $0, loop4cont
66 loop4done:
67 ADDE R0, R0, R2 // save & convert add carry: R2 = 0 + 0 + carry
68 MOVD R2, c+72(FP) // return the carry as c
69 RET
70
71 // func subVV(z, x, y []Word) (c Word): z[i] = x[i] - y[i] for i < len(z), with the borrow chained from word to word; c returns the final borrow (0 or 1).
72 TEXT ·subVV(SB), NOSPLIT, $0
73 MOVB ·hasVX(SB), R1 // load the hasVX flag byte
74 CMPBEQ R1, $0, novec // flag is zero: use the scalar code below
75 JMP ·subVVvec(SB) // flag is nonzero: tail-jump to subVVvec (presumably the vector variant; defined elsewhere)
76 novec:
77 MOVD $0, R0 // R0 = 0, used as a zero operand below
78 MOVD z_len+8(FP), R1 // R1 = len(z); the lengths of x and y are not read
79 MOVD x_base+24(FP), R2 // R2 = &x[0]
80 MOVD y_base+48(FP), R3 // R3 = &y[0]
81 MOVD z_base+0(FP), R4 // R4 = &z[0]
82 // compute unrolled loop lengths
83 MOVD R1, R5
84 AND $3, R5 // R5 = len(z) % 4: words handled by the 1X loop
85 SRD $2, R1 // R1 = len(z) / 4: iterations of the 4X loop
86 SUBC R0, R1 // clear carry (subtracting zero cannot borrow)
87 loop1:
88 CMPBEQ R5, $0, loop1done // no leftover words: skip the 1X loop
89 loop1cont:
90 // unroll 1X (pointer/counter updates use LAY and the loop test uses CMPBNE so that the borrow left by SUBE reaches the next SUBE)
91 MOVD 0(R2), R6 // R6 = x[i]
92 MOVD 0(R3), R7 // R7 = y[i]
93 SUBE R7, R6 // R6 = x[i] - y[i] - borrow
94 MOVD R6, 0(R4) // z[i] = R6
95 LAY 8(R2), R2 // ADD $8, R2
96 LAY 8(R3), R3 // ADD $8, R3
97 LAY 8(R4), R4 // ADD $8, R4
98 LAY -1(R5), R5 // ADD $-1, R5
99 CMPBNE R5, $0, loop1cont
100 loop1done:
101 loop4:
102 CMPBEQ R1, $0, loop4done // no full groups of four: go straight to the borrow store
103 loop4cont:
104 // unroll 4X in batches of 2 (R5 is reused as a data register here; its counter role ended with the 1X loop)
105 MOVD 0(R2), R5 // x[i]
106 MOVD 8(R2), R6 // x[i+1]
107 MOVD 0(R3), R7 // y[i]
108 MOVD 8(R3), R8 // y[i+1]
109 SUBE R7, R5 // the SUBEs must stay in word order: each consumes the previous borrow
110 SUBE R8, R6
111 MOVD R5, 0(R4) // z[i]
112 MOVD R6, 8(R4) // z[i+1]
113 MOVD 16(R2), R5 // x[i+2]
114 MOVD 24(R2), R6 // x[i+3]
115 MOVD 16(R3), R7 // y[i+2]
116 MOVD 24(R3), R8 // y[i+3]
117 SUBE R7, R5
118 SUBE R8, R6
119 MOVD R5, 16(R4) // z[i+2]
120 MOVD R6, 24(R4) // z[i+3]
121 LAY 32(R2), R2 // ADD $32, R2
122 LAY 32(R3), R3 // ADD $32, R3
123 LAY 32(R4), R4 // ADD $32, R4
124 LAY -1(R1), R1 // ADD $-1, R1
125 CMPBNE R1, $0, loop4cont
126 loop4done:
127 SUBE R2, R2 // save carry: R2 = R2 - R2 - borrow, i.e. 0 or -1 (the x pointer in R2 is no longer needed)
128 NEG R2 // convert sub carry: negate to get 0 or 1
129 MOVD R2, c+72(FP) // return the borrow as c
130 RET
131
132 // func lshVU(z, x []Word, s uint) (c Word): z = x << s over len(z) words, walking from the last word down to the first; c = x[len(z)-1] >> (64-s), or 0 when len(z) == 0. NOTE(review): the 64-s counter-shift presumably relies on 0 < s < 64 - confirm against callers.
133 TEXT ·lshVU(SB), NOSPLIT, $0
134 MOVD $0, R0 // R0 = 0, stored as c on the empty-input path
135 MOVD z_len+8(FP), R1 // R1 = len(z); len(x) is not read
136 CMPBEQ R1, $0, ret0 // nothing to shift: return c = 0
137 MOVD s+48(FP), R2 // R2 = s
138 MOVD x_base+24(FP), R3 // R3 = &x[0]
139 MOVD z_base+0(FP), R4 // R4 = &z[0]
140 // run loop backward
141 SLD $3, R1, R5 // R5 = len(z) * 8 bytes
142 LAY (R5)(R3), R3 // ADD R5, R3
143 SLD $3, R1, R5
144 LAY (R5)(R4), R4 // ADD R5, R4
145 // shift first word into carry (R3 and R4 now point just past word len(z)-1, so -8 addresses the last word)
146 MOVD -8(R3), R5 // R5 = x[len(z)-1]
147 MOVD $64, R6
148 SUBC R2, R6 // R6 = 64 - s, the complementary shift count
149 SRD R6, R5, R7 // R7 = bits shifted out of the last word
150 SLD R2, R5 // R5 = pending bits: last word << s
151 MOVD R7, c+56(FP) // c = shifted-out bits
152 // shift remaining words
153 SUBC $1, R1 // len(z) - 1 source words are still to be read
154 // compute unrolled loop lengths
155 MOVD R1, R7
156 AND $3, R7 // R7 = remaining % 4: words handled by the 1X loop
157 SRD $2, R1 // R1 = remaining / 4: iterations of the 4X loop
158 loop1:
159 CMPBEQ R7, $0, loop1done // no leftover words: skip the 1X loop
160 loop1cont:
161 // unroll 1X
162 MOVD -16(R3), R8 // R8 = next lower source word
163 SRD R6, R8, R9 // R9 = R8 >> (64-s): the bits that cross into the word above
164 OR R5, R9 // merge with the pending bits of the word above
165 SLD R2, R8, R5 // pending bits for the next step = R8 << s
166 MOVD R9, -8(R4) // store the completed output word
167 LAY -8(R3), R3 // ADD $-8, R3
168 LAY -8(R4), R4 // ADD $-8, R4
169 LAY -1(R7), R7 // ADD $-1, R7
170 CMPBNE R7, $0, loop1cont
171 loop1done:
172 loop4:
173 CMPBEQ R1, $0, loop4done // no full groups of four: go store the final word
174 loop4cont:
175 // unroll 4X in batches of 2 (R7 is reused as a data register here; its counter role ended with the 1X loop)
176 MOVD -16(R3), R7 // source word one below the current one
177 MOVD -24(R3), R8 // source word two below
178 SRD R6, R7, R9
179 OR R5, R9 // R9 = first completed output word
180 SLD R2, R7, R5
181 SRD R6, R8, R7 // R7 is overwritten once its shifted forms are in R9/R5
182 OR R5, R7 // R7 = second completed output word
183 SLD R2, R8, R5 // R5 = pending bits carried into the next batch
184 MOVD R9, -8(R4)
185 MOVD R7, -16(R4)
186 MOVD -32(R3), R7 // second batch: same pattern, 16 bytes lower
187 MOVD -40(R3), R8
188 SRD R6, R7, R9
189 OR R5, R9
190 SLD R2, R7, R5
191 SRD R6, R8, R7
192 OR R5, R7
193 SLD R2, R8, R5
194 MOVD R9, -24(R4)
195 MOVD R7, -32(R4)
196 LAY -32(R3), R3 // ADD $-32, R3
197 LAY -32(R4), R4 // ADD $-32, R4
198 LAY -1(R1), R1 // ADD $-1, R1
199 CMPBNE R1, $0, loop4cont
200 loop4done:
201 // store final shifted bits (the lowest output word has no lower neighbour to merge with)
202 MOVD R5, -8(R4)
203 RET
204 ret0:
205 MOVD R0, c+56(FP) // empty input: c = 0
206 RET
207
208 // func rshVU(z, x []Word, s uint) (c Word): z = x >> s over len(z) words, walking from the first word up to the last; c = x[0] << (64-s), or 0 when len(z) == 0. NOTE(review): the 64-s counter-shift presumably relies on 0 < s < 64 - confirm against callers.
209 TEXT ·rshVU(SB), NOSPLIT, $0
210 MOVD $0, R0 // R0 = 0, stored as c on the empty-input path
211 MOVD z_len+8(FP), R1 // R1 = len(z); len(x) is not read
212 CMPBEQ R1, $0, ret0 // nothing to shift: return c = 0
213 MOVD s+48(FP), R2 // R2 = s
214 MOVD x_base+24(FP), R3 // R3 = &x[0]
215 MOVD z_base+0(FP), R4 // R4 = &z[0]
216 // shift first word into carry
217 MOVD 0(R3), R5 // R5 = x[0]
218 MOVD $64, R6
219 SUBC R2, R6 // R6 = 64 - s, the complementary shift count
220 SLD R6, R5, R7 // R7 = bits shifted out of x[0], left-aligned in the word
221 SRD R2, R5 // R5 = pending bits: x[0] >> s
222 MOVD R7, c+56(FP) // c = shifted-out bits
223 // shift remaining words
224 SUBC $1, R1 // len(z) - 1 source words are still to be read
225 // compute unrolled loop lengths
226 MOVD R1, R7
227 AND $3, R7 // R7 = remaining % 4: words handled by the 1X loop
228 SRD $2, R1 // R1 = remaining / 4: iterations of the 4X loop
229 loop1:
230 CMPBEQ R7, $0, loop1done // no leftover words: skip the 1X loop
231 loop1cont:
232 // unroll 1X
233 MOVD 8(R3), R8 // R8 = next higher source word
234 SLD R6, R8, R9 // R9 = R8 << (64-s): the bits that cross into the word below
235 OR R5, R9 // merge with the pending bits of the word below
236 SRD R2, R8, R5 // pending bits for the next step = R8 >> s
237 MOVD R9, 0(R4) // store the completed output word
238 LAY 8(R3), R3 // ADD $8, R3
239 LAY 8(R4), R4 // ADD $8, R4
240 LAY -1(R7), R7 // ADD $-1, R7
241 CMPBNE R7, $0, loop1cont
242 loop1done:
243 loop4:
244 CMPBEQ R1, $0, loop4done // no full groups of four: go store the final word
245 loop4cont:
246 // unroll 4X in batches of 2 (R7 is reused as a data register here; its counter role ended with the 1X loop)
247 MOVD 8(R3), R7 // source word one above the current one
248 MOVD 16(R3), R8 // source word two above
249 SLD R6, R7, R9
250 OR R5, R9 // R9 = first completed output word
251 SRD R2, R7, R5
252 SLD R6, R8, R7 // R7 is overwritten once its shifted forms are in R9/R5
253 OR R5, R7 // R7 = second completed output word
254 SRD R2, R8, R5 // R5 = pending bits carried into the next batch
255 MOVD R9, 0(R4)
256 MOVD R7, 8(R4)
257 MOVD 24(R3), R7 // second batch: same pattern, 16 bytes higher
258 MOVD 32(R3), R8
259 SLD R6, R7, R9
260 OR R5, R9
261 SRD R2, R7, R5
262 SLD R6, R8, R7
263 OR R5, R7
264 SRD R2, R8, R5
265 MOVD R9, 16(R4)
266 MOVD R7, 24(R4)
267 LAY 32(R3), R3 // ADD $32, R3
268 LAY 32(R4), R4 // ADD $32, R4
269 LAY -1(R1), R1 // ADD $-1, R1
270 CMPBNE R1, $0, loop4cont
271 loop4done:
272 // store final shifted bits (the highest output word has no higher neighbour to merge with)
273 MOVD R5, 0(R4)
274 RET
275 ret0:
276 MOVD R0, c+56(FP) // empty input: c = 0
277 RET
278
279 // func mulAddVWW(z, x []Word, m, a Word) (c Word): for i < len(z), z[i] = low word of x[i]*m + carry, where the carry word starts as a and becomes the high word of each step; c returns the last carry word.
280 TEXT ·mulAddVWW(SB), NOSPLIT, $0
281 MOVD $0, R0 // R0 = 0, used as a zero operand in ADDE
282 MOVD m+48(FP), R1 // R1 = m, the multiplier
283 MOVD a+56(FP), R2 // R2 = a, the running carry word
284 MOVD z_len+8(FP), R3 // R3 = len(z); len(x) is not read
285 MOVD x_base+24(FP), R4 // R4 = &x[0]
286 MOVD z_base+0(FP), R5 // R5 = &z[0]
287 // compute unrolled loop lengths
288 MOVD R3, R6
289 AND $3, R6 // R6 = len(z) % 4: words handled by the 1X loop
290 SRD $2, R3 // R3 = len(z) / 4: iterations of the 4X loop
291 loop1:
292 CMPBEQ R6, $0, loop1done // no leftover words: skip the 1X loop
293 loop1cont:
294 // unroll 1X in batches of 1
295 MOVD 0(R4), R11 // R11 = x[i], the multiplicand
296 // multiply (the code below treats R10 as the high word and R11 as the low word of the product R11 * R1)
297 MLGR R1, R10
298 ADDC R2, R11 // low word += incoming carry word; sets the carry bit
299 ADDE R0, R10, R2 // next carry word = high word + carry bit
300 MOVD R11, 0(R5) // z[i] = low word
301 LAY 8(R4), R4 // ADD $8, R4
302 LAY 8(R5), R5 // ADD $8, R5
303 LAY -1(R6), R6 // ADD $-1, R6
304 CMPBNE R6, $0, loop1cont
305 loop1done:
306 loop4:
307 CMPBEQ R3, $0, loop4done // no full groups of four: go store c
308 loop4cont:
309 // unroll 4X in batches of 1 (four copies of the 1X step at offsets 0, 8, 16, 24)
310 MOVD 0(R4), R11 // x[i]
311 // multiply
312 MLGR R1, R10
313 ADDC R2, R11
314 ADDE R0, R10, R2
315 MOVD R11, 0(R5) // z[i]
316 MOVD 8(R4), R11 // x[i+1]
317 // multiply
318 MLGR R1, R10
319 ADDC R2, R11
320 ADDE R0, R10, R2
321 MOVD R11, 8(R5) // z[i+1]
322 MOVD 16(R4), R11 // x[i+2]
323 // multiply
324 MLGR R1, R10
325 ADDC R2, R11
326 ADDE R0, R10, R2
327 MOVD R11, 16(R5) // z[i+2]
328 MOVD 24(R4), R11 // x[i+3]
329 // multiply
330 MLGR R1, R10
331 ADDC R2, R11
332 ADDE R0, R10, R2
333 MOVD R11, 24(R5) // z[i+3]
334 LAY 32(R4), R4 // ADD $32, R4
335 LAY 32(R5), R5 // ADD $32, R5
336 LAY -1(R3), R3 // ADD $-1, R3
337 CMPBNE R3, $0, loop4cont
338 loop4done:
339 MOVD R2, c+64(FP) // return the final carry word as c
340 RET
341
342 // func addMulVVWW(z, x, y []Word, m, a Word) (c Word): for i < len(z), z[i] = low word of x[i] + y[i]*m + carry, where the carry word starts as a and collects the overflow of each step; c returns the last carry word.
343 TEXT ·addMulVVWW(SB), NOSPLIT, $0
344 MOVD $0, R0 // R0 = 0, used as a zero operand in ADDE
345 MOVD m+72(FP), R1 // R1 = m, the multiplier
346 MOVD a+80(FP), R2 // R2 = a, the running carry word
347 MOVD z_len+8(FP), R3 // R3 = len(z); the lengths of x and y are not read
348 MOVD x_base+24(FP), R4 // R4 = &x[0]
349 MOVD y_base+48(FP), R5 // R5 = &y[0]
350 MOVD z_base+0(FP), R6 // R6 = &z[0]
351 // compute unrolled loop lengths
352 MOVD R3, R7
353 AND $3, R7 // R7 = len(z) % 4: words handled by the 1X loop
354 SRD $2, R3 // R3 = len(z) / 4: iterations of the 4X loop
355 loop1:
356 CMPBEQ R7, $0, loop1done // no leftover words: skip the 1X loop
357 loop1cont:
358 // unroll 1X in batches of 1
359 MOVD 0(R4), R8 // R8 = x[i], the addend
360 MOVD 0(R5), R11 // R11 = y[i], the multiplicand
361 // multiply (the code below treats R10 as the high word and R11 as the low word of the product R11 * R1)
362 MLGR R1, R10
363 ADDC R2, R11 // low word += incoming carry word; sets the carry bit
364 ADDE R0, R10, R2 // carry word = high word + carry bit
365 // add
366 ADDC R8, R11 // low word += x[i]; sets the carry bit
367 ADDE R0, R2 // carry word += carry bit
368 MOVD R11, 0(R6) // z[i] = low word
369 LAY 8(R4), R4 // ADD $8, R4
370 LAY 8(R5), R5 // ADD $8, R5
371 LAY 8(R6), R6 // ADD $8, R6
372 LAY -1(R7), R7 // ADD $-1, R7
373 CMPBNE R7, $0, loop1cont
374 loop1done:
375 loop4:
376 CMPBEQ R3, $0, loop4done // no full groups of four: go store c
377 loop4cont:
378 // unroll 4X in batches of 1 (four copies of the 1X step at offsets 0, 8, 16, 24; x[i] is held in R7 here, whose counter role ended with the 1X loop)
379 MOVD 0(R4), R7 // x[i]
380 MOVD 0(R5), R11 // y[i]
381 // multiply
382 MLGR R1, R10
383 ADDC R2, R11
384 ADDE R0, R10, R2
385 // add
386 ADDC R7, R11
387 ADDE R0, R2
388 MOVD R11, 0(R6) // z[i]
389 MOVD 8(R4), R7 // x[i+1]
390 MOVD 8(R5), R11 // y[i+1]
391 // multiply
392 MLGR R1, R10
393 ADDC R2, R11
394 ADDE R0, R10, R2
395 // add
396 ADDC R7, R11
397 ADDE R0, R2
398 MOVD R11, 8(R6) // z[i+1]
399 MOVD 16(R4), R7 // x[i+2]
400 MOVD 16(R5), R11 // y[i+2]
401 // multiply
402 MLGR R1, R10
403 ADDC R2, R11
404 ADDE R0, R10, R2
405 // add
406 ADDC R7, R11
407 ADDE R0, R2
408 MOVD R11, 16(R6) // z[i+2]
409 MOVD 24(R4), R7 // x[i+3]
410 MOVD 24(R5), R11 // y[i+3]
411 // multiply
412 MLGR R1, R10
413 ADDC R2, R11
414 ADDE R0, R10, R2
415 // add
416 ADDC R7, R11
417 ADDE R0, R2
418 MOVD R11, 24(R6) // z[i+3]
419 LAY 32(R4), R4 // ADD $32, R4
420 LAY 32(R5), R5 // ADD $32, R5
421 LAY 32(R6), R6 // ADD $32, R6
422 LAY -1(R3), R3 // ADD $-1, R3
423 CMPBNE R3, $0, loop4cont
424 loop4done:
425 MOVD R2, c+88(FP) // return the final carry word as c
426 RET
427
View as plain text