Text file
src/math/big/arith_ppc64x.s
1 // Copyright 2025 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.
6
7 //go:build !math_big_pure_go && (ppc64 || ppc64le)
8
9 #include "textflag.h"
10
11 // func addVV(z, x, y []Word) (c Word)
//
// addVV stores x[i] + y[i] + carry-in into z[i] for every i in [0, len(z)),
// propagating the carry from word to word, and returns the final carry-out
// (0 or 1) in c.
//
// Only len(z) is read; x and y are walked with the same count, so callers
// presumably guarantee len(x) >= len(z) and len(y) >= len(z).
//
// Register roles:
//   R3 = len(z)/4, trip count of the 4X loop
//   R7 = len(z)%4, trip count of the 1X loop (reused as data in the 4X loop)
//   R4 = &x[i], R5 = &y[i], R6 = &z[i]
//   R0 is used as a zero operand.
// The carry bit is live from the ADDC below until the final ADDE, so nothing
// in between may be replaced by a carry-modifying instruction.
12 TEXT ·addVV(SB), NOSPLIT, $0
13 MOVD z_len+8(FP), R3 // R3 = len(z)
14 MOVD x_base+24(FP), R4 // R4 = &x[0]
15 MOVD y_base+48(FP), R5 // R5 = &y[0]
16 MOVD z_base+0(FP), R6 // R6 = &z[0]
17 // compute unrolled loop lengths: len(z) = 4*R3 + R7
18 ANDCC $3, R3, R7 // R7 = len(z) & 3, words handled one at a time
19 SRD $2, R3 // R3 = len(z) >> 2, groups of four words
20 ADDC R0, R3 // clear carry: R3 += 0 leaves R3 unchanged and produces no carry-out
21 loop1: // not a branch target; marks the start of the 1X phase
22 CMP R7, $0; BEQ loop1done; MOVD R7, CTR // skip when there is no remainder, else CTR = R7
23 loop1cont:
24 // unroll 1X: one word per iteration
25 MOVD 0(R4), R8 // R8 = x[i]
26 MOVD 0(R5), R9 // R9 = y[i]
27 ADDE R9, R8 // R8 = x[i] + y[i] + carry; carry updated
28 MOVD R8, 0(R6) // z[i] = R8
29 ADD $8, R4 // advance x pointer by one word
30 ADD $8, R5 // advance y pointer by one word
31 ADD $8, R6 // advance z pointer by one word
32 BDNZ loop1cont // CTR--; repeat while CTR != 0
33 loop1done:
34 loop4: // not a branch target; marks the start of the 4X phase
35 CMP R3, $0; BEQ loop4done; MOVD R3, CTR // skip when fewer than four words remain, else CTR = R3
36 loop4cont:
37 // unroll 4X: load four words of x and y, add as one carry chain, store four words
38 MOVD 0(R4), R7 // R7 = x[i]
39 MOVD 8(R4), R8 // R8 = x[i+1]
40 MOVD 16(R4), R9 // R9 = x[i+2]
41 MOVD 24(R4), R10 // R10 = x[i+3]
42 MOVD 0(R5), R11 // R11 = y[i]
43 MOVD 8(R5), R12 // R12 = y[i+1]
44 MOVD 16(R5), R14 // R14 = y[i+2]
45 MOVD 24(R5), R15 // R15 = y[i+3]
46 ADDE R11, R7 // R7 = x[i] + y[i] + carry
47 ADDE R12, R8 // R8 = x[i+1] + y[i+1] + carry
48 ADDE R14, R9 // R9 = x[i+2] + y[i+2] + carry
49 ADDE R15, R10 // R10 = x[i+3] + y[i+3] + carry
50 MOVD R7, 0(R6) // z[i] = R7
51 MOVD R8, 8(R6) // z[i+1] = R8
52 MOVD R9, 16(R6) // z[i+2] = R9
53 MOVD R10, 24(R6) // z[i+3] = R10
54 ADD $32, R4 // advance x pointer by four words
55 ADD $32, R5 // advance y pointer by four words
56 ADD $32, R6 // advance z pointer by four words
57 BDNZ loop4cont // CTR--; repeat while CTR != 0
58 loop4done:
59 ADDE R0, R0, R4 // save & convert add carry: R4 = 0 + 0 + carry
60 MOVD R4, c+72(FP) // return c = final carry-out
61 RET
62
63 // func subVV(z, x, y []Word) (c Word)
//
// subVV stores x[i] - y[i] - borrow-in into z[i] for every i in [0, len(z)),
// propagating the borrow from word to word, and returns the final borrow
// (0 or 1) in c.
//
// Only len(z) is read; x and y are walked with the same count, so callers
// presumably guarantee len(x) >= len(z) and len(y) >= len(z).
//
// Register roles:
//   R3 = len(z)/4, trip count of the 4X loop
//   R7 = len(z)%4, trip count of the 1X loop (reused as data in the 4X loop)
//   R4 = &x[i], R5 = &y[i], R6 = &z[i]
//   R0 is used as a zero operand.
// The carry bit (holding the borrow state) is live from the SUBC below until
// the SUBE after loop4done, so nothing in between may be replaced by a
// carry-modifying instruction.
64 TEXT ·subVV(SB), NOSPLIT, $0
65 MOVD z_len+8(FP), R3 // R3 = len(z)
66 MOVD x_base+24(FP), R4 // R4 = &x[0]
67 MOVD y_base+48(FP), R5 // R5 = &y[0]
68 MOVD z_base+0(FP), R6 // R6 = &z[0]
69 // compute unrolled loop lengths: len(z) = 4*R3 + R7
70 ANDCC $3, R3, R7 // R7 = len(z) & 3, words handled one at a time
71 SRD $2, R3 // R3 = len(z) >> 2, groups of four words
72 SUBC R0, R3 // clear carry: R3 -= 0 leaves R3 unchanged and starts the chain with no borrow
73 loop1: // not a branch target; marks the start of the 1X phase
74 CMP R7, $0; BEQ loop1done; MOVD R7, CTR // skip when there is no remainder, else CTR = R7
75 loop1cont:
76 // unroll 1X: one word per iteration
77 MOVD 0(R4), R8 // R8 = x[i]
78 MOVD 0(R5), R9 // R9 = y[i]
79 SUBE R9, R8 // R8 = x[i] - y[i] - borrow; borrow updated
80 MOVD R8, 0(R6) // z[i] = R8
81 ADD $8, R4 // advance x pointer by one word
82 ADD $8, R5 // advance y pointer by one word
83 ADD $8, R6 // advance z pointer by one word
84 BDNZ loop1cont // CTR--; repeat while CTR != 0
85 loop1done:
86 loop4: // not a branch target; marks the start of the 4X phase
87 CMP R3, $0; BEQ loop4done; MOVD R3, CTR // skip when fewer than four words remain, else CTR = R3
88 loop4cont:
89 // unroll 4X: load four words of x and y, subtract as one borrow chain, store four words
90 MOVD 0(R4), R7 // R7 = x[i]
91 MOVD 8(R4), R8 // R8 = x[i+1]
92 MOVD 16(R4), R9 // R9 = x[i+2]
93 MOVD 24(R4), R10 // R10 = x[i+3]
94 MOVD 0(R5), R11 // R11 = y[i]
95 MOVD 8(R5), R12 // R12 = y[i+1]
96 MOVD 16(R5), R14 // R14 = y[i+2]
97 MOVD 24(R5), R15 // R15 = y[i+3]
98 SUBE R11, R7 // R7 = x[i] - y[i] - borrow
99 SUBE R12, R8 // R8 = x[i+1] - y[i+1] - borrow
100 SUBE R14, R9 // R9 = x[i+2] - y[i+2] - borrow
101 SUBE R15, R10 // R10 = x[i+3] - y[i+3] - borrow
102 MOVD R7, 0(R6) // z[i] = R7
103 MOVD R8, 8(R6) // z[i+1] = R8
104 MOVD R9, 16(R6) // z[i+2] = R9
105 MOVD R10, 24(R6) // z[i+3] = R10
106 ADD $32, R4 // advance x pointer by four words
107 ADD $32, R5 // advance y pointer by four words
108 ADD $32, R6 // advance z pointer by four words
109 BDNZ loop4cont // CTR--; repeat while CTR != 0
110 loop4done:
111 SUBE R4, R4 // save carry: R4 = R4 - R4 - borrow, i.e. 0 without a borrow, -1 with one
112 SUB R4, R0, R4 // convert sub carry: R4 = 0 - R4, turning 0/-1 into 0/1
113 MOVD R4, c+72(FP) // return c = final borrow
114 RET
115
116 // func lshVU(z, x []Word, s uint) (c Word)
//
// lshVU shifts the len(z)-word vector x left by s bits into z and returns in c
// the bits shifted out of the top word: c = x[len(z)-1] >> (64 - s).
// For i > 0, z[i] = x[i]<<s | x[i-1]>>(64-s); z[0] = x[0]<<s.
// When len(z) == 0 nothing is read or written and c = 0.
//
// Only len(z) is read; x is walked with the same count, so callers presumably
// guarantee len(x) >= len(z). NOTE(review): the valid range of s is not
// checked here; confirm the caller's contract.
//
// The vector is processed from the highest index down to index 0.
// Register roles:
//   R4 = s, R8 = 64 - s
//   R5 / R6 = pointers one word past the current x / z word, moving downward
//   R7 = current word already shifted left by s, still missing the low bits
//        that come from the next lower word
//   R3 = remaining word count, then the 4X trip count; R9 = 1X trip count
//   R0 is used as a zero operand.
117 TEXT ·lshVU(SB), NOSPLIT, $0
118 MOVD z_len+8(FP), R3 // R3 = len(z)
119 CMP R3, $0; BEQ ret0 // empty vector: return c = 0
120 MOVD s+48(FP), R4 // R4 = s
121 MOVD x_base+24(FP), R5 // R5 = &x[0]
122 MOVD z_base+0(FP), R6 // R6 = &z[0]
123 // run loop backward: point R5 and R6 just past the last word
124 SLD $3, R3, R7 // R7 = len(z) * 8 bytes
125 ADD R7, R5 // R5 = &x[len(z)]
126 SLD $3, R3, R7 // R7 = len(z) * 8 bytes (recomputed)
127 ADD R7, R6 // R6 = &z[len(z)]
128 // shift first word into carry
129 MOVD -8(R5), R7 // R7 = x[len(z)-1], the top word
130 MOVD $64, R8 // R8 = word size in bits
131 SUB R4, R8 // R8 = 64 - s
132 SRD R8, R7, R9 // R9 = top word >> (64 - s): the bits shifted out
133 SLD R4, R7 // R7 = top word << s, pending the low bits from the next word
134 MOVD R9, c+56(FP) // return value stored now, which frees R9 for reuse
135 // shift remaining words
136 SUB $1, R3 // one word already consumed; R3 = len(z) - 1
137 // compute unrolled loop lengths: len(z) - 1 = 4*R3 + R9
138 ANDCC $3, R3, R9 // R9 = (len(z) - 1) & 3, words handled one at a time
139 SRD $2, R3 // R3 = (len(z) - 1) >> 2, groups of four words
140 loop1: // not a branch target; marks the start of the 1X phase
141 CMP R9, $0; BEQ loop1done; MOVD R9, CTR // skip when there is no remainder, else CTR = R9
142 loop1cont:
143 // unroll 1X: finish one output word per iteration
144 MOVD -16(R5), R10 // R10 = next lower input word
145 SRD R8, R10, R11 // R11 = its high s bits, moved to the low end
146 OR R7, R11 // R11 = pending high part | those bits: a complete output word
147 SLD R4, R10, R7 // R7 = lower word << s, pending for the next iteration
148 MOVD R11, -8(R6) // store the completed output word
149 ADD $-8, R5 // move x pointer down by one word
150 ADD $-8, R6 // move z pointer down by one word
151 BDNZ loop1cont // CTR--; repeat while CTR != 0
152 loop1done:
153 loop4: // not a branch target; marks the start of the 4X phase
154 CMP R3, $0; BEQ loop4done; MOVD R3, CTR // skip when fewer than four words remain, else CTR = R3
155 loop4cont:
156 // unroll 4X: same step as the 1X loop, four times; each input register is
156 // reused to hold an output word once its shifted copy is in R7
157 MOVD -16(R5), R9 // R9 = 1st lower input word
158 MOVD -24(R5), R10 // R10 = 2nd lower input word
159 MOVD -32(R5), R11 // R11 = 3rd lower input word
160 MOVD -40(R5), R12 // R12 = 4th lower input word
161 SRD R8, R9, R14 // R14 = high s bits of 1st word
162 OR R7, R14 // R14 = 1st completed output word
163 SLD R4, R9, R7 // R7 = 1st word << s (R9 is free after this)
164 SRD R8, R10, R9 // R9 = high s bits of 2nd word
165 OR R7, R9 // R9 = 2nd completed output word
166 SLD R4, R10, R7 // R7 = 2nd word << s (R10 is free after this)
167 SRD R8, R11, R10 // R10 = high s bits of 3rd word
168 OR R7, R10 // R10 = 3rd completed output word
169 SLD R4, R11, R7 // R7 = 3rd word << s (R11 is free after this)
170 SRD R8, R12, R11 // R11 = high s bits of 4th word
171 OR R7, R11 // R11 = 4th completed output word
172 SLD R4, R12, R7 // R7 = 4th word << s, pending for the next iteration
173 MOVD R14, -8(R6) // store 1st output word
174 MOVD R9, -16(R6) // store 2nd output word
175 MOVD R10, -24(R6) // store 3rd output word
176 MOVD R11, -32(R6) // store 4th output word
177 ADD $-32, R5 // move x pointer down by four words
178 ADD $-32, R6 // move z pointer down by four words
179 BDNZ loop4cont // CTR--; repeat while CTR != 0
180 loop4done:
181 // store final shifted bits: the lowest output word has no lower neighbor to OR in
182 MOVD R7, -8(R6) // z[0] = x[0] << s
183 RET
184 ret0: // len(z) == 0
185 MOVD R0, c+56(FP) // return c = 0
186 RET
187
188 // func rshVU(z, x []Word, s uint) (c Word)
//
// rshVU shifts the len(z)-word vector x right by s bits into z and returns in
// c the bits shifted out of the bottom word: c = x[0] << (64 - s).
// For i < len(z)-1, z[i] = x[i]>>s | x[i+1]<<(64-s); the top word is
// z[len(z)-1] = x[len(z)-1]>>s.
// When len(z) == 0 nothing is read or written and c = 0.
//
// Only len(z) is read; x is walked with the same count, so callers presumably
// guarantee len(x) >= len(z). NOTE(review): the valid range of s is not
// checked here; confirm the caller's contract.
//
// The vector is processed from index 0 upward.
// Register roles:
//   R4 = s, R8 = 64 - s
//   R5 / R6 = pointers to the current x / z word, moving upward
//   R7 = current word already shifted right by s, still missing the high bits
//        that come from the next higher word
//   R3 = remaining word count, then the 4X trip count; R9 = 1X trip count
//   R0 is used as a zero operand.
189 TEXT ·rshVU(SB), NOSPLIT, $0
190 MOVD z_len+8(FP), R3 // R3 = len(z)
191 CMP R3, $0; BEQ ret0 // empty vector: return c = 0
192 MOVD s+48(FP), R4 // R4 = s
193 MOVD x_base+24(FP), R5 // R5 = &x[0]
194 MOVD z_base+0(FP), R6 // R6 = &z[0]
195 // shift first word into carry
196 MOVD 0(R5), R7 // R7 = x[0], the bottom word
197 MOVD $64, R8 // R8 = word size in bits
198 SUB R4, R8 // R8 = 64 - s
199 SLD R8, R7, R9 // R9 = x[0] << (64 - s): the bits shifted out
200 SRD R4, R7 // R7 = x[0] >> s, pending the high bits from the next word
201 MOVD R9, c+56(FP) // return value stored now, which frees R9 for reuse
202 // shift remaining words
203 SUB $1, R3 // one word already consumed; R3 = len(z) - 1
204 // compute unrolled loop lengths: len(z) - 1 = 4*R3 + R9
205 ANDCC $3, R3, R9 // R9 = (len(z) - 1) & 3, words handled one at a time
206 SRD $2, R3 // R3 = (len(z) - 1) >> 2, groups of four words
207 loop1: // not a branch target; marks the start of the 1X phase
208 CMP R9, $0; BEQ loop1done; MOVD R9, CTR // skip when there is no remainder, else CTR = R9
209 loop1cont:
210 // unroll 1X: finish one output word per iteration
211 MOVD 8(R5), R10 // R10 = next higher input word
212 SLD R8, R10, R11 // R11 = its low s bits, moved to the high end
213 OR R7, R11 // R11 = pending low part | those bits: a complete output word
214 SRD R4, R10, R7 // R7 = higher word >> s, pending for the next iteration
215 MOVD R11, 0(R6) // store the completed output word
216 ADD $8, R5 // advance x pointer by one word
217 ADD $8, R6 // advance z pointer by one word
218 BDNZ loop1cont // CTR--; repeat while CTR != 0
219 loop1done:
220 loop4: // not a branch target; marks the start of the 4X phase
221 CMP R3, $0; BEQ loop4done; MOVD R3, CTR // skip when fewer than four words remain, else CTR = R3
222 loop4cont:
223 // unroll 4X: same step as the 1X loop, four times; each input register is
223 // reused to hold an output word once its shifted copy is in R7
224 MOVD 8(R5), R9 // R9 = 1st higher input word
225 MOVD 16(R5), R10 // R10 = 2nd higher input word
226 MOVD 24(R5), R11 // R11 = 3rd higher input word
227 MOVD 32(R5), R12 // R12 = 4th higher input word
228 SLD R8, R9, R14 // R14 = low s bits of 1st word, moved to the high end
229 OR R7, R14 // R14 = 1st completed output word
230 SRD R4, R9, R7 // R7 = 1st word >> s (R9 is free after this)
231 SLD R8, R10, R9 // R9 = low s bits of 2nd word, moved to the high end
232 OR R7, R9 // R9 = 2nd completed output word
233 SRD R4, R10, R7 // R7 = 2nd word >> s (R10 is free after this)
234 SLD R8, R11, R10 // R10 = low s bits of 3rd word, moved to the high end
235 OR R7, R10 // R10 = 3rd completed output word
236 SRD R4, R11, R7 // R7 = 3rd word >> s (R11 is free after this)
237 SLD R8, R12, R11 // R11 = low s bits of 4th word, moved to the high end
238 OR R7, R11 // R11 = 4th completed output word
239 SRD R4, R12, R7 // R7 = 4th word >> s, pending for the next iteration
240 MOVD R14, 0(R6) // store 1st output word
241 MOVD R9, 8(R6) // store 2nd output word
242 MOVD R10, 16(R6) // store 3rd output word
243 MOVD R11, 24(R6) // store 4th output word
244 ADD $32, R5 // advance x pointer by four words
245 ADD $32, R6 // advance z pointer by four words
246 BDNZ loop4cont // CTR--; repeat while CTR != 0
247 loop4done:
248 // store final shifted bits: the highest output word has no higher neighbor to OR in
249 MOVD R7, 0(R6) // z[len(z)-1] = x[len(z)-1] >> s
250 RET
251 ret0: // len(z) == 0
252 MOVD R0, c+56(FP) // return c = 0
253 RET
254
255 // func mulAddVWW(z, x []Word, m, a Word) (c Word)
//
// mulAddVWW computes z = x*m + a over len(z) words: for each i, the two-word
// value x[i]*m + carry is formed, its low word is stored in z[i] and its high
// word becomes the carry into word i+1. The carry starts as a and its final
// value is returned in c.
//
// Only len(z) is read; x is walked with the same count, so callers presumably
// guarantee len(x) >= len(z).
//
// Register roles:
//   R3 = m
//   R4 = running carry word (a on entry, c on exit)
//   R5 = len(z)/4, trip count of the 4X loop
//   R8 = len(z)%4, trip count of the 1X loop (reused as data in the 4X loop)
//   R6 = &x[i], R7 = &z[i]
//   R0 is used as a zero operand.
256 TEXT ·mulAddVWW(SB), NOSPLIT, $0
257 MOVD m+48(FP), R3 // R3 = m
258 MOVD a+56(FP), R4 // R4 = a, the initial carry word
259 MOVD z_len+8(FP), R5 // R5 = len(z)
260 MOVD x_base+24(FP), R6 // R6 = &x[0]
261 MOVD z_base+0(FP), R7 // R7 = &z[0]
262 // compute unrolled loop lengths: len(z) = 4*R5 + R8
263 ANDCC $3, R5, R8 // R8 = len(z) & 3, words handled one at a time
264 SRD $2, R5 // R5 = len(z) >> 2, groups of four words
265 loop1: // not a branch target; marks the start of the 1X phase
266 CMP R8, $0; BEQ loop1done; MOVD R8, CTR // skip when there is no remainder, else CTR = R8
267 loop1cont:
268 // unroll 1X: one word per iteration
269 MOVD 0(R6), R9 // R9 = x[i]
270 // multiply: (R10:R9) = x[i] * m, then add the incoming carry word
271 MULHDU R3, R9, R10 // R10 = high word of x[i] * m (unsigned); taken before R9 is overwritten
272 MULLD R3, R9 // R9 = low word of x[i] * m
273 ADDC R4, R9 // R9 += carry word; carry bit = overflow of that add
274 ADDE R0, R10, R4 // R4 = high word + carry bit: the carry word for the next element
275 MOVD R9, 0(R7) // z[i] = low word
276 ADD $8, R6 // advance x pointer by one word
277 ADD $8, R7 // advance z pointer by one word
278 BDNZ loop1cont // CTR--; repeat while CTR != 0
279 loop1done:
280 loop4: // not a branch target; marks the start of the 4X phase
281 CMP R5, $0; BEQ loop4done; MOVD R5, CTR // skip when fewer than four words remain, else CTR = R5
282 loop4cont:
283 // unroll 4X: four products linked by one carry chain
284 MOVD 0(R6), R8 // R8 = x[i]
285 MOVD 8(R6), R9 // R9 = x[i+1]
286 MOVD 16(R6), R10 // R10 = x[i+2]
287 MOVD 24(R6), R11 // R11 = x[i+3]
288 // multiply: each low word absorbs the previous product's high word; R12 and
288 // R14 alternate as the high-word holders. The carry bit set by an ADDC/ADDE
288 // is consumed by the next ADDE, across the multiplies in between.
289 MULHDU R3, R8, R12 // R12 = high word of x[i] * m
290 MULLD R3, R8 // R8 = low word of x[i] * m
291 ADDC R4, R8 // R8 += incoming carry word; starts the carry chain
292 MULHDU R3, R9, R14 // R14 = high word of x[i+1] * m
293 MULLD R3, R9 // R9 = low word of x[i+1] * m
294 ADDE R12, R9 // R9 += high word of previous product + carry bit
295 MULHDU R3, R10, R12 // R12 = high word of x[i+2] * m
296 MULLD R3, R10 // R10 = low word of x[i+2] * m
297 ADDE R14, R10 // R10 += high word of previous product + carry bit
298 MULHDU R3, R11, R14 // R14 = high word of x[i+3] * m
299 MULLD R3, R11 // R11 = low word of x[i+3] * m
300 ADDE R12, R11 // R11 += high word of previous product + carry bit
301 ADDE R0, R14, R4 // R4 = last high word + carry bit: the carry word for the next group
302 MOVD R8, 0(R7) // z[i] = R8
303 MOVD R9, 8(R7) // z[i+1] = R9
304 MOVD R10, 16(R7) // z[i+2] = R10
305 MOVD R11, 24(R7) // z[i+3] = R11
306 ADD $32, R6 // advance x pointer by four words
307 ADD $32, R7 // advance z pointer by four words
308 BDNZ loop4cont // CTR--; repeat while CTR != 0
309 loop4done:
310 MOVD R4, c+64(FP) // return c = final carry word (equals a when len(z) == 0)
311 RET
312
313 // func addMulVVWW(z, x, y []Word, m, a Word) (c Word)
//
// addMulVVWW computes z = x + y*m + a over len(z) words: for each i, the
// two-word value x[i] + y[i]*m + carry is formed, its low word is stored in
// z[i] and its high word becomes the carry into word i+1. The carry starts as
// a and its final value is returned in c.
//
// Only len(z) is read; x and y are walked with the same count, so callers
// presumably guarantee len(x) >= len(z) and len(y) >= len(z).
//
// Register roles:
//   R3 = m
//   R4 = running carry word (a on entry, c on exit)
//   R5 = len(z)/4, trip count of the 4X loop
//   R9 = len(z)%4, trip count of the 1X loop (reused as data in the 4X loop)
//   R6 = &x[i], R7 = &y[i], R8 = &z[i]
//   R0 is used as a zero operand.
314 TEXT ·addMulVVWW(SB), NOSPLIT, $0
315 MOVD m+72(FP), R3 // R3 = m
316 MOVD a+80(FP), R4 // R4 = a, the initial carry word
317 MOVD z_len+8(FP), R5 // R5 = len(z)
318 MOVD x_base+24(FP), R6 // R6 = &x[0]
319 MOVD y_base+48(FP), R7 // R7 = &y[0]
320 MOVD z_base+0(FP), R8 // R8 = &z[0]
321 // compute unrolled loop lengths: len(z) = 4*R5 + R9
322 ANDCC $3, R5, R9 // R9 = len(z) & 3, words handled one at a time
323 SRD $2, R5 // R5 = len(z) >> 2, groups of four words
324 loop1: // not a branch target; marks the start of the 1X phase
325 CMP R9, $0; BEQ loop1done; MOVD R9, CTR // skip when there is no remainder, else CTR = R9
326 loop1cont:
327 // unroll 1X: one word per iteration
328 MOVD 0(R6), R10 // R10 = x[i]
329 MOVD 0(R7), R11 // R11 = y[i]
330 // multiply: (R12:R11) = y[i] * m, then add the incoming carry word
331 MULHDU R3, R11, R12 // R12 = high word of y[i] * m (unsigned); taken before R11 is overwritten
332 MULLD R3, R11 // R11 = low word of y[i] * m
333 ADDC R4, R11 // R11 += carry word; carry bit = overflow of that add
334 ADDE R0, R12, R4 // R4 = high word + carry bit
335 // add x[i] into the low word and fold the resulting carry bit into R4
336 ADDC R10, R11 // R11 += x[i]; carry bit = overflow of that add
337 ADDE R0, R4 // R4 += carry bit: the carry word for the next element
338 MOVD R11, 0(R8) // z[i] = low word
339 ADD $8, R6 // advance x pointer by one word
340 ADD $8, R7 // advance y pointer by one word
341 ADD $8, R8 // advance z pointer by one word
342 BDNZ loop1cont // CTR--; repeat while CTR != 0
343 loop1done:
344 loop4: // not a branch target; marks the start of the 4X phase
345 CMP R5, $0; BEQ loop4done; MOVD R5, CTR // skip when fewer than four words remain, else CTR = R5
346 loop4cont:
347 // unroll 4X: a multiply carry chain followed by a separate add carry chain
348 MOVD 0(R6), R9 // R9 = x[i]
349 MOVD 8(R6), R10 // R10 = x[i+1]
350 MOVD 16(R6), R11 // R11 = x[i+2]
351 MOVD 24(R6), R12 // R12 = x[i+3]
352 MOVD 0(R7), R14 // R14 = y[i]
353 MOVD 8(R7), R15 // R15 = y[i+1]
354 MOVD 16(R7), R16 // R16 = y[i+2]
355 MOVD 24(R7), R17 // R17 = y[i+3]
356 // multiply: each low word absorbs the previous product's high word; R18 and
356 // R19 alternate as the high-word holders. The carry bit set by an ADDC/ADDE
356 // is consumed by the next ADDE, across the multiplies in between.
357 MULHDU R3, R14, R18 // R18 = high word of y[i] * m
358 MULLD R3, R14 // R14 = low word of y[i] * m
359 ADDC R4, R14 // R14 += incoming carry word; starts the multiply carry chain
360 MULHDU R3, R15, R19 // R19 = high word of y[i+1] * m
361 MULLD R3, R15 // R15 = low word of y[i+1] * m
362 ADDE R18, R15 // R15 += high word of previous product + carry bit
363 MULHDU R3, R16, R18 // R18 = high word of y[i+2] * m
364 MULLD R3, R16 // R16 = low word of y[i+2] * m
365 ADDE R19, R16 // R16 += high word of previous product + carry bit
366 MULHDU R3, R17, R19 // R19 = high word of y[i+3] * m
367 MULLD R3, R17 // R17 = low word of y[i+3] * m
368 ADDE R18, R17 // R17 += high word of previous product + carry bit
369 ADDE R0, R19, R4 // R4 = last high word + carry bit; ends the multiply chain
370 // add the four x words as a second carry chain, folding its carry-out into R4
371 ADDC R9, R14 // R14 += x[i]; starts the add carry chain
372 ADDE R10, R15 // R15 += x[i+1] + carry bit
373 ADDE R11, R16 // R16 += x[i+2] + carry bit
374 ADDE R12, R17 // R17 += x[i+3] + carry bit
375 ADDE R0, R4 // R4 += carry bit: the carry word for the next group
376 MOVD R14, 0(R8) // z[i] = R14
377 MOVD R15, 8(R8) // z[i+1] = R15
378 MOVD R16, 16(R8) // z[i+2] = R16
379 MOVD R17, 24(R8) // z[i+3] = R17
380 ADD $32, R6 // advance x pointer by four words
381 ADD $32, R7 // advance y pointer by four words
382 ADD $32, R8 // advance z pointer by four words
383 BDNZ loop4cont // CTR--; repeat while CTR != 0
384 loop4done:
385 MOVD R4, c+88(FP) // return c = final carry word (equals a when len(z) == 0)
386 RET
387
View as plain text