Text file
src/math/big/arith_mipsx.s
1 // Copyright 2025 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.
6
7 //go:build !math_big_pure_go && (mips || mipsle)
8
9 #include "textflag.h"
10
11 // func addVV(z, x, y []Word) (c Word) -- stores x[i] + y[i] + carry into z[i] for each of the len(z) words, lowest word first, and returns the carry out of the last word in c.
12 TEXT ·addVV(SB), NOSPLIT, $0
13 MOVW z_len+4(FP), R1 // R1 = len(z), the only length consulted
14 MOVW x_base+12(FP), R2 // R2 = pointer to the current x word
15 MOVW y_base+24(FP), R3 // R3 = pointer to the current y word
16 MOVW z_base+0(FP), R4 // R4 = pointer to the current z word
17 // compute unrolled loop lengths: R5 = len(z) % 4 single-word steps, R1 = len(z) / 4 four-word steps
18 AND $3, R1, R5
19 SRL $2, R1
20 XOR R26, R26 // clear carry (R26 carries between words, there is no hardware carry flag)
21 loop1:
22 BEQ R5, loop1done // no leftover single words
23 loop1cont:
24 // unroll 1X: one word per iteration, run len(z) % 4 times
25 MOVW 0(R2), R6 // R6 = x word
26 MOVW 0(R3), R7 // R7 = y word
27 ADDU R7, R6 // ADCS R7, R6, R6 (cr=R26): R6 = x + y
28 SGTU R7, R6, R23 // ... R23 = 1 if y > sum (unsigned), meaning x + y wrapped
29 ADDU R26, R6 // ... R6 = x + y + carry-in
30 SGTU R26, R6, R26 // ... R26 = 1 if carry-in > sum, meaning adding the carry wrapped
31 ADDU R23, R26 // ... carry-out = sum of the two wrap indicators
32 MOVW R6, 0(R4) // store the z word
33 ADDU $4, R2 // advance x, y and z by one 4-byte word
34 ADDU $4, R3
35 ADDU $4, R4
36 SUBU $1, R5
37 BNE R5, loop1cont
38 loop1done:
39 loop4:
40 BEQ R1, loop4done // no groups of four words
41 loop4cont:
42 // unroll 4X: four words per iteration, carry in R26 chained through the four add sequences
43 MOVW 0(R2), R5 // R5..R8 = four consecutive x words
44 MOVW 4(R2), R6
45 MOVW 8(R2), R7
46 MOVW 12(R2), R8
47 MOVW 0(R3), R9 // R9..R12 = four consecutive y words
48 MOVW 4(R3), R10
49 MOVW 8(R3), R11
50 MOVW 12(R3), R12
51 ADDU R9, R5 // ADCS R9, R5, R5 (cr=R26): word 0, same five-step add-with-carry as the 1X loop
52 SGTU R9, R5, R23 // ... wrap from x + y
53 ADDU R26, R5 // ... add carry-in
54 SGTU R26, R5, R26 // ... wrap from adding carry-in
55 ADDU R23, R26 // ... carry-out of word 0
56 ADDU R10, R6 // ADCS R10, R6, R6 (cr=R26): word 1
57 SGTU R10, R6, R23 // ... wrap from x + y
58 ADDU R26, R6 // ... add carry-in
59 SGTU R26, R6, R26 // ... wrap from adding carry-in
60 ADDU R23, R26 // ... carry-out of word 1
61 ADDU R11, R7 // ADCS R11, R7, R7 (cr=R26): word 2
62 SGTU R11, R7, R23 // ... wrap from x + y
63 ADDU R26, R7 // ... add carry-in
64 SGTU R26, R7, R26 // ... wrap from adding carry-in
65 ADDU R23, R26 // ... carry-out of word 2
66 ADDU R12, R8 // ADCS R12, R8, R8 (cr=R26): word 3
67 SGTU R12, R8, R23 // ... wrap from x + y
68 ADDU R26, R8 // ... add carry-in
69 SGTU R26, R8, R26 // ... wrap from adding carry-in
70 ADDU R23, R26 // ... carry-out of word 3, carried into the next iteration
71 MOVW R5, 0(R4) // store the four z words
72 MOVW R6, 4(R4)
73 MOVW R7, 8(R4)
74 MOVW R8, 12(R4)
75 ADDU $16, R2 // advance x, y and z by four words
76 ADDU $16, R3
77 ADDU $16, R4
78 SUBU $1, R1
79 BNE R1, loop4cont
80 loop4done:
81 MOVW R26, c+36(FP) // return the final carry
82 RET
83
84 // func subVV(z, x, y []Word) (c Word) -- stores x[i] - y[i] - borrow into z[i] for each of the len(z) words, lowest word first, and returns the borrow out of the last word in c.
85 TEXT ·subVV(SB), NOSPLIT, $0
86 MOVW z_len+4(FP), R1 // R1 = len(z), the only length consulted
87 MOVW x_base+12(FP), R2 // R2 = pointer to the current x word
88 MOVW y_base+24(FP), R3 // R3 = pointer to the current y word
89 MOVW z_base+0(FP), R4 // R4 = pointer to the current z word
90 // compute unrolled loop lengths: R5 = len(z) % 4 single-word steps, R1 = len(z) / 4 four-word steps
91 AND $3, R1, R5
92 SRL $2, R1
93 XOR R26, R26 // clear carry (R26 holds the borrow between words)
94 loop1:
95 BEQ R5, loop1done // no leftover single words
96 loop1cont:
97 // unroll 1X: one word per iteration, run len(z) % 4 times
98 MOVW 0(R2), R6 // R6 = x word
99 MOVW 0(R3), R7 // R7 = y word
100 SGTU R26, R6, R23 // SBCS R7, R6, R6: R23 = 1 if borrow-in > x, meaning x - borrow-in will wrap
101 SUBU R26, R6 // ... R6 = x - borrow-in
102 SGTU R7, R6, R26 // ... R26 = 1 if y > (x - borrow-in), meaning the next subtraction will wrap
103 SUBU R7, R6 // ... R6 = x - borrow-in - y
104 ADDU R23, R26 // ... borrow-out = sum of the two wrap indicators
105 MOVW R6, 0(R4) // store the z word
106 ADDU $4, R2 // advance x, y and z by one 4-byte word
107 ADDU $4, R3
108 ADDU $4, R4
109 SUBU $1, R5
110 BNE R5, loop1cont
111 loop1done:
112 loop4:
113 BEQ R1, loop4done // no groups of four words
114 loop4cont:
115 // unroll 4X: four words per iteration, borrow in R26 chained through the four subtract sequences
116 MOVW 0(R2), R5 // R5..R8 = four consecutive x words
117 MOVW 4(R2), R6
118 MOVW 8(R2), R7
119 MOVW 12(R2), R8
120 MOVW 0(R3), R9 // R9..R12 = four consecutive y words
121 MOVW 4(R3), R10
122 MOVW 8(R3), R11
123 MOVW 12(R3), R12
124 SGTU R26, R5, R23 // SBCS R9, R5, R5: word 0, same five-step subtract-with-borrow as the 1X loop
125 SUBU R26, R5 // ... subtract borrow-in
126 SGTU R9, R5, R26 // ... wrap indicator for subtracting y
127 SUBU R9, R5 // ... subtract y
128 ADDU R23, R26 // ... borrow-out of word 0
129 SGTU R26, R6, R23 // SBCS R10, R6, R6: word 1
130 SUBU R26, R6 // ... subtract borrow-in
131 SGTU R10, R6, R26 // ... wrap indicator for subtracting y
132 SUBU R10, R6 // ... subtract y
133 ADDU R23, R26 // ... borrow-out of word 1
134 SGTU R26, R7, R23 // SBCS R11, R7, R7: word 2
135 SUBU R26, R7 // ... subtract borrow-in
136 SGTU R11, R7, R26 // ... wrap indicator for subtracting y
137 SUBU R11, R7 // ... subtract y
138 ADDU R23, R26 // ... borrow-out of word 2
139 SGTU R26, R8, R23 // SBCS R12, R8, R8: word 3
140 SUBU R26, R8 // ... subtract borrow-in
141 SGTU R12, R8, R26 // ... wrap indicator for subtracting y
142 SUBU R12, R8 // ... subtract y
143 ADDU R23, R26 // ... borrow-out of word 3, carried into the next iteration
144 MOVW R5, 0(R4) // store the four z words
145 MOVW R6, 4(R4)
146 MOVW R7, 8(R4)
147 MOVW R8, 12(R4)
148 ADDU $16, R2 // advance x, y and z by four words
149 ADDU $16, R3
150 ADDU $16, R4
151 SUBU $1, R1
152 BNE R1, loop4cont
153 loop4done:
154 MOVW R26, c+36(FP) // return the final borrow
155 RET
156
157 // func lshVU(z, x []Word, s uint) (c Word) -- stores x shifted left by s bits into the len(z) words of z, walking from the highest word down, and returns the bits shifted out of the top word in c (c = 0 when len(z) == 0).
158 TEXT ·lshVU(SB), NOSPLIT, $0
159 MOVW z_len+4(FP), R1 // R1 = len(z)
160 BEQ R1, ret0 // empty vector: nothing to shift
161 MOVW s+24(FP), R2 // R2 = s, the shift count
162 MOVW x_base+12(FP), R3 // R3 = x base pointer
163 MOVW z_base+0(FP), R4 // R4 = z base pointer
164 // run loop backward: point R3 and R4 one word past the end of x and z (base + 4*len)
165 SLL $2, R1, R5
166 ADDU R5, R3
167 SLL $2, R1, R5
168 ADDU R5, R4
169 // shift first word into carry: the top word of x supplies the returned bits
170 MOVW -4(R3), R5 // R5 = highest x word
171 MOVW $32, R6
172 SUBU R2, R6 // R6 = 32 - s, the complementary shift count. NOTE(review): presumably callers guarantee 0 < s < 32, with s == 0 this count would be 32 -- confirm against the Go declaration
173 SRL R6, R5, R7 // R7 = bits of the top word that fall off the high end
174 SLL R2, R5 // R5 = top word << s, still missing the low bits from the next word down
175 MOVW R7, c+28(FP) // return value is written up front
176 // shift remaining words: len(z) - 1 words are left to read
177 SUBU $1, R1
178 // compute unrolled loop lengths: R7 = remaining % 4 single-word steps, R1 = remaining / 4 four-word steps
179 AND $3, R1, R7
180 SRL $2, R1
181 loop1:
182 BEQ R7, loop1done // no leftover single words
183 loop1cont:
184 // unroll 1X: R5 holds the pending high part of the output word at -4(R4)
185 MOVW -8(R3), R8 // R8 = next lower x word
186 SRL R6, R8, R9 // R9 = its top s bits, moved to the low end
187 OR R5, R9 // combine with the pending high part
188 SLL R2, R8, R5 // new pending high part for the next lower output word
189 MOVW R9, -4(R4) // store the completed output word
190 ADDU $-4, R3 // step both pointers down one word
191 ADDU $-4, R4
192 SUBU $1, R7
193 BNE R7, loop1cont
194 loop1done:
195 loop4:
196 BEQ R1, loop4done // no groups of four words
197 loop4cont:
198 // unroll 4X: same stitching as the 1X loop, four words per iteration
199 MOVW -8(R3), R7 // R7..R10 = the next four lower x words, highest first
200 MOVW -12(R3), R8
201 MOVW -16(R3), R9
202 MOVW -20(R3), R10
203 SRL R6, R7, R11 // output word 0 goes to R11 because R7 is still needed below
204 OR R5, R11
205 SLL R2, R7, R5 // pending high part from input R7
206 SRL R6, R8, R7 // output word 1 reuses R7
207 OR R5, R7
208 SLL R2, R8, R5 // pending high part from input R8
209 SRL R6, R9, R8 // output word 2 reuses R8
210 OR R5, R8
211 SLL R2, R9, R5 // pending high part from input R9
212 SRL R6, R10, R9 // output word 3 reuses R9
213 OR R5, R9
214 SLL R2, R10, R5 // pending high part carried into the next iteration
215 MOVW R11, -4(R4) // store the four completed output words, highest first
216 MOVW R7, -8(R4)
217 MOVW R8, -12(R4)
218 MOVW R9, -16(R4)
219 ADDU $-16, R3 // step both pointers down four words
220 ADDU $-16, R4
221 SUBU $1, R1
222 BNE R1, loop4cont
223 loop4done:
224 // store final shifted bits: the lowest output word has no lower neighbour to OR in
225 MOVW R5, -4(R4)
226 RET
227 ret0:
228 MOVW R0, c+28(FP) // len(z) == 0: return c = 0 (R0 reads as zero)
229 RET
230
231 // func rshVU(z, x []Word, s uint) (c Word) -- stores x shifted right by s bits into the len(z) words of z, walking from the lowest word up, and returns the bits shifted out of the bottom word (left-aligned) in c (c = 0 when len(z) == 0).
232 TEXT ·rshVU(SB), NOSPLIT, $0
233 MOVW z_len+4(FP), R1 // R1 = len(z)
234 BEQ R1, ret0 // empty vector: nothing to shift
235 MOVW s+24(FP), R2 // R2 = s, the shift count
236 MOVW x_base+12(FP), R3 // R3 = pointer to the current x word
237 MOVW z_base+0(FP), R4 // R4 = pointer to the current z word
238 // shift first word into carry: the bottom word of x supplies the returned bits
239 MOVW 0(R3), R5 // R5 = lowest x word
240 MOVW $32, R6
241 SUBU R2, R6 // R6 = 32 - s, the complementary shift count. NOTE(review): presumably callers guarantee 0 < s < 32, with s == 0 this count would be 32 -- confirm against the Go declaration
242 SLL R6, R5, R7 // R7 = bits of the bottom word that fall off the low end, moved to the high end
243 SRL R2, R5 // R5 = bottom word >> s, still missing the high bits from the next word up
244 MOVW R7, c+28(FP) // return value is written up front
245 // shift remaining words: len(z) - 1 words are left to read
246 SUBU $1, R1
247 // compute unrolled loop lengths: R7 = remaining % 4 single-word steps, R1 = remaining / 4 four-word steps
248 AND $3, R1, R7
249 SRL $2, R1
250 loop1:
251 BEQ R7, loop1done // no leftover single words
252 loop1cont:
253 // unroll 1X: R5 holds the pending low part of the output word at 0(R4)
254 MOVW 4(R3), R8 // R8 = next higher x word
255 SLL R6, R8, R9 // R9 = its low s bits, moved to the high end
256 OR R5, R9 // combine with the pending low part
257 SRL R2, R8, R5 // new pending low part for the next higher output word
258 MOVW R9, 0(R4) // store the completed output word
259 ADDU $4, R3 // step both pointers up one word
260 ADDU $4, R4
261 SUBU $1, R7
262 BNE R7, loop1cont
263 loop1done:
264 loop4:
265 BEQ R1, loop4done // no groups of four words
266 loop4cont:
267 // unroll 4X: same stitching as the 1X loop, four words per iteration
268 MOVW 4(R3), R7 // R7..R10 = the next four higher x words, lowest first
269 MOVW 8(R3), R8
270 MOVW 12(R3), R9
271 MOVW 16(R3), R10
272 SLL R6, R7, R11 // output word 0 goes to R11 because R7 is still needed below
273 OR R5, R11
274 SRL R2, R7, R5 // pending low part from input R7
275 SLL R6, R8, R7 // output word 1 reuses R7
276 OR R5, R7
277 SRL R2, R8, R5 // pending low part from input R8
278 SLL R6, R9, R8 // output word 2 reuses R8
279 OR R5, R8
280 SRL R2, R9, R5 // pending low part from input R9
281 SLL R6, R10, R9 // output word 3 reuses R9
282 OR R5, R9
283 SRL R2, R10, R5 // pending low part carried into the next iteration
284 MOVW R11, 0(R4) // store the four completed output words, lowest first
285 MOVW R7, 4(R4)
286 MOVW R8, 8(R4)
287 MOVW R9, 12(R4)
288 ADDU $16, R3 // step both pointers up four words
289 ADDU $16, R4
290 SUBU $1, R1
291 BNE R1, loop4cont
292 loop4done:
293 // store final shifted bits: the highest output word has no higher neighbour to OR in
294 MOVW R5, 0(R4)
295 RET
296 ret0:
297 MOVW R0, c+28(FP) // len(z) == 0: return c = 0 (R0 reads as zero)
298 RET
299
300 // func mulAddVWW(z, x []Word, m, a Word) (c Word) -- stores the low word of x[i]*m + carry into z[i] for each of the len(z) words, lowest word first, with a as the initial carry word, and returns the final carry word in c.
301 TEXT ·mulAddVWW(SB), NOSPLIT, $0
302 MOVW m+24(FP), R1 // R1 = m, the multiplier
303 MOVW a+28(FP), R2 // R2 = a, reused afterwards as the running carry word
304 MOVW z_len+4(FP), R3 // R3 = len(z), the only length consulted
305 MOVW x_base+12(FP), R4 // R4 = pointer to the current x word
306 MOVW z_base+0(FP), R5 // R5 = pointer to the current z word
307 // compute unrolled loop lengths: R6 = len(z) % 4 single-word steps, R3 = len(z) / 4 four-word steps
308 AND $3, R3, R6
309 SRL $2, R3
310 loop1:
311 BEQ R6, loop1done // no leftover single words
312 loop1cont:
313 // unroll 1X: one word per iteration, run len(z) % 4 times
314 MOVW 0(R4), R7 // R7 = x word
315 // synthetic carry, one column at a time
316 MULU R1, R7 // HI:LO = m * x word (64-bit unsigned product)
317 MOVW LO, R8 // R8 = low half of the product
318 MOVW HI, R9 // R9 = high half of the product
319 ADDU R2, R8, R7 // ADDS R2, R8, R7 (cr=R26): R7 = low half + carry word
320 SGTU R2, R7, R26 // ... R26 = 1 if carry word > sum, meaning the add wrapped
321 ADDU R26, R9, R2 // ADC $0, R9, R2: next carry word = high half + wrap bit
322 MOVW R7, 0(R5) // store the z word
323 ADDU $4, R4 // advance x and z by one 4-byte word
324 ADDU $4, R5
325 SUBU $1, R6
326 BNE R6, loop1cont
327 loop1done:
328 loop4:
329 BEQ R3, loop4done // no groups of four words
330 loop4cont:
331 // unroll 4X: four words per iteration, carry word in R2 chained through the four columns
332 MOVW 0(R4), R6 // R6..R9 = four consecutive x words
333 MOVW 4(R4), R7
334 MOVW 8(R4), R8
335 MOVW 12(R4), R9
336 // synthetic carry, one column at a time
337 MULU R1, R6 // column 0: HI:LO = m * x word
338 MOVW LO, R10
339 MOVW HI, R11
340 ADDU R2, R10, R6 // ADDS R2, R10, R6 (cr=R26): low half + carry word
341 SGTU R2, R6, R26 // ... wrap bit
342 ADDU R26, R11, R2 // ADC $0, R11, R2: carry word out of column 0
343 MULU R1, R7 // column 1
344 MOVW LO, R10
345 MOVW HI, R11
346 ADDU R2, R10, R7 // ADDS R2, R10, R7 (cr=R26): low half + carry word
347 SGTU R2, R7, R26 // ... wrap bit
348 ADDU R26, R11, R2 // ADC $0, R11, R2: carry word out of column 1
349 MULU R1, R8 // column 2
350 MOVW LO, R10
351 MOVW HI, R11
352 ADDU R2, R10, R8 // ADDS R2, R10, R8 (cr=R26): low half + carry word
353 SGTU R2, R8, R26 // ... wrap bit
354 ADDU R26, R11, R2 // ADC $0, R11, R2: carry word out of column 2
355 MULU R1, R9 // column 3
356 MOVW LO, R10
357 MOVW HI, R11
358 ADDU R2, R10, R9 // ADDS R2, R10, R9 (cr=R26): low half + carry word
359 SGTU R2, R9, R26 // ... wrap bit
360 ADDU R26, R11, R2 // ADC $0, R11, R2: carry word carried into the next iteration
361 MOVW R6, 0(R5) // store the four z words
362 MOVW R7, 4(R5)
363 MOVW R8, 8(R5)
364 MOVW R9, 12(R5)
365 ADDU $16, R4 // advance x and z by four words
366 ADDU $16, R5
367 SUBU $1, R3
368 BNE R3, loop4cont
369 loop4done:
370 MOVW R2, c+32(FP) // return the final carry word
371 RET
372
373 // func addMulVVWW(z, x, y []Word, m, a Word) (c Word) -- stores the low word of x[i] + y[i]*m + carry into z[i] for each of the len(z) words, lowest word first, with a as the initial carry word, and returns the final carry word in c.
374 TEXT ·addMulVVWW(SB), NOSPLIT, $0
375 MOVW m+36(FP), R1 // R1 = m, the multiplier
376 MOVW a+40(FP), R2 // R2 = a, reused afterwards as the running carry word
377 MOVW z_len+4(FP), R3 // R3 = len(z), the only length consulted
378 MOVW x_base+12(FP), R4 // R4 = pointer to the current x word (the addend)
379 MOVW y_base+24(FP), R5 // R5 = pointer to the current y word (the multiplicand)
380 MOVW z_base+0(FP), R6 // R6 = pointer to the current z word
381 // compute unrolled loop lengths: R7 = len(z) % 4 single-word steps, R3 = len(z) / 4 four-word steps
382 AND $3, R3, R7
383 SRL $2, R3
384 loop1:
385 BEQ R7, loop1done // no leftover single words
386 loop1cont:
387 // unroll 1X: one word per iteration, run len(z) % 4 times
388 MOVW 0(R4), R8 // R8 = x word
389 MOVW 0(R5), R9 // R9 = y word
390 // synthetic carry, one column at a time
391 MULU R1, R9 // HI:LO = m * y word (64-bit unsigned product)
392 MOVW LO, R10 // R10 = low half of the product
393 MOVW HI, R11 // R11 = high half of the product
394 ADDU R8, R10 // ADDS R8, R10, R10 (cr=R26): low half += x word
395 SGTU R8, R10, R26 // ... R26 = 1 if x word > sum, meaning the add wrapped
396 ADDU R26, R11 // ADC $0, R11, R11: fold the wrap bit into the high half
397 ADDU R2, R10, R9 // ADDS R2, R10, R9 (cr=R26): R9 = low half + carry word
398 SGTU R2, R9, R26 // ... R26 = 1 if carry word > sum, meaning the add wrapped
399 ADDU R26, R11, R2 // ADC $0, R11, R2: next carry word = high half + wrap bit
400 MOVW R9, 0(R6) // store the z word
401 ADDU $4, R4 // advance x, y and z by one 4-byte word
402 ADDU $4, R5
403 ADDU $4, R6
404 SUBU $1, R7
405 BNE R7, loop1cont
406 loop1done:
407 loop4:
408 BEQ R3, loop4done // no groups of four words
409 loop4cont:
410 // unroll 4X: four words per iteration, carry word in R2 chained through the four columns
411 MOVW 0(R4), R7 // R7..R10 = four consecutive x words
412 MOVW 4(R4), R8
413 MOVW 8(R4), R9
414 MOVW 12(R4), R10
415 MOVW 0(R5), R11 // R11..R14 = four consecutive y words
416 MOVW 4(R5), R12
417 MOVW 8(R5), R13
418 MOVW 12(R5), R14
419 // synthetic carry, one column at a time
420 MULU R1, R11 // column 0: HI:LO = m * y word
421 MOVW LO, R15
422 MOVW HI, R16
423 ADDU R7, R15 // ADDS R7, R15, R15 (cr=R26): low half += x word
424 SGTU R7, R15, R26 // ... wrap bit
425 ADDU R26, R16 // ADC $0, R16, R16: fold the wrap bit into the high half
426 ADDU R2, R15, R11 // ADDS R2, R15, R11 (cr=R26): low half + carry word
427 SGTU R2, R11, R26 // ... wrap bit
428 ADDU R26, R16, R2 // ADC $0, R16, R2: carry word out of column 0
429 MULU R1, R12 // column 1
430 MOVW LO, R15
431 MOVW HI, R16
432 ADDU R8, R15 // ADDS R8, R15, R15 (cr=R26): low half += x word
433 SGTU R8, R15, R26 // ... wrap bit
434 ADDU R26, R16 // ADC $0, R16, R16: fold the wrap bit into the high half
435 ADDU R2, R15, R12 // ADDS R2, R15, R12 (cr=R26): low half + carry word
436 SGTU R2, R12, R26 // ... wrap bit
437 ADDU R26, R16, R2 // ADC $0, R16, R2: carry word out of column 1
438 MULU R1, R13 // column 2
439 MOVW LO, R15
440 MOVW HI, R16
441 ADDU R9, R15 // ADDS R9, R15, R15 (cr=R26): low half += x word
442 SGTU R9, R15, R26 // ... wrap bit
443 ADDU R26, R16 // ADC $0, R16, R16: fold the wrap bit into the high half
444 ADDU R2, R15, R13 // ADDS R2, R15, R13 (cr=R26): low half + carry word
445 SGTU R2, R13, R26 // ... wrap bit
446 ADDU R26, R16, R2 // ADC $0, R16, R2: carry word out of column 2
447 MULU R1, R14 // column 3
448 MOVW LO, R15
449 MOVW HI, R16
450 ADDU R10, R15 // ADDS R10, R15, R15 (cr=R26): low half += x word
451 SGTU R10, R15, R26 // ... wrap bit
452 ADDU R26, R16 // ADC $0, R16, R16: fold the wrap bit into the high half
453 ADDU R2, R15, R14 // ADDS R2, R15, R14 (cr=R26): low half + carry word
454 SGTU R2, R14, R26 // ... wrap bit
455 ADDU R26, R16, R2 // ADC $0, R16, R2: carry word carried into the next iteration
456 MOVW R11, 0(R6) // store the four z words
457 MOVW R12, 4(R6)
458 MOVW R13, 8(R6)
459 MOVW R14, 12(R6)
460 ADDU $16, R4 // advance x, y and z by four words
461 ADDU $16, R5
462 ADDU $16, R6
463 SUBU $1, R3
464 BNE R3, loop4cont
465 loop4done:
466 MOVW R2, c+44(FP) // return the final carry word
467 RET
468
View as plain text