Text file
src/math/big/arith_arm.s
1 // Copyright 2025 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.
6
7 //go:build !math_big_pure_go
8
9 #include "textflag.h"
10
11 // func addVV(z, x, y []Word) (c Word)
// addVV adds the words of x and y pairwise with carry propagation, writes
// each sum word through the z pointer, and stores the final carry (0 or 1)
// in c. Only z_len is consulted for the iteration count; nothing here checks
// the lengths of x or y (presumably callers guarantee they are at least
// len(z) — confirm against the Go-side callers).
// Register roles: R0 = 4-word group count, R1/R2 = x/y read pointers,
// R3 = z write pointer, R4 = leftover (len(z) % 4) word count.
// The carry flag is live from the ADD.S below until the final SBC, so the
// loop bookkeeping uses only SUB without the .S suffix and TEQ, which the
// code relies on to leave the carry untouched.
12 TEXT ·addVV(SB), NOSPLIT, $0
13 MOVW z_len+4(FP), R0 // R0 = len(z)
14 MOVW x_base+12(FP), R1 // R1 = pointer to x[0]
15 MOVW y_base+24(FP), R2 // R2 = pointer to y[0]
16 MOVW z_base+0(FP), R3 // R3 = pointer to z[0]
17 // compute unrolled loop lengths
18 AND $3, R0, R4 // R4 = len(z) % 4, words for the 1X loop
19 MOVW R0>>2, R0 // R0 = len(z) / 4, iterations of the 4X loop
20 ADD.S $0, R0 // clear carry (adding zero cannot carry out)
21 loop1:
22 TEQ $0, R4; BEQ loop1done // no leftover words: skip the 1X loop
23 loop1cont:
24 // unroll 1X
25 MOVW.P 4(R1), R5 // R5 = next x word; post-increment R1
26 MOVW.P 4(R2), R6 // R6 = next y word; post-increment R2
27 ADC.S R6, R5 // R5 = R5 + R6 + carry; carry updated
28 MOVW.P R5, 4(R3) // store sum word; post-increment R3
29 SUB $1, R4 // no .S: must not disturb the carry
30 TEQ $0, R4; BNE loop1cont
31 loop1done:
32 loop4:
33 TEQ $0, R0; BEQ loop4done // fewer than 4 words remained: skip the 4X loop
34 loop4cont:
35 // unroll 4X
// load four x words into R4-R7 and four y words into R8, R9, R11, R12
36 MOVW.P 4(R1), R4
37 MOVW.P 4(R1), R5
38 MOVW.P 4(R1), R6
39 MOVW.P 4(R1), R7
40 MOVW.P 4(R2), R8
41 MOVW.P 4(R2), R9
42 MOVW.P 4(R2), R11
43 MOVW.P 4(R2), R12
// add with the carry rippling from the lowest word to the highest
44 ADC.S R8, R4
45 ADC.S R9, R5
46 ADC.S R11, R6
47 ADC.S R12, R7
// store the four sum words in order
48 MOVW.P R4, 4(R3)
49 MOVW.P R5, 4(R3)
50 MOVW.P R6, 4(R3)
51 MOVW.P R7, 4(R3)
52 SUB $1, R0 // no .S: must not disturb the carry
53 TEQ $0, R0; BNE loop4cont
54 loop4done:
55 SBC R1, R1 // save carry: R1 = carry - 1 (0 if carry set, -1 if clear)
56 ADD $1, R1 // convert add carry: R1 = 1 if carry was set, else 0
57 MOVW R1, c+36(FP) // return c
58 RET
59
60 // func subVV(z, x, y []Word) (c Word)
// subVV subtracts the words of y from the words of x pairwise with borrow
// propagation, writes each difference word through the z pointer, and stores
// the final borrow (0 or 1) in c. Only z_len is consulted for the iteration
// count; nothing here checks the lengths of x or y (presumably callers
// guarantee they are at least len(z) — confirm against the Go-side callers).
// Register roles: R0 = 4-word group count, R1/R2 = x/y read pointers,
// R3 = z write pointer, R4 = leftover (len(z) % 4) word count.
// ARM subtraction keeps the borrow inverted in the carry flag (carry set
// means no borrow). That flag is live from the SUB.S below until the final
// SBC, so the loop bookkeeping uses only SUB without .S and TEQ, which the
// code relies on to leave the carry untouched.
61 TEXT ·subVV(SB), NOSPLIT, $0
62 MOVW z_len+4(FP), R0 // R0 = len(z)
63 MOVW x_base+12(FP), R1 // R1 = pointer to x[0]
64 MOVW y_base+24(FP), R2 // R2 = pointer to y[0]
65 MOVW z_base+0(FP), R3 // R3 = pointer to z[0]
66 // compute unrolled loop lengths
67 AND $3, R0, R4 // R4 = len(z) % 4, words for the 1X loop
68 MOVW R0>>2, R0 // R0 = len(z) / 4, iterations of the 4X loop
69 SUB.S $0, R0 // clear borrow (subtracting zero leaves the carry flag set, i.e. no borrow)
70 loop1:
71 TEQ $0, R4; BEQ loop1done // no leftover words: skip the 1X loop
72 loop1cont:
73 // unroll 1X
74 MOVW.P 4(R1), R5 // R5 = next x word; post-increment R1
75 MOVW.P 4(R2), R6 // R6 = next y word; post-increment R2
76 SBC.S R6, R5 // R5 = R5 - R6 - borrow; borrow updated
77 MOVW.P R5, 4(R3) // store difference word; post-increment R3
78 SUB $1, R4 // no .S: must not disturb the borrow
79 TEQ $0, R4; BNE loop1cont
80 loop1done:
81 loop4:
82 TEQ $0, R0; BEQ loop4done // fewer than 4 words remained: skip the 4X loop
83 loop4cont:
84 // unroll 4X
// load four x words into R4-R7 and four y words into R8, R9, R11, R12
85 MOVW.P 4(R1), R4
86 MOVW.P 4(R1), R5
87 MOVW.P 4(R1), R6
88 MOVW.P 4(R1), R7
89 MOVW.P 4(R2), R8
90 MOVW.P 4(R2), R9
91 MOVW.P 4(R2), R11
92 MOVW.P 4(R2), R12
// subtract with the borrow rippling from the lowest word to the highest
93 SBC.S R8, R4
94 SBC.S R9, R5
95 SBC.S R11, R6
96 SBC.S R12, R7
// store the four difference words in order
97 MOVW.P R4, 4(R3)
98 MOVW.P R5, 4(R3)
99 MOVW.P R6, 4(R3)
100 MOVW.P R7, 4(R3)
101 SUB $1, R0 // no .S: must not disturb the borrow
102 TEQ $0, R0; BNE loop4cont
103 loop4done:
104 SBC R1, R1 // save carry: R1 = carry - 1 (0 if no borrow, -1 if borrow)
105 RSB $0, R1, R1 // convert sub carry: R1 = 0 - R1, giving 1 on borrow, else 0
106 MOVW R1, c+36(FP) // return c
107 RET
108
109 // func lshVU(z, x []Word, s uint) (c Word)
// lshVU shifts the len(z)-word vector x left by s bits into z and stores in c
// the bits shifted out of the most significant word (x[len(z)-1] >> (32-s)).
// When len(z) is 0 it stores c = 0 and touches no memory.
// The vector is walked from the highest word down to the lowest, so each
// output word combines the low bits of one input word (kept in R4) with the
// high bits of the next lower input word.
// Register roles: R0 = word count / 4X iteration count, R1 = s, R2 = x read
// pointer, R3 = z write pointer, R4 = pending bits for the next store,
// R5 = 32 - s, R6 = 1X loop count.
// NOTE(review): no range check on s is performed here; presumably callers
// restrict it to a valid shift count — confirm against the Go-side callers.
110 TEXT ·lshVU(SB), NOSPLIT, $0
111 MOVW z_len+4(FP), R0 // R0 = len(z)
112 TEQ $0, R0; BEQ ret0 // empty vector: return c = 0
113 MOVW s+24(FP), R1 // R1 = s
114 MOVW x_base+12(FP), R2
115 MOVW z_base+0(FP), R3
116 // run loop backward
117 ADD R0<<2, R2, R2 // R2 = one word past the end of x
118 ADD R0<<2, R3, R3 // R3 = one word past the end of z
119 // shift first word into carry
120 MOVW.W -4(R2), R4 // pre-decrement R2, then R4 = top word of x
121 MOVW $32, R5
122 SUB R1, R5 // R5 = 32 - s
123 MOVW R4>>R5, R6 // R6 = bits shifted out of the top word
124 MOVW R4<<R1, R4 // R4 = remaining bits, pending for the top word of z
125 MOVW R6, c+28(FP) // return value is final from here on
126 // shift remaining words
127 SUB $1, R0 // one input word already consumed
128 // compute unrolled loop lengths
129 AND $3, R0, R6 // R6 = remaining % 4, words for the 1X loop
130 MOVW R0>>2, R0 // R0 = remaining / 4, iterations of the 4X loop
131 loop1:
132 TEQ $0, R6; BEQ loop1done
133 loop1cont:
134 // unroll 1X
135 MOVW.W -4(R2), R7 // R7 = next lower input word
136 ORR R7>>R5, R4 // fill in the low bits from the next lower word
137 MOVW.W R4, -4(R3) // pre-decrement R3 and store the finished word
138 MOVW R7<<R1, R4 // start the next output word
139 SUB $1, R6
140 TEQ $0, R6; BNE loop1cont
141 loop1done:
142 loop4:
143 TEQ $0, R0; BEQ loop4done
144 loop4cont:
145 // unroll 4X
// load four input words (descending addresses), then repeat the 1X
// combine/store/restart step once per word
146 MOVW.W -4(R2), R6
147 MOVW.W -4(R2), R7
148 MOVW.W -4(R2), R8
149 MOVW.W -4(R2), R9
150 ORR R6>>R5, R4
151 MOVW.W R4, -4(R3)
152 MOVW R6<<R1, R4
153 ORR R7>>R5, R4
154 MOVW.W R4, -4(R3)
155 MOVW R7<<R1, R4
156 ORR R8>>R5, R4
157 MOVW.W R4, -4(R3)
158 MOVW R8<<R1, R4
159 ORR R9>>R5, R4
160 MOVW.W R4, -4(R3)
161 MOVW R9<<R1, R4
162 SUB $1, R0
163 TEQ $0, R0; BNE loop4cont
164 loop4done:
165 // store final shifted bits
166 MOVW.W R4, -4(R3) // lowest word of z: x[0] << s with no lower neighbour
167 RET
168 ret0:
// len(z) == 0: nothing to shift, report no bits shifted out
169 MOVW $0, R1
170 MOVW R1, c+28(FP)
171 RET
172
173 // func rshVU(z, x []Word, s uint) (c Word)
// rshVU shifts the len(z)-word vector x right by s bits into z and stores in
// c the bits shifted out of the least significant word (x[0] << (32-s)).
// When len(z) is 0 it stores c = 0 and touches no memory.
// The vector is walked from the lowest word up to the highest, so each
// output word combines the high bits of one input word (kept in R4) with the
// low bits of the next higher input word.
// Register roles: R0 = word count / 4X iteration count, R1 = s, R2 = x read
// pointer, R3 = z write pointer, R4 = pending bits for the next store,
// R5 = 32 - s, R6 = 1X loop count.
// NOTE(review): no range check on s is performed here; presumably callers
// restrict it to a valid shift count — confirm against the Go-side callers.
174 TEXT ·rshVU(SB), NOSPLIT, $0
175 MOVW z_len+4(FP), R0 // R0 = len(z)
176 TEQ $0, R0; BEQ ret0 // empty vector: return c = 0
177 MOVW s+24(FP), R1 // R1 = s
178 MOVW x_base+12(FP), R2 // R2 = pointer to x[0]
179 MOVW z_base+0(FP), R3 // R3 = pointer to z[0]
180 // shift first word into carry
181 MOVW.P 4(R2), R4 // R4 = x[0]; post-increment R2
182 MOVW $32, R5
183 SUB R1, R5 // R5 = 32 - s
184 MOVW R4<<R5, R6 // R6 = bits shifted out of the bottom word
185 MOVW R4>>R1, R4 // R4 = remaining bits, pending for z[0]
186 MOVW R6, c+28(FP) // return value is final from here on
187 // shift remaining words
188 SUB $1, R0 // one input word already consumed
189 // compute unrolled loop lengths
190 AND $3, R0, R6 // R6 = remaining % 4, words for the 1X loop
191 MOVW R0>>2, R0 // R0 = remaining / 4, iterations of the 4X loop
192 loop1:
193 TEQ $0, R6; BEQ loop1done
194 loop1cont:
195 // unroll 1X
196 MOVW.P 4(R2), R7 // R7 = next higher input word
197 ORR R7<<R5, R4 // fill in the high bits from the next higher word
198 MOVW.P R4, 4(R3) // store the finished word; post-increment R3
199 MOVW R7>>R1, R4 // start the next output word
200 SUB $1, R6
201 TEQ $0, R6; BNE loop1cont
202 loop1done:
203 loop4:
204 TEQ $0, R0; BEQ loop4done
205 loop4cont:
206 // unroll 4X
// load four input words (ascending addresses), then repeat the 1X
// combine/store/restart step once per word
207 MOVW.P 4(R2), R6
208 MOVW.P 4(R2), R7
209 MOVW.P 4(R2), R8
210 MOVW.P 4(R2), R9
211 ORR R6<<R5, R4
212 MOVW.P R4, 4(R3)
213 MOVW R6>>R1, R4
214 ORR R7<<R5, R4
215 MOVW.P R4, 4(R3)
216 MOVW R7>>R1, R4
217 ORR R8<<R5, R4
218 MOVW.P R4, 4(R3)
219 MOVW R8>>R1, R4
220 ORR R9<<R5, R4
221 MOVW.P R4, 4(R3)
222 MOVW R9>>R1, R4
223 SUB $1, R0
224 TEQ $0, R0; BNE loop4cont
225 loop4done:
226 // store final shifted bits
227 MOVW.P R4, 4(R3) // highest word of z: top input word >> s with no higher neighbour
228 RET
229 ret0:
// len(z) == 0: nothing to shift, report no bits shifted out
230 MOVW $0, R1
231 MOVW R1, c+28(FP)
232 RET
233
234 // func mulAddVWW(z, x []Word, m, a Word) (c Word)
// mulAddVWW multiplies each word of x by m, adds a running carry word that
// starts as a, stores the low 32 bits of each result through the z pointer,
// and keeps the high 32 bits as the carry into the next word. The carry left
// after the last word is stored in c.
// Only z_len is consulted for the iteration count; nothing here checks the
// length of x (presumably callers guarantee it is at least len(z) — confirm
// against the Go-side callers).
// Register roles: R0 = m, R1 = carry word, R2 = 4-word group count,
// R3 = x read pointer, R4 = z write pointer, R5 = 1X loop count.
235 TEXT ·mulAddVWW(SB), NOSPLIT, $0
236 MOVW m+24(FP), R0 // R0 = multiplier m
237 MOVW a+28(FP), R1 // R1 = initial carry word a
238 MOVW z_len+4(FP), R2 // R2 = len(z)
239 MOVW x_base+12(FP), R3 // R3 = pointer to x[0]
240 MOVW z_base+0(FP), R4 // R4 = pointer to z[0]
241 // compute unrolled loop lengths
242 AND $3, R2, R5 // R5 = len(z) % 4, words for the 1X loop
243 MOVW R2>>2, R2 // R2 = len(z) / 4, iterations of the 4X loop
244 loop1:
245 TEQ $0, R5; BEQ loop1done
246 loop1cont:
247 // unroll 1X
248 MOVW.P 4(R3), R6 // R6 = next x word; post-increment R3
249 // multiply
250 MULLU R0, R6, (R7, R6) // 64-bit product m * x word: high half in R7, low half in R6
251 ADD.S R1, R6 // low half += incoming carry word; sets the carry flag
252 ADC $0, R7, R1 // new carry word = high half + carry flag
253 MOVW.P R6, 4(R4) // store result word; post-increment R4
254 SUB $1, R5
255 TEQ $0, R5; BNE loop1cont
256 loop1done:
257 loop4:
258 TEQ $0, R2; BEQ loop4done
259 loop4cont:
260 // unroll 4X in batches of 2
// Each batch handles two words with one chained carry sequence: the
// ADD.S/ADC.S/ADC flags carry across the second MULLU, which has no .S
// suffix and is relied on to leave the flags alone.
261 MOVW.P 4(R3), R5
262 MOVW.P 4(R3), R6
263 // multiply
264 MULLU R0, R5, (R7, R5) // first product: high R7, low R5
265 ADD.S R1, R5 // first low half += incoming carry word
266 MULLU R0, R6, (R8, R6) // second product: high R8, low R6
267 ADC.S R7, R6 // second low half += first high half + carry flag
268 ADC $0, R8, R1 // new carry word = second high half + carry flag
269 MOVW.P R5, 4(R4)
270 MOVW.P R6, 4(R4)
// second batch of two words, same sequence
271 MOVW.P 4(R3), R5
272 MOVW.P 4(R3), R6
273 // multiply
274 MULLU R0, R5, (R7, R5)
275 ADD.S R1, R5
276 MULLU R0, R6, (R8, R6)
277 ADC.S R7, R6
278 ADC $0, R8, R1
279 MOVW.P R5, 4(R4)
280 MOVW.P R6, 4(R4)
281 SUB $1, R2
282 TEQ $0, R2; BNE loop4cont
283 loop4done:
284 MOVW R1, c+32(FP) // return the final carry word
285 RET
286
287 // func addMulVVWW(z, x, y []Word, m, a Word) (c Word)
// addMulVVWW computes, word by word, x word + y word * m + running carry,
// where the carry word starts as a. The low 32 bits of each result are stored
// through the z pointer and everything above them becomes the carry into the
// next word. The carry left after the last word is stored in c.
// Only z_len is consulted for the iteration count; nothing here checks the
// lengths of x or y (presumably callers guarantee they are at least len(z) —
// confirm against the Go-side callers).
// Register roles: R0 = m, R1 = carry word, R2 = 4-word group count,
// R3 = x read pointer, R4 = y read pointer, R5 = z write pointer,
// R6 = 1X loop count.
288 TEXT ·addMulVVWW(SB), NOSPLIT, $0
289 MOVW m+36(FP), R0 // R0 = multiplier m
290 MOVW a+40(FP), R1 // R1 = initial carry word a
291 MOVW z_len+4(FP), R2 // R2 = len(z)
292 MOVW x_base+12(FP), R3 // R3 = pointer to x[0]
293 MOVW y_base+24(FP), R4 // R4 = pointer to y[0]
294 MOVW z_base+0(FP), R5 // R5 = pointer to z[0]
295 // compute unrolled loop lengths
296 AND $3, R2, R6 // R6 = len(z) % 4, words for the 1X loop
297 MOVW R2>>2, R2 // R2 = len(z) / 4, iterations of the 4X loop
298 loop1:
299 TEQ $0, R6; BEQ loop1done
300 loop1cont:
301 // unroll 1X
302 MOVW.P 4(R3), R7 // R7 = next x word (addend)
303 MOVW.P 4(R4), R8 // R8 = next y word (multiplicand)
304 // multiply
305 MULLU R0, R8, (R9, R8) // 64-bit product m * y word: high half in R9, low half in R8
306 ADD.S R1, R8 // low half += incoming carry word; sets the carry flag
307 ADC $0, R9, R1 // carry word = high half + carry flag
308 // add
309 ADD.S R7, R8 // low half += x word; sets the carry flag
310 ADC $0, R1 // fold that carry into the carry word
311 MOVW.P R8, 4(R5) // store result word; post-increment R5
312 SUB $1, R6
313 TEQ $0, R6; BNE loop1cont
314 loop1done:
315 loop4:
316 TEQ $0, R2; BEQ loop4done
317 loop4cont:
318 // unroll 4X in batches of 2
// Each batch handles two words using two chained carry sequences: one for
// adding the incoming carry word to the products, one for adding the x
// words. The second MULLU has no .S suffix and is relied on to leave the
// flags alone between ADD.S and ADC.S.
319 MOVW.P 4(R3), R6 // R6, R7 = two x words
320 MOVW.P 4(R3), R7
321 MOVW.P 4(R4), R8 // R8, R9 = two y words
322 MOVW.P 4(R4), R9
323 // multiply
324 MULLU R0, R8, (R11, R8) // first product: high R11, low R8
325 ADD.S R1, R8 // first low half += incoming carry word
326 MULLU R0, R9, (R12, R9) // second product: high R12, low R9
327 ADC.S R11, R9 // second low half += first high half + carry flag
328 ADC $0, R12, R1 // carry word = second high half + carry flag
329 // add
330 ADD.S R6, R8 // first result += first x word
331 ADC.S R7, R9 // second result += second x word + carry flag
332 ADC $0, R1 // fold the final carry into the carry word
333 MOVW.P R8, 4(R5)
334 MOVW.P R9, 4(R5)
// second batch of two words, same sequence
335 MOVW.P 4(R3), R6
336 MOVW.P 4(R3), R7
337 MOVW.P 4(R4), R8
338 MOVW.P 4(R4), R9
339 // multiply
340 MULLU R0, R8, (R11, R8)
341 ADD.S R1, R8
342 MULLU R0, R9, (R12, R9)
343 ADC.S R11, R9
344 ADC $0, R12, R1
345 // add
346 ADD.S R6, R8
347 ADC.S R7, R9
348 ADC $0, R1
349 MOVW.P R8, 4(R5)
350 MOVW.P R9, 4(R5)
351 SUB $1, R2
352 TEQ $0, R2; BNE loop4cont
353 loop4done:
354 MOVW R1, c+44(FP) // return the final carry word
355 RET
356
View as plain text