Text file src/math/big/arith_arm.s

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.
     6  
     7  //go:build !math_big_pure_go
     8  
     9  #include "textflag.h"
    10  
    11  // func addVV(z, x, y []Word) (c Word)
        //
        // addVV computes z[i] = x[i] + y[i] + carry for i in [0, len(z)), passing the
        // carry from each word to the next, and returns the final carry (0 or 1) in c.
        // Only len(z) is read; x and y are walked for the same number of words with
        // no length check in this routine.
        //
        // Register roles: R1/R2/R3 are the x/y/z cursors, R4 counts the len(z)&3
        // leftover words (handled first, one at a time), R0 counts the len(z)>>2
        // four-word iterations. The running carry lives in the CPU carry flag for the
        // whole routine: counters are decremented with SUB (no .S) and tested with
        // TEQ $0, and the code relies on neither disturbing the carry left by ADC.S.
    12  TEXT ·addVV(SB), NOSPLIT, $0
    13  	MOVW z_len+4(FP), R0	// R0 = len(z)
    14  	MOVW x_base+12(FP), R1	// R1 = &x[0]
    15  	MOVW y_base+24(FP), R2	// R2 = &y[0]
    16  	MOVW z_base+0(FP), R3	// R3 = &z[0]
    17  	// compute unrolled loop lengths: R4 = len&3 single words, R0 = len/4 groups of four
    18  	AND $3, R0, R4
    19  	MOVW R0>>2, R0
    20  	ADD.S $0, R0	// clear carry: R0+0 cannot carry out, so the chain starts with carry = 0
    21  loop1:
    22  	TEQ $0, R4; BEQ loop1done	// skip the 1X loop when there are no leftover words
    23  loop1cont:
    24  	// unroll 1X: one word per iteration
    25  	MOVW.P 4(R1), R5	// R5 = *x, then x cursor += 4
    26  	MOVW.P 4(R2), R6	// R6 = *y, then y cursor += 4
    27  	ADC.S R6, R5	// R5 = x + y + carry; new carry kept in the flag
    28  	MOVW.P R5, 4(R3)	// *z = R5, then z cursor += 4
    29  	SUB $1, R4	// no .S: the decrement must not overwrite the carry flag
    30  	TEQ $0, R4; BNE loop1cont
    31  loop1done:
    32  loop4:
    33  	TEQ $0, R0; BEQ loop4done	// skip the 4X loop when fewer than four words remain
    34  loop4cont:
    35  	// unroll 4X: load four x words and four y words, add them as one carry chain, store four z words
    36  	MOVW.P 4(R1), R4
    37  	MOVW.P 4(R1), R5
    38  	MOVW.P 4(R1), R6
    39  	MOVW.P 4(R1), R7
    40  	MOVW.P 4(R2), R8
    41  	MOVW.P 4(R2), R9
    42  	MOVW.P 4(R2), R11
    43  	MOVW.P 4(R2), R12
    44  	ADC.S R8, R4	// lowest word first; each ADC.S consumes the previous carry
    45  	ADC.S R9, R5
    46  	ADC.S R11, R6
    47  	ADC.S R12, R7
    48  	MOVW.P R4, 4(R3)
    49  	MOVW.P R5, 4(R3)
    50  	MOVW.P R6, 4(R3)
    51  	MOVW.P R7, 4(R3)
    52  	SUB $1, R0	// no .S, same reason as in the 1X loop
    53  	TEQ $0, R0; BNE loop4cont
    54  loop4done:
    55  	SBC R1, R1	// save carry: R1 = R1 - R1 - (1 - carry), i.e. 0 if carry set, -1 if clear
    56  	ADD $1, R1	// convert add carry: R1 = 1 if carry set, 0 if clear
    57  	MOVW R1, c+36(FP)	// return c
    58  	RET
    59  
    60  // func subVV(z, x, y []Word) (c Word)
        //
        // subVV computes z[i] = x[i] - y[i] - borrow for i in [0, len(z)), passing the
        // borrow from each word to the next, and returns the final borrow (0 or 1) in c.
        // Only len(z) is read; x and y are walked for the same number of words with
        // no length check in this routine.
        //
        // The structure mirrors a word-wise add with carry, using SBC.S instead. The
        // borrow is held in the carry flag with inverted sense: flag set means "no
        // borrow", flag clear means "borrow" (see the conversion at loop4done).
        // Counters are decremented with SUB (no .S) and tested with TEQ $0, and the
        // code relies on neither disturbing the flag left by SBC.S.
    61  TEXT ·subVV(SB), NOSPLIT, $0
    62  	MOVW z_len+4(FP), R0	// R0 = len(z)
    63  	MOVW x_base+12(FP), R1	// R1 = &x[0]
    64  	MOVW y_base+24(FP), R2	// R2 = &y[0]
    65  	MOVW z_base+0(FP), R3	// R3 = &z[0]
    66  	// compute unrolled loop lengths: R4 = len&3 single words, R0 = len/4 groups of four
    67  	AND $3, R0, R4
    68  	MOVW R0>>2, R0
    69  	SUB.S $0, R0	// clear borrow: R0-0 cannot borrow, which leaves the carry flag set (= no borrow)
    70  loop1:
    71  	TEQ $0, R4; BEQ loop1done	// skip the 1X loop when there are no leftover words
    72  loop1cont:
    73  	// unroll 1X: one word per iteration
    74  	MOVW.P 4(R1), R5	// R5 = *x, then x cursor += 4
    75  	MOVW.P 4(R2), R6	// R6 = *y, then y cursor += 4
    76  	SBC.S R6, R5	// R5 = x - y - borrow; new borrow kept in the flag
    77  	MOVW.P R5, 4(R3)	// *z = R5, then z cursor += 4
    78  	SUB $1, R4	// no .S: the decrement must not overwrite the borrow flag
    79  	TEQ $0, R4; BNE loop1cont
    80  loop1done:
    81  loop4:
    82  	TEQ $0, R0; BEQ loop4done	// skip the 4X loop when fewer than four words remain
    83  loop4cont:
    84  	// unroll 4X: load four x words and four y words, subtract them as one borrow chain, store four z words
    85  	MOVW.P 4(R1), R4
    86  	MOVW.P 4(R1), R5
    87  	MOVW.P 4(R1), R6
    88  	MOVW.P 4(R1), R7
    89  	MOVW.P 4(R2), R8
    90  	MOVW.P 4(R2), R9
    91  	MOVW.P 4(R2), R11
    92  	MOVW.P 4(R2), R12
    93  	SBC.S R8, R4	// lowest word first; each SBC.S consumes the previous borrow
    94  	SBC.S R9, R5
    95  	SBC.S R11, R6
    96  	SBC.S R12, R7
    97  	MOVW.P R4, 4(R3)
    98  	MOVW.P R5, 4(R3)
    99  	MOVW.P R6, 4(R3)
   100  	MOVW.P R7, 4(R3)
   101  	SUB $1, R0	// no .S, same reason as in the 1X loop
   102  	TEQ $0, R0; BNE loop4cont
   103  loop4done:
   104  	SBC R1, R1	// save carry: R1 = 0 if the flag is set (no borrow), -1 if clear (borrow)
   105  	RSB $0, R1, R1	// convert sub carry: R1 = 0 - R1, i.e. 1 if a borrow is pending, else 0
   106  	MOVW R1, c+36(FP)	// return c
   107  	RET
   108  
   109  // func lshVU(z, x []Word, s uint) (c Word)
        //
        // lshVU shifts the len(z)-word vector x left by s bits and stores the result in
        // z. It returns in c the bits shifted out of the top word, x[len(z)-1]>>(32-s).
        // When len(z) == 0 it returns c = 0 without touching x or z.
        //
        // Words are processed from the highest index down to index 0. R4 always holds
        // the already-shifted high part (x[i]<<s) of the next word to be stored; each
        // step ORs in the bits coming up from the next lower word (x[i-1]>>(32-s)).
        // Register roles: R1 = s, R5 = 32-s, R2/R3 = x/z cursors (pre-decremented).
        // NOTE(review): s is not range-checked here; both s and 32-s are used as
        // register shift counts, so presumably callers keep s below 32 - confirm.
   110  TEXT ·lshVU(SB), NOSPLIT, $0
   111  	MOVW z_len+4(FP), R0	// R0 = len(z)
   112  	TEQ $0, R0; BEQ ret0	// empty vector: return c = 0
   113  	MOVW s+24(FP), R1	// R1 = s
   114  	MOVW x_base+12(FP), R2	// R2 = &x[0]
   115  	MOVW z_base+0(FP), R3	// R3 = &z[0]
   116  	// run loop backward: point both cursors one word past the end (base + 4*len(z))
   117  	ADD R0<<2, R2, R2
   118  	ADD R0<<2, R3, R3
   119  	// shift first word into carry
   120  	MOVW.W -4(R2), R4	// x cursor -= 4, then R4 = top word of x
   121  	MOVW $32, R5
   122  	SUB R1, R5	// R5 = 32 - s
   123  	MOVW R4>>R5, R6	// R6 = bits shifted out of the top word
   124  	MOVW R4<<R1, R4	// R4 = top word << s, still missing the bits from the word below
   125  	MOVW R6, c+28(FP)	// return value is stored up front; R6 is reused below
   126  	// shift remaining words
   127  	SUB $1, R0	// one word already consumed
   128  	// compute unrolled loop lengths: R6 = remaining&3 single words, R0 = remaining/4 groups of four
   129  	AND $3, R0, R6
   130  	MOVW R0>>2, R0
   131  loop1:
   132  	TEQ $0, R6; BEQ loop1done
   133  loop1cont:
   134  	// unroll 1X
   135  	MOVW.W -4(R2), R7	// R7 = next lower word of x
   136  	ORR R7>>R5, R4	// complete the pending word with the bits shifted up from R7
   137  	MOVW.W R4, -4(R3)	// z cursor -= 4, then store the completed word
   138  	MOVW R7<<R1, R4	// start the next pending word
   139  	SUB $1, R6
   140  	TEQ $0, R6; BNE loop1cont
   141  loop1done:
   142  loop4:
   143  	TEQ $0, R0; BEQ loop4done
   144  loop4cont:
   145  	// unroll 4X: load four words, then repeat the 1X combine/store/shift step for each
   146  	MOVW.W -4(R2), R6
   147  	MOVW.W -4(R2), R7
   148  	MOVW.W -4(R2), R8
   149  	MOVW.W -4(R2), R9
   150  	ORR R6>>R5, R4
   151  	MOVW.W R4, -4(R3)
   152  	MOVW R6<<R1, R4
   153  	ORR R7>>R5, R4
   154  	MOVW.W R4, -4(R3)
   155  	MOVW R7<<R1, R4
   156  	ORR R8>>R5, R4
   157  	MOVW.W R4, -4(R3)
   158  	MOVW R8<<R1, R4
   159  	ORR R9>>R5, R4
   160  	MOVW.W R4, -4(R3)
   161  	MOVW R9<<R1, R4
   162  	SUB $1, R0
   163  	TEQ $0, R0; BNE loop4cont
   164  loop4done:
   165  	// store final shifted bits: the lowest word is x[0]<<s with nothing ORed in from below
   166  	MOVW.W R4, -4(R3)
   167  	RET
   168  ret0:
   169  	MOVW $0, R1
   170  	MOVW R1, c+28(FP)	// c = 0 for an empty vector
   171  	RET
   172  
   173  // func rshVU(z, x []Word, s uint) (c Word)
        //
        // rshVU shifts the len(z)-word vector x right by s bits and stores the result
        // in z. It returns in c the bits shifted out of the bottom word, placed at the
        // top of the word: x[0]<<(32-s).
        // When len(z) == 0 it returns c = 0 without touching x or z.
        //
        // Words are processed from index 0 upward. R4 always holds the already-shifted
        // low part (x[i]>>s) of the next word to be stored; each step ORs in the bits
        // coming down from the next higher word (x[i+1]<<(32-s)).
        // Register roles: R1 = s, R5 = 32-s, R2/R3 = x/z cursors (post-incremented).
        // NOTE(review): s is not range-checked here; both s and 32-s are used as
        // register shift counts, so presumably callers keep s below 32 - confirm.
   174  TEXT ·rshVU(SB), NOSPLIT, $0
   175  	MOVW z_len+4(FP), R0	// R0 = len(z)
   176  	TEQ $0, R0; BEQ ret0	// empty vector: return c = 0
   177  	MOVW s+24(FP), R1	// R1 = s
   178  	MOVW x_base+12(FP), R2	// R2 = &x[0]
   179  	MOVW z_base+0(FP), R3	// R3 = &z[0]
   180  	// shift first word into carry
   181  	MOVW.P 4(R2), R4	// R4 = x[0], then x cursor += 4
   182  	MOVW $32, R5
   183  	SUB R1, R5	// R5 = 32 - s
   184  	MOVW R4<<R5, R6	// R6 = bits shifted out of the bottom word
   185  	MOVW R4>>R1, R4	// R4 = x[0] >> s, still missing the bits from the word above
   186  	MOVW R6, c+28(FP)	// return value is stored up front; R6 is reused below
   187  	// shift remaining words
   188  	SUB $1, R0	// one word already consumed
   189  	// compute unrolled loop lengths: R6 = remaining&3 single words, R0 = remaining/4 groups of four
   190  	AND $3, R0, R6
   191  	MOVW R0>>2, R0
   192  loop1:
   193  	TEQ $0, R6; BEQ loop1done
   194  loop1cont:
   195  	// unroll 1X
   196  	MOVW.P 4(R2), R7	// R7 = next higher word of x
   197  	ORR R7<<R5, R4	// complete the pending word with the bits shifted down from R7
   198  	MOVW.P R4, 4(R3)	// store the completed word, then z cursor += 4
   199  	MOVW R7>>R1, R4	// start the next pending word
   200  	SUB $1, R6
   201  	TEQ $0, R6; BNE loop1cont
   202  loop1done:
   203  loop4:
   204  	TEQ $0, R0; BEQ loop4done
   205  loop4cont:
   206  	// unroll 4X: load four words, then repeat the 1X combine/store/shift step for each
   207  	MOVW.P 4(R2), R6
   208  	MOVW.P 4(R2), R7
   209  	MOVW.P 4(R2), R8
   210  	MOVW.P 4(R2), R9
   211  	ORR R6<<R5, R4
   212  	MOVW.P R4, 4(R3)
   213  	MOVW R6>>R1, R4
   214  	ORR R7<<R5, R4
   215  	MOVW.P R4, 4(R3)
   216  	MOVW R7>>R1, R4
   217  	ORR R8<<R5, R4
   218  	MOVW.P R4, 4(R3)
   219  	MOVW R8>>R1, R4
   220  	ORR R9<<R5, R4
   221  	MOVW.P R4, 4(R3)
   222  	MOVW R9>>R1, R4
   223  	SUB $1, R0
   224  	TEQ $0, R0; BNE loop4cont
   225  loop4done:
   226  	// store final shifted bits: the highest word is its x word >> s with nothing ORed in from above
   227  	MOVW.P R4, 4(R3)
   228  	RET
   229  ret0:
   230  	MOVW $0, R1
   231  	MOVW R1, c+28(FP)	// c = 0 for an empty vector
   232  	RET
   233  
   234  // func mulAddVWW(z, x []Word, m, a Word) (c Word)
        //
        // mulAddVWW computes z = x*m + a over len(z) words: for each i in [0, len(z))
        // it forms the 64-bit product x[i]*m, adds the running carry word (initially
        // a), stores the low word in z[i] and keeps the high word as the next carry.
        // The final carry word is returned in c. Only len(z) is read; x is walked for
        // the same number of words with no length check in this routine.
        //
        // Register roles: R0 = m, R1 = running carry word, R3/R4 = x/z cursors,
        // R5 = len(z)&3 leftover count (handled first), R2 = len(z)>>2 group count.
   235  TEXT ·mulAddVWW(SB), NOSPLIT, $0
   236  	MOVW m+24(FP), R0	// R0 = m
   237  	MOVW a+28(FP), R1	// R1 = a, the initial carry word
   238  	MOVW z_len+4(FP), R2	// R2 = len(z)
   239  	MOVW x_base+12(FP), R3	// R3 = &x[0]
   240  	MOVW z_base+0(FP), R4	// R4 = &z[0]
   241  	// compute unrolled loop lengths: R5 = len&3 single words, R2 = len/4 groups of four
   242  	AND $3, R2, R5
   243  	MOVW R2>>2, R2
   244  loop1:
   245  	TEQ $0, R5; BEQ loop1done
   246  loop1cont:
   247  	// unroll 1X
   248  	MOVW.P 4(R3), R6	// R6 = *x, then x cursor += 4
   249  	// multiply
   250  	MULLU R0, R6, (R7, R6)	// (R7:R6) = m * x word, high word in R7, low word in R6
   251  	ADD.S R1, R6	// low word += carry-in; overflow goes to the carry flag
   252  	ADC $0, R7, R1	// carry-out word = high word + carry flag
   253  	MOVW.P R6, 4(R4)	// *z = low word, then z cursor += 4
   254  	SUB $1, R5
   255  	TEQ $0, R5; BNE loop1cont
   256  loop1done:
   257  loop4:
   258  	TEQ $0, R2; BEQ loop4done
   259  loop4cont:
   260  	// unroll 4X in batches of 2: two words share one carry chain, and the batch is repeated twice
   261  	MOVW.P 4(R3), R5
   262  	MOVW.P 4(R3), R6
   263  	// multiply
   264  	MULLU R0, R5, (R7, R5)	// first product: high R7, low R5
   265  	ADD.S R1, R5	// first low word += carry-in
   266  	MULLU R0, R6, (R8, R6)	// second product; no .S, and the code relies on it leaving the carry flag alone
   267  	ADC.S R7, R6	// second low word += first high word + carry flag
   268  	ADC $0, R8, R1	// carry-out word = second high word + carry flag
   269  	MOVW.P R5, 4(R4)
   270  	MOVW.P R6, 4(R4)
   271  	MOVW.P 4(R3), R5	// second batch of two, same sequence as above
   272  	MOVW.P 4(R3), R6
   273  	// multiply
   274  	MULLU R0, R5, (R7, R5)
   275  	ADD.S R1, R5
   276  	MULLU R0, R6, (R8, R6)
   277  	ADC.S R7, R6
   278  	ADC $0, R8, R1
   279  	MOVW.P R5, 4(R4)
   280  	MOVW.P R6, 4(R4)
   281  	SUB $1, R2
   282  	TEQ $0, R2; BNE loop4cont
   283  loop4done:
   284  	MOVW R1, c+32(FP)	// return the final carry word
   285  	RET
   286  
   287  // func addMulVVWW(z, x, y []Word, m, a Word) (c Word)
        //
        // addMulVVWW computes z = x + y*m + a over len(z) words: for each i in
        // [0, len(z)) it forms the 64-bit product y[i]*m, adds the running carry word
        // (initially a) and x[i], stores the low word in z[i] and keeps the high part
        // as the next carry word. The final carry word is returned in c. Only len(z)
        // is read; x and y are walked for the same number of words with no length
        // check in this routine.
        //
        // Register roles: R0 = m, R1 = running carry word, R3/R4/R5 = x/y/z cursors,
        // R6 = len(z)&3 leftover count (handled first), R2 = len(z)>>2 group count.
   288  TEXT ·addMulVVWW(SB), NOSPLIT, $0
   289  	MOVW m+36(FP), R0	// R0 = m
   290  	MOVW a+40(FP), R1	// R1 = a, the initial carry word
   291  	MOVW z_len+4(FP), R2	// R2 = len(z)
   292  	MOVW x_base+12(FP), R3	// R3 = &x[0]
   293  	MOVW y_base+24(FP), R4	// R4 = &y[0]
   294  	MOVW z_base+0(FP), R5	// R5 = &z[0]
   295  	// compute unrolled loop lengths: R6 = len&3 single words, R2 = len/4 groups of four
   296  	AND $3, R2, R6
   297  	MOVW R2>>2, R2
   298  loop1:
   299  	TEQ $0, R6; BEQ loop1done
   300  loop1cont:
   301  	// unroll 1X
   302  	MOVW.P 4(R3), R7	// R7 = *x, then x cursor += 4
   303  	MOVW.P 4(R4), R8	// R8 = *y, then y cursor += 4
   304  	// multiply
   305  	MULLU R0, R8, (R9, R8)	// (R9:R8) = m * y word, high word in R9, low word in R8
   306  	ADD.S R1, R8	// low word += carry-in
   307  	ADC $0, R9, R1	// carry word = high word + carry flag
   308  	// add
   309  	ADD.S R7, R8	// low word += x word
   310  	ADC $0, R1	// fold the overflow of that addition into the carry word
   311  	MOVW.P R8, 4(R5)	// *z = low word, then z cursor += 4
   312  	SUB $1, R6
   313  	TEQ $0, R6; BNE loop1cont
   314  loop1done:
   315  loop4:
   316  	TEQ $0, R2; BEQ loop4done
   317  loop4cont:
   318  	// unroll 4X in batches of 2: two words share each carry chain, and the batch is repeated twice
   319  	MOVW.P 4(R3), R6	// two x words
   320  	MOVW.P 4(R3), R7
   321  	MOVW.P 4(R4), R8	// two y words
   322  	MOVW.P 4(R4), R9
   323  	// multiply
   324  	MULLU R0, R8, (R11, R8)	// first product: high R11, low R8
   325  	ADD.S R1, R8	// first low word += carry-in
   326  	MULLU R0, R9, (R12, R9)	// second product; no .S, and the code relies on it leaving the carry flag alone
   327  	ADC.S R11, R9	// second low word += first high word + carry flag
   328  	ADC $0, R12, R1	// carry word = second high word + carry flag
   329  	// add
   330  	ADD.S R6, R8	// add the two x words as a second carry chain
   331  	ADC.S R7, R9
   332  	ADC $0, R1	// fold that chain's overflow into the carry word
   333  	MOVW.P R8, 4(R5)
   334  	MOVW.P R9, 4(R5)
   335  	MOVW.P 4(R3), R6	// second batch of two, same sequence as above
   336  	MOVW.P 4(R3), R7
   337  	MOVW.P 4(R4), R8
   338  	MOVW.P 4(R4), R9
   339  	// multiply
   340  	MULLU R0, R8, (R11, R8)
   341  	ADD.S R1, R8
   342  	MULLU R0, R9, (R12, R9)
   343  	ADC.S R11, R9
   344  	ADC $0, R12, R1
   345  	// add
   346  	ADD.S R6, R8
   347  	ADC.S R7, R9
   348  	ADC $0, R1
   349  	MOVW.P R8, 4(R5)
   350  	MOVW.P R9, 4(R5)
   351  	SUB $1, R2
   352  	TEQ $0, R2; BNE loop4cont
   353  loop4done:
   354  	MOVW R1, c+44(FP)	// return the final carry word
   355  	RET
   356  

View as plain text