// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.

//go:build !math_big_pure_go

#include "textflag.h"

// func addVV(z, x, y []Word) (c Word)
//
// Stores x[i] + y[i] (plus the carry from the previous word) into z[i] for
// z_len words and returns the final carry (0 or 1) in c.
// Only z's length is read; x and y are presumably at least as long as z —
// confirm against callers.
//
// Registers:
//	R0  number of 4-word groups (z_len >> 2)
//	R1  x read pointer (post-incremented); reused for the result at the end
//	R2  y read pointer (post-incremented)
//	R3  z write pointer (post-incremented)
//	R4  number of leftover words (z_len & 3), handled first
TEXT ·addVV(SB), NOSPLIT, $0
	MOVD z_len+8(FP), R0
	MOVD x_base+24(FP), R1
	MOVD y_base+48(FP), R2
	MOVD z_base+0(FP), R3
	// compute unrolled loop lengths
	AND $3, R0, R4
	LSR $2, R0
	// Adding zero can never carry out, so this leaves C = 0 (R0 is unchanged).
	ADDS ZR, R0	// clear carry
loop1:
	CBZ R4, loop1done
loop1cont:
	// unroll 1X
	MOVD.P 8(R1), R5
	MOVD.P 8(R2), R6
	ADCS R6, R5
	MOVD.P R5, 8(R3)
	// SUB (no S suffix) and CBNZ leave the flags alone, so the carry
	// produced by ADCS survives into the next iteration.
	SUB $1, R4
	CBNZ R4, loop1cont
loop1done:
loop4:
	CBZ R0, loop4done
loop4cont:
	// unroll 4X
	// The .P loads advance the pointer by 32 after fetching words 0-1;
	// the -16 offset then addresses words 2-3 of the same group.
	LDP.P 32(R1), (R4, R5)
	LDP -16(R1), (R6, R7)
	LDP.P 32(R2), (R8, R9)
	LDP -16(R2), (R10, R11)
	ADCS R8, R4
	ADCS R9, R5
	ADCS R10, R6
	ADCS R11, R7
	STP.P (R4, R5), 32(R3)
	STP (R6, R7), -16(R3)
	SUB $1, R0
	CBNZ R0, loop4cont
loop4done:
	// R1 = 0 + 0 + C, i.e. the carry flag as a 0/1 word.
	ADC ZR, ZR, R1	// save & convert add carry
	MOVD R1, c+72(FP)
	RET

// func subVV(z, x, y []Word) (c Word)
//
// Stores x[i] - y[i] (minus the borrow from the previous word) into z[i] for
// z_len words and returns the final borrow (0 or 1) in c.
// Only z's length is read; x and y are presumably at least as long as z —
// confirm against callers.
//
// Register roles are the same as in addVV: R0 = 4-word group count,
// R1/R2 = x/y read pointers, R3 = z write pointer, R4 = leftover word count.
TEXT ·subVV(SB), NOSPLIT, $0
	MOVD z_len+8(FP), R0
	MOVD x_base+24(FP), R1
	MOVD y_base+48(FP), R2
	MOVD z_base+0(FP), R3
	// compute unrolled loop lengths
	AND $3, R0, R4
	LSR $2, R0
	// Subtracting zero never borrows. On arm64 "no borrow" is C = 1, so
	// "clear carry" here means "no pending borrow" for the SBCS chain.
	SUBS ZR, R0	// clear carry
loop1:
	CBZ R4, loop1done
loop1cont:
	// unroll 1X
	MOVD.P 8(R1), R5
	MOVD.P 8(R2), R6
	SBCS R6, R5
	MOVD.P R5, 8(R3)
	// SUB and CBNZ do not touch the flags; the borrow state carries over.
	SUB $1, R4
	CBNZ R4, loop1cont
loop1done:
loop4:
	CBZ R0, loop4done
loop4cont:
	// unroll 4X
	// Same addressing scheme as addVV: post-increment by 32, then -16 for
	// the second pair of the group.
	LDP.P 32(R1), (R4, R5)
	LDP -16(R1), (R6, R7)
	LDP.P 32(R2), (R8, R9)
	LDP -16(R2), (R10, R11)
	SBCS R8, R4
	SBCS R9, R5
	SBCS R10, R6
	SBCS R11, R7
	STP.P (R4, R5), 32(R3)
	STP (R6, R7), -16(R3)
	SUB $1, R0
	CBNZ R0, loop4cont
loop4done:
	// R1 - R1 - (1 - C): 0 when there is no borrow, -1 when there is.
	SBC R1, R1	// save carry
	// Negate (R1 = 0 - R1) so the result is 0 or 1.
	SUB R1, ZR, R1	// convert sub carry
	MOVD R1, c+72(FP)
	RET

// func lshVU(z, x []Word, s uint) (c Word)
//
// Shifts the z_len-word vector x left by s bits into z, walking from the
// highest word down to the lowest:
//	z[i] = x[i]<<s | x[i-1]>>(64-s)
// c receives the bits shifted out of the top word, x[n-1]>>(64-s).
// When z_len is 0 nothing is stored and c is 0.
// NOTE(review): the code computes a shift count of 64-s, so it presumably
// requires 0 < s < 64 — confirm against callers.
//
// Registers:
//	R0  remaining word count, then number of 4-word groups
//	R1  s
//	R2  x read pointer, starts one past the end (pre-decremented)
//	R3  z write pointer, starts one past the end (pre-decremented)
//	R4  pending high bits: previous (higher) word << s
//	R5  64 - s
//	R6  leftover word count for the 1X loop (reused as data in the 4X loop)
TEXT ·lshVU(SB), NOSPLIT, $0
	MOVD z_len+8(FP), R0
	CBZ R0, ret0
	MOVD s+48(FP), R1
	MOVD x_base+24(FP), R2
	MOVD z_base+0(FP), R3
	// run loop backward
	// Point both pointers just past the last word (base + 8*len).
	ADD R0<<3, R2, R2
	ADD R0<<3, R3, R3
	// shift first word into carry
	// .W is pre-index: R2 is decremented by 8 before the load.
	MOVD.W -8(R2), R4
	MOVD $64, R5
	SUB R1, R5
	LSR R5, R4, R6
	LSL R1, R4
	MOVD R6, c+56(FP)
	// shift remaining words
	// One word has been consumed already; its shifted value is pending in R4.
	SUB $1, R0
	// compute unrolled loop lengths
	AND $3, R0, R6
	LSR $2, R0
loop1:
	CBZ R6, loop1done
loop1cont:
	// unroll 1X
	// Combine the pending high bits with the top bits of the next lower
	// word, store, and keep that word's shifted remainder pending.
	MOVD.W -8(R2), R7
	LSR R5, R7, R8
	ORR R4, R8
	LSL R1, R7, R4
	MOVD.W R8, -8(R3)
	SUB $1, R6
	CBNZ R6, loop1cont
loop1done:
loop4:
	CBZ R0, loop4done
loop4cont:
	// unroll 4X
	// After the pre-decrement by 32, the group sits at 0..24(R2):
	// R9 = lowest word, R8, R7, R6 = highest word.
	LDP.W -32(R2), (R9, R8)
	LDP 16(R2), (R7, R6)
	// Process from the highest word (R6) down to the lowest (R9).
	// Results land in R10 (highest), R6, R7, R8 (lowest).
	LSR R5, R6, R10
	ORR R4, R10
	LSL R1, R6, R4
	LSR R5, R7, R6
	ORR R4, R6
	LSL R1, R7, R4
	LSR R5, R8, R7
	ORR R4, R7
	LSL R1, R8, R4
	LSR R5, R9, R8
	ORR R4, R8
	LSL R1, R9, R4
	STP.W (R8, R7), -32(R3)
	STP (R6, R10), 16(R3)
	SUB $1, R0
	CBNZ R0, loop4cont
loop4done:
	// store final shifted bits
	// The pending value is x[0]<<s, which becomes the lowest output word.
	MOVD.W R4, -8(R3)
	RET
ret0:
	// Empty input: report a zero carry.
	MOVD ZR, c+56(FP)
	RET

// func rshVU(z, x []Word, s uint) (c Word)
//
// Shifts the z_len-word vector x right by s bits into z, walking from the
// lowest word up to the highest:
//	z[i] = x[i]>>s | x[i+1]<<(64-s)
// c receives the bits shifted out of the bottom word, x[0]<<(64-s).
// When z_len is 0 nothing is stored and c is 0.
// NOTE(review): the code computes a shift count of 64-s, so it presumably
// requires 0 < s < 64 — confirm against callers.
//
// Registers:
//	R0  remaining word count, then number of 4-word groups
//	R1  s
//	R2  x read pointer (post-incremented)
//	R3  z write pointer (post-incremented)
//	R4  pending low bits: previous (lower) word >> s
//	R5  64 - s
//	R6  leftover word count for the 1X loop (reused as data in the 4X loop)
TEXT ·rshVU(SB), NOSPLIT, $0
	MOVD z_len+8(FP), R0
	CBZ R0, ret0
	MOVD s+48(FP), R1
	MOVD x_base+24(FP), R2
	MOVD z_base+0(FP), R3
	// shift first word into carry
	MOVD.P 8(R2), R4
	MOVD $64, R5
	SUB R1, R5
	LSL R5, R4, R6
	LSR R1, R4
	MOVD R6, c+56(FP)
	// shift remaining words
	// One word has been consumed already; its shifted value is pending in R4.
	SUB $1, R0
	// compute unrolled loop lengths
	AND $3, R0, R6
	LSR $2, R0
loop1:
	CBZ R6, loop1done
loop1cont:
	// unroll 1X
	// Combine the pending low bits with the bottom bits of the next higher
	// word, store, and keep that word's shifted remainder pending.
	MOVD.P 8(R2), R7
	LSL R5, R7, R8
	ORR R4, R8
	LSR R1, R7, R4
	MOVD.P R8, 8(R3)
	SUB $1, R6
	CBNZ R6, loop1cont
loop1done:
loop4:
	CBZ R0, loop4done
loop4cont:
	// unroll 4X
	// R6..R9 hold four consecutive input words, lowest first.
	LDP.P 32(R2), (R6, R7)
	LDP -16(R2), (R8, R9)
	// Results land in R10 (lowest), R6, R7, R8 (highest).
	LSL R5, R6, R10
	ORR R4, R10
	LSR R1, R6, R4
	LSL R5, R7, R6
	ORR R4, R6
	LSR R1, R7, R4
	LSL R5, R8, R7
	ORR R4, R7
	LSR R1, R8, R4
	LSL R5, R9, R8
	ORR R4, R8
	LSR R1, R9, R4
	STP.P (R10, R6), 32(R3)
	STP (R7, R8), -16(R3)
	SUB $1, R0
	CBNZ R0, loop4cont
loop4done:
	// store final shifted bits
	// The pending value is x[n-1]>>s, which becomes the highest output word.
	MOVD.P R4, 8(R3)
	RET
ret0:
	// Empty input: report a zero carry.
	MOVD ZR, c+56(FP)
	RET

// func mulAddVWW(z, x []Word, m, a Word) (c Word)
//
// Computes z = x*m + a over z_len words: each z[i] is the low word of
// x[i]*m plus the running carry, and the carry into the next word is the
// high word of the product plus any carry-out of that addition. The carry
// starts as a, and the final carry is returned in c.
// Only z's length is read; x is presumably at least as long as z — confirm
// against callers.
//
// Registers:
//	R0  m
//	R1  running carry word (initially a)
//	R2  number of 8-word groups (z_len >> 3)
//	R3  x read pointer (post-incremented)
//	R4  z write pointer (post-incremented)
//	R5  number of leftover words (z_len & 7), handled first
TEXT ·mulAddVWW(SB), NOSPLIT, $0
	MOVD m+48(FP), R0
	MOVD a+56(FP), R1
	MOVD z_len+8(FP), R2
	MOVD x_base+24(FP), R3
	MOVD z_base+0(FP), R4
	// compute unrolled loop lengths
	AND $7, R2, R5
	LSR $3, R2
loop1:
	CBZ R5, loop1done
loop1cont:
	// unroll 1X
	MOVD.P 8(R3), R6
	// multiply
	// R7:R6 = m * x[i] (high:low); then add the incoming carry to the low
	// word and fold the resulting carry flag into the high word.
	UMULH R0, R6, R7
	MUL R0, R6
	ADDS R1, R6
	ADC ZR, R7, R1
	MOVD.P R6, 8(R4)
	SUB $1, R5
	CBNZ R5, loop1cont
loop1done:
loop8:
	CBZ R2, loop8done
loop8cont:
	// unroll 8X
	// R5..R12 hold eight consecutive x words, lowest first.
	LDP.P 64(R3), (R5, R6)
	LDP -48(R3), (R7, R8)
	LDP -32(R3), (R9, R10)
	LDP -16(R3), (R11, R12)
	// multiply
	// Each low product absorbs the previous word's high product through a
	// single ADCS chain; R13 and R14 alternate as the high-word holder.
	// UMULH and MUL do not modify the flags, so the chain is unbroken.
	UMULH R0, R5, R13
	MUL R0, R5
	ADDS R1, R5
	UMULH R0, R6, R14
	MUL R0, R6
	ADCS R13, R6
	UMULH R0, R7, R13
	MUL R0, R7
	ADCS R14, R7
	UMULH R0, R8, R14
	MUL R0, R8
	ADCS R13, R8
	UMULH R0, R9, R13
	MUL R0, R9
	ADCS R14, R9
	UMULH R0, R10, R14
	MUL R0, R10
	ADCS R13, R10
	UMULH R0, R11, R13
	MUL R0, R11
	ADCS R14, R11
	UMULH R0, R12, R14
	MUL R0, R12
	ADCS R13, R12
	// New running carry = last high product + final carry flag.
	ADC ZR, R14, R1
	STP.P (R5, R6), 64(R4)
	STP (R7, R8), -48(R4)
	STP (R9, R10), -32(R4)
	STP (R11, R12), -16(R4)
	SUB $1, R2
	CBNZ R2, loop8cont
loop8done:
	MOVD R1, c+64(FP)
	RET

// func addMulVVWW(z, x, y []Word, m, a Word) (c Word)
//
// Computes z = x + y*m + a over z_len words. For each word the low half of
// y[i]*m is added to the running carry and to x[i]; the carry into the next
// word is the high half of the product plus the carry-outs of both
// additions. The carry starts as a, and the final carry is returned in c.
// Only z's length is read; x and y are presumably at least as long as z —
// confirm against callers.
//
// Registers:
//	R0  m
//	R1  running carry word (initially a)
//	R2  number of 8-word groups (z_len >> 3)
//	R3  x read pointer (post-incremented)
//	R4  y read pointer (post-incremented)
//	R5  z write pointer (post-incremented)
//	R6  number of leftover words (z_len & 7), handled first
TEXT ·addMulVVWW(SB), NOSPLIT, $0
	MOVD m+72(FP), R0
	MOVD a+80(FP), R1
	MOVD z_len+8(FP), R2
	MOVD x_base+24(FP), R3
	MOVD y_base+48(FP), R4
	MOVD z_base+0(FP), R5
	// compute unrolled loop lengths
	AND $7, R2, R6
	LSR $3, R2
loop1:
	CBZ R6, loop1done
loop1cont:
	// unroll 1X
	MOVD.P 8(R3), R7
	MOVD.P 8(R4), R8
	// multiply
	// R9:R8 = m * y[i]; add the incoming carry to the low word and fold the
	// carry flag into the high word, which becomes the new running carry.
	UMULH R0, R8, R9
	MUL R0, R8
	ADDS R1, R8
	ADC ZR, R9, R1
	// add
	// Add x[i]; a carry-out here also goes into the running carry.
	ADDS R7, R8
	ADC ZR, R1
	MOVD.P R8, 8(R5)
	SUB $1, R6
	CBNZ R6, loop1cont
loop1done:
loop8:
	CBZ R2, loop8done
loop8cont:
	// unroll 8X
	// R6..R13 hold eight x words; R14..R17 and R19..R22 hold eight y words
	// (R18 is skipped by this code; the Go assembler documents it as a
	// reserved platform register on arm64).
	LDP.P 64(R3), (R6, R7)
	LDP -48(R3), (R8, R9)
	LDP -32(R3), (R10, R11)
	LDP -16(R3), (R12, R13)
	LDP.P 64(R4), (R14, R15)
	LDP -48(R4), (R16, R17)
	LDP -32(R4), (R19, R20)
	LDP -16(R4), (R21, R22)
	// multiply
	// First carry chain: y*m plus the running carry. Each low product
	// absorbs the previous word's high product; R23 and R24 alternate as
	// the high-word holder. UMULH and MUL do not modify the flags.
	UMULH R0, R14, R23
	MUL R0, R14
	ADDS R1, R14
	UMULH R0, R15, R24
	MUL R0, R15
	ADCS R23, R15
	UMULH R0, R16, R23
	MUL R0, R16
	ADCS R24, R16
	UMULH R0, R17, R24
	MUL R0, R17
	ADCS R23, R17
	UMULH R0, R19, R23
	MUL R0, R19
	ADCS R24, R19
	UMULH R0, R20, R24
	MUL R0, R20
	ADCS R23, R20
	UMULH R0, R21, R23
	MUL R0, R21
	ADCS R24, R21
	UMULH R0, R22, R24
	MUL R0, R22
	ADCS R23, R22
	// Running carry = last high product + carry flag of the first chain.
	ADC ZR, R24, R1
	// add
	// Second carry chain: add the eight x words to the products.
	ADDS R6, R14
	ADCS R7, R15
	ADCS R8, R16
	ADCS R9, R17
	ADCS R10, R19
	ADCS R11, R20
	ADCS R12, R21
	ADCS R13, R22
	// Fold the second chain's carry-out into the running carry.
	ADC ZR, R1
	STP.P (R14, R15), 64(R5)
	STP (R16, R17), -48(R5)
	STP (R19, R20), -32(R5)
	STP (R21, R22), -16(R5)
	SUB $1, R2
	CBNZ R2, loop8cont
loop8done:
	MOVD R1, c+88(FP)
	RET