// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// Compare takes two slices (a, b) in registers and tail-calls cmpbody<>
// after moving b's base/length into the registers cmpbody<> expects.
// The arguments are consumed from registers, so the symbol must be
// declared ABIInternal.
TEXT ·Compare<ABIInternal>(SB),NOSPLIT,$0-56
	// R4 = a_base
	// R5 = a_len
	// R6 = a_cap (unused)
	// R7 = b_base (want in R6)
	// R8 = b_len (want in R7)
	// R9 = b_cap (unused)
	MOVV	R7, R6
	MOVV	R8, R7
	JMP	cmpbody<>(SB)

// cmpstring receives its operands already in the register layout that
// cmpbody<> expects, so it jumps straight to it.
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT,$0-40
	// R4 = a_base
	// R5 = a_len
	// R6 = b_base
	// R7 = b_len
	JMP	cmpbody<>(SB)

// input:
//    R4: points to the start of a
//    R5: length of a
//    R6: points to the start of b
//    R7: length of b
// for regabi the return value (-1/0/1) in R4
//
// Register usage inside the body:
//    R14: number of bytes still to compare (starts as min(R5, R7))
//    R15: chunk size used as the threshold of the current loop
//    R10, R11: 8/4/2-byte words loaded from a and b; in the vector
//              paths R10 is a bit index and R11 a byte offset instead
//    R16, R17: the pair of bytes (from a and b) being compared
//    R20: constant 64, used by the vector paths to recognise a
//         64-bit lane with no mismatch
//    R8, R9: scratch
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0
	BEQ	R4, R6, cmp_len	// same start of a and b, then compare lengths

	// R14 = min(R5, R7)
	SGTU	R5, R7, R9
	BNE	R9, b_lt_a
	MOVV	R5, R14
	JMP	entry

b_lt_a:
	MOVV	R7, R14

entry:
	BEQ	R14, cmp_len	// minlength is 0

	MOVV	$32, R15
	BGE	R14, R15, lasx	// 32 bytes or more: try the wide paths

	// Fewer than 32 bytes remain (or a wide loop has finished).
tail:
	MOVV	$8, R15
	BLT	R14, R15, lt_8

generic8_loop:
	MOVV	(R4), R10
	MOVV	(R6), R11
	BEQ	R10, R11, generic8_equal

	// R10 != R11: walk the 8 bytes from the lowest one upwards and stop
	// at the first pair that differs. One of the branches below is
	// always taken because the words are known to differ.
cmp8:
	AND	$0xff, R10, R16
	AND	$0xff, R11, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$15, R10, $8, R16
	BSTRPICKV	$15, R11, $8, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$23, R10, $16, R16
	BSTRPICKV	$23, R11, $16, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$31, R10, $24, R16
	BSTRPICKV	$31, R11, $24, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$39, R10, $32, R16
	BSTRPICKV	$39, R11, $32, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$47, R10, $40, R16
	BSTRPICKV	$47, R11, $40, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$55, R10, $48, R16
	BSTRPICKV	$55, R11, $48, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$63, R10, $56, R16
	BSTRPICKV	$63, R11, $56, R17
	BNE	R16, R17, cmp_byte

generic8_equal:
	ADDV	$-8, R14
	BEQ	R14, cmp_len
	ADDV	$8, R4
	ADDV	$8, R6
	BGE	R14, R15, generic8_loop

	// 0 < R14 < 8
lt_8:
	MOVV	$4, R15
	BLT	R14, R15, lt_4
	MOVWU	(R4), R10
	MOVWU	(R6), R11
	BEQ	R10, R11, lt_8_equal

	AND	$0xff, R10, R16
	AND	$0xff, R11, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$15, R10, $8, R16
	BSTRPICKV	$15, R11, $8, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$23, R10, $16, R16
	BSTRPICKV	$23, R11, $16, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$31, R10, $24, R16
	BSTRPICKV	$31, R11, $24, R17
	BNE	R16, R17, cmp_byte

lt_8_equal:
	ADDV	$-4, R14
	BEQ	R14, cmp_len
	ADDV	$4, R4
	ADDV	$4, R6

	// 0 < R14 < 4
lt_4:
	MOVV	$2, R15
	BLT	R14, R15, lt_2
	MOVHU	(R4), R10
	MOVHU	(R6), R11
	BEQ	R10, R11, lt_4_equal

	AND	$0xff, R10, R16
	AND	$0xff, R11, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$15, R10, $8, R16
	BSTRPICKV	$15, R11, $8, R17
	BNE	R16, R17, cmp_byte

lt_4_equal:
	ADDV	$-2, R14
	BEQ	R14, cmp_len
	ADDV	$2, R4
	ADDV	$2, R6

	// exactly one byte left
lt_2:
	MOVBU	(R4), R16
	MOVBU	(R6), R17
	BNE	R16, R17, cmp_byte
	JMP	cmp_len

	// Compare 1 byte taken from R16/R17 that are known to differ.
cmp_byte:
	SGTU	R16, R17, R4	// R4 = 1 if (R16 > R17)
	BNE	R0, R4, ret
	MOVV	$-1, R4
	RET

	// All compared bytes were equal: the result is decided by the lengths.
cmp_len:
	SGTU	R5, R7, R8
	SGTU	R7, R5, R9
	SUBV	R9, R8, R4	// R4 = (R5 > R7) - (R7 > R5)

ret:
	RET

	// At least 32 bytes to compare. Pick the widest path the CPU supports.
lasx:
	MOVV	$64, R20	// "no mismatch in this lane" marker for CTOV results
	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R9
	BEQ	R9, lsx

	MOVV	$128, R15
	BLT	R14, R15, lasx32	// load the 32-byte threshold before the short loop

	// 4 x 32 bytes per iteration. XVSEQB leaves all-ones in every byte
	// position that matched, so a zero byte flags a mismatch.
lasx128_loop:
	XVMOVQ	(R4), X0
	XVMOVQ	(R6), X1
	XVSEQB	X0, X1, X0
	XVSETANYEQB	X0, FCC0
	BFPT	lasx_found_0

	XVMOVQ	32(R4), X0
	XVMOVQ	32(R6), X1
	XVSEQB	X0, X1, X0
	XVSETANYEQB	X0, FCC0
	BFPT	lasx_found_32

	XVMOVQ	64(R4), X0
	XVMOVQ	64(R6), X1
	XVSEQB	X0, X1, X0
	XVSETANYEQB	X0, FCC0
	BFPT	lasx_found_64

	XVMOVQ	96(R4), X0
	XVMOVQ	96(R6), X1
	XVSEQB	X0, X1, X0
	XVSETANYEQB	X0, FCC0
	BFPT	lasx_found_96

	ADDV	$-128, R14
	BEQ	R14, cmp_len
	ADDV	$128, R4
	ADDV	$128, R6
	BGE	R14, R15, lasx128_loop

	// R15 must be 32 here so that the back-edge of lasx32_loop keeps
	// iterating while a full 32-byte chunk remains.
lasx32:
	MOVV	$32, R15
	BLT	R14, R15, tail

lasx32_loop:
	XVMOVQ	(R4), X0
	XVMOVQ	(R6), X1
	XVSEQB	X0, X1, X0
	XVSETANYEQB	X0, FCC0
	BFPT	lasx_found_0

	ADDV	$-32, R14
	BEQ	R14, cmp_len
	ADDV	$32, R4
	ADDV	$32, R6
	BGE	R14, R15, lasx32_loop
	JMP	tail

	// R11 = offset (from R4/R6) of the 32-byte chunk held in X0.
lasx_found_0:
	MOVV	R0, R11
	JMP	lasx_find_byte

lasx_found_32:
	MOVV	$32, R11
	JMP	lasx_find_byte

lasx_found_64:
	MOVV	$64, R11
	JMP	lasx_find_byte

lasx_found_96:
	MOVV	$96, R11

	// Scan the four 64-bit lanes of X0. CTOV counts the trailing one
	// bits; a result of 64 (R20) means the whole lane matched.
lasx_find_byte:
	XVMOVQ	X0.V[0], R10
	CTOV	R10, R10
	BNE	R10, R20, find_byte
	ADDV	$8, R11

	XVMOVQ	X0.V[1], R10
	CTOV	R10, R10
	BNE	R10, R20, find_byte
	ADDV	$8, R11

	XVMOVQ	X0.V[2], R10
	CTOV	R10, R10
	BNE	R10, R20, find_byte
	ADDV	$8, R11

	// The mismatch has to be in the last lane.
	XVMOVQ	X0.V[3], R10
	CTOV	R10, R10
	JMP	find_byte

lsx:
	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R9
	BEQ	R9, generic32_loop

	MOVV	$64, R15
	BLT	R14, R15, lsx16	// load the 16-byte threshold before the short loop

	// 4 x 16 bytes per iteration, same scheme as the LASX loop.
lsx64_loop:
	VMOVQ	(R4), V0
	VMOVQ	(R6), V1
	VSEQB	V0, V1, V0
	VSETANYEQB	V0, FCC0
	BFPT	lsx_found_0

	VMOVQ	16(R4), V0
	VMOVQ	16(R6), V1
	VSEQB	V0, V1, V0
	VSETANYEQB	V0, FCC0
	BFPT	lsx_found_16

	VMOVQ	32(R4), V0
	VMOVQ	32(R6), V1
	VSEQB	V0, V1, V0
	VSETANYEQB	V0, FCC0
	BFPT	lsx_found_32

	VMOVQ	48(R4), V0
	VMOVQ	48(R6), V1
	VSEQB	V0, V1, V0
	VSETANYEQB	V0, FCC0
	BFPT	lsx_found_48

	ADDV	$-64, R14
	BEQ	R14, cmp_len
	ADDV	$64, R4
	ADDV	$64, R6
	BGE	R14, R15, lsx64_loop

	// R15 must be 16 here so that the back-edge of lsx16_loop keeps
	// iterating while a full 16-byte chunk remains.
lsx16:
	MOVV	$16, R15
	BLT	R14, R15, tail

lsx16_loop:
	VMOVQ	(R4), V0
	VMOVQ	(R6), V1
	VSEQB	V0, V1, V0
	VSETANYEQB	V0, FCC0
	BFPT	lsx_found_0

	ADDV	$-16, R14
	BEQ	R14, cmp_len
	ADDV	$16, R4
	ADDV	$16, R6
	BGE	R14, R15, lsx16_loop
	JMP	tail

	// R11 = offset (from R4/R6) of the 16-byte chunk held in V0.
lsx_found_0:
	MOVV	R0, R11
	JMP	lsx_find_byte

lsx_found_16:
	MOVV	$16, R11
	JMP	lsx_find_byte

lsx_found_32:
	MOVV	$32, R11
	JMP	lsx_find_byte

lsx_found_48:
	MOVV	$48, R11

lsx_find_byte:
	VMOVQ	V0.V[0], R10
	CTOV	R10, R10
	BNE	R10, R20, find_byte
	ADDV	$8, R11

	// The mismatch has to be in the upper lane.
	VMOVQ	V0.V[1], R10
	CTOV	R10, R10

	// R10 = bit index of the first mismatch inside its 64-bit lane,
	// R11 = byte offset of that lane. Load the differing byte pair.
find_byte:
	SRLV	$3, R10	// bit index -> byte index
	ADDV	R10, R11
	ADDV	R11, R4
	ADDV	R11, R6
	MOVBU	(R4), R16	// zero-extend: cmp_byte compares unsigned
	MOVBU	(R6), R17
	JMP	cmp_byte

	// No vector unit available: compare 4 x 8 bytes per iteration and
	// let cmp8 locate the differing byte inside a mismatching word.
generic32_loop:
	MOVV	(R4), R10
	MOVV	(R6), R11
	BNE	R10, R11, cmp8

	MOVV	8(R4), R10
	MOVV	8(R6), R11
	BNE	R10, R11, cmp8

	MOVV	16(R4), R10
	MOVV	16(R6), R11
	BNE	R10, R11, cmp8

	MOVV	24(R4), R10
	MOVV	24(R6), R11
	BNE	R10, R11, cmp8

	ADDV	$-32, R14
	BEQ	R14, cmp_len
	ADDV	$32, R4
	ADDV	$32, R6
	MOVV	$32, R15
	BGE	R14, R15, generic32_loop
	JMP	tail