Text file src/internal/bytealg/compare_loong64.s

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  TEXT ·Compare<ABIInternal>(SB),NOSPLIT,$0-56	// Compare: takes a and b as (base, len, cap) triples, repacks them and defers to cmpbody
     9  	// R4 = a_base
    10  	// R5 = a_len
    11  	// R6 = a_cap (unused)
    12  	// R7 = b_base (want in R6)
    13  	// R8 = b_len (want in R7)
    14  	// R9 = b_cap (unused)
    15  	MOVV	R7, R6	// b_base -> R6 (a_cap is dropped); must run before R7 is overwritten below
    16  	MOVV	R8, R7	// b_len -> R7
    17  	JMP	cmpbody<>(SB)	// tail jump: cmpbody returns to our caller with the result in R4
    18  
    19  TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT,$0-40	// cmpstring: takes a and b as (base, len) pairs and defers to cmpbody
    20  	// R4 = a_base
    21  	// R5 = a_len
    22  	// R6 = b_base
    23  	// R7 = b_len
    24  	JMP	cmpbody<>(SB)	// R4-R7 already match cmpbody's input layout, so tail-jump without any shuffling
    25  
    26  // cmpbody compares the first min(R5, R7) bytes of a and b; if those are all equal the lengths decide.
    27  //    R4: points to the start of a
    28  //    R5: length of a
    29  //    R6: points to the start of b
    30  //    R7: length of b
    31  // for regabi the return value (-1/0/1) in R4; clobbers R6, R8-R11, R14-R17, R20, X0/X1 (V0/V1), FCC0
    32  TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0
    33  	BEQ	R4, R6, cmp_len	// same start of a and b, then compare lengths
    34  
    35  	SGTU	R5, R7, R9	// R9 = 1 if (a_len > b_len)
    36  	BNE	R9, b_lt_a
    37  	MOVV	R5, R14	// R14 = a_len = min(a_len, b_len)
    38  	JMP	entry
    39  
    40  b_lt_a:
    41  	MOVV	R7, R14	// R14 = b_len = min(a_len, b_len)
    42  
    43  entry:
    44  	BEQ	R14, cmp_len	// minlength is 0
    45  
    46  	MOVV	$32, R15
    47  	BGE	R14, R15, lasx	// at least 32 bytes: pick a LASX / LSX / unrolled scalar path
    48  tail:
    49  	MOVV	$8, R15	// R14 = bytes still to compare (non-zero here); R15 = current chunk size
    50  	BLT	R14, R15, lt_8
    51  generic8_loop:
    52  	MOVV	(R4), R10	// compare 8 bytes per iteration
    53  	MOVV	(R6), R11
    54  	BEQ	R10, R11, generic8_equal
    55  
    56  cmp8:
    57  	AND	$0xff, R10, R16	// R10 != R11: scan from bits 7:0 upward, i.e. lowest address first (little-endian)
    58  	AND	$0xff, R11, R17
    59  	BNE	R16, R17, cmp_byte
    60  
    61  	BSTRPICKV	$15, R10, $8, R16	// R16 = bits 15:8 of R10 (byte 1)
    62  	BSTRPICKV	$15, R11, $8, R17
    63  	BNE	R16, R17, cmp_byte
    64  
    65  	BSTRPICKV	$23, R10, $16, R16
    66  	BSTRPICKV	$23, R11, $16, R17
    67  	BNE	R16, R17, cmp_byte
    68  
    69  	BSTRPICKV	$31, R10, $24, R16
    70  	BSTRPICKV	$31, R11, $24, R17
    71  	BNE	R16, R17, cmp_byte
    72  
    73  	BSTRPICKV	$39, R10, $32, R16
    74  	BSTRPICKV	$39, R11, $32, R17
    75  	BNE	R16, R17, cmp_byte
    76  
    77  	BSTRPICKV	$47, R10, $40, R16
    78  	BSTRPICKV	$47, R11, $40, R17
    79  	BNE	R16, R17, cmp_byte
    80  
    81  	BSTRPICKV	$55, R10, $48, R16
    82  	BSTRPICKV	$55, R11, $48, R17
    83  	BNE	R16, R17, cmp_byte
    84  
    85  	BSTRPICKV	$63, R10, $56, R16
    86  	BSTRPICKV	$63, R11, $56, R17
    87  	BNE	R16, R17, cmp_byte
    88  
    89  generic8_equal:
    90  	ADDV	$-8, R14
    91  	BEQ	R14, cmp_len	// common prefix exhausted: the lengths decide
    92  	ADDV	$8, R4
    93  	ADDV	$8, R6
    94  	BGE	R14, R15, generic8_loop
    95  
    96  lt_8:
    97  	MOVV	$4, R15
    98  	BLT	R14, R15, lt_4
    99  
   100  	MOVWU	(R4), R10	// compare 4 bytes, zero-extended
   101  	MOVWU	(R6), R11
   102  	BEQ	R10, R11, lt_8_equal
   103  
   104  	AND	$0xff, R10, R16
   105  	AND	$0xff, R11, R17
   106  	BNE	R16, R17, cmp_byte
   107  
   108  	BSTRPICKV	$15, R10, $8, R16
   109  	BSTRPICKV	$15, R11, $8, R17
   110  	BNE	R16, R17, cmp_byte
   111  
   112  	BSTRPICKV	$23, R10, $16, R16
   113  	BSTRPICKV	$23, R11, $16, R17
   114  	BNE	R16, R17, cmp_byte
   115  
   116  	BSTRPICKV	$31, R10, $24, R16
   117  	BSTRPICKV	$31, R11, $24, R17
   118  	BNE	R16, R17, cmp_byte
   119  
   120  lt_8_equal:
   121  	ADDV	$-4, R14
   122  	BEQ	R14, cmp_len
   123  	ADDV	$4, R4
   124  	ADDV	$4, R6
   125  
   126  lt_4:
   127  	MOVV	$2, R15
   128  	BLT	R14, R15, lt_2
   129  
   130  	MOVHU	(R4), R10	// compare 2 bytes, zero-extended
   131  	MOVHU	(R6), R11
   132  	BEQ	R10, R11, lt_4_equal
   133  
   134  	AND	$0xff, R10, R16
   135  	AND	$0xff, R11, R17
   136  	BNE	R16, R17, cmp_byte
   137  
   138  	BSTRPICKV	$15, R10, $8, R16
   139  	BSTRPICKV	$15, R11, $8, R17
   140  	BNE	R16, R17, cmp_byte
   141  
   142  lt_4_equal:
   143  	ADDV	$-2, R14
   144  	BEQ	R14, cmp_len
   145  	ADDV	$2, R4
   146  	ADDV	$2, R6
   147  
   148  lt_2:
   149  	MOVBU	(R4), R16	// last remaining byte
   150  	MOVBU	(R6), R17
   151  	BNE	R16, R17, cmp_byte
   152  	JMP	cmp_len
   153  
   154  	// Compare 1 byte taken from R16/R17 that are known to differ.
   155  cmp_byte:
   156  	SGTU	R16, R17, R4	// R4 = 1 if (R16 > R17)
   157  	BNE	R0, R4, ret
   158  	MOVV	$-1, R4	// otherwise R16 < R17
   159  	RET
   160  
   161  cmp_len:
   162  	SGTU	R5, R7, R8	// R8 = 1 if (a_len > b_len)
   163  	SGTU	R7, R5, R9	// R9 = 1 if (b_len > a_len)
   164  	SUBV	R9, R8, R4	// R4 = R8 - R9, i.e. 1, 0 or -1
   165  
   166  ret:
   167  	RET
   168  
   169  lasx:
   170  	MOVV	$64, R20	// R20 = CTOV result for a 64-bit lane with all bits set (no difference in that lane)
   171  	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R9
   172  	BEQ	R9, lsx	// no LASX: try LSX next
   173  
   174  	MOVV	$128, R15
   175  	BLT	R14, R15, lasx32	// go through lasx32 so that R15 is reloaded with the 32-byte loop bound
   176  lasx128_loop:
   177  	XVMOVQ	(R4), X0
   178  	XVMOVQ	(R6), X1
   179  	XVSEQB	X0, X1, X0	// per-byte mask: all ones where equal, zero where different
   180  	XVSETANYEQB	X0, FCC0	// FCC0 = 1 if any mask byte is zero, i.e. some byte differs
   181  	BFPT	lasx_found_0
   182  
   183  	XVMOVQ	32(R4), X0
   184  	XVMOVQ	32(R6), X1
   185  	XVSEQB	X0, X1, X0
   186  	XVSETANYEQB	X0, FCC0
   187  	BFPT	lasx_found_32
   188  
   189  	XVMOVQ	64(R4), X0
   190  	XVMOVQ	64(R6), X1
   191  	XVSEQB	X0, X1, X0
   192  	XVSETANYEQB	X0, FCC0
   193  	BFPT	lasx_found_64
   194  
   195  	XVMOVQ	96(R4), X0
   196  	XVMOVQ	96(R6), X1
   197  	XVSEQB	X0, X1, X0
   198  	XVSETANYEQB	X0, FCC0
   199  	BFPT	lasx_found_96
   200  
   201  	ADDV	$-128, R14
   202  	BEQ	R14, cmp_len
   203  	ADDV	$128, R4
   204  	ADDV	$128, R6
   205  	BGE	R14, R15, lasx128_loop
   206  lasx32:
   207  	MOVV	$32, R15	// loop bound for lasx32_loop; must be set on every way into that loop
   208  	BLT	R14, R15, tail
   209  lasx32_loop:
   210  	XVMOVQ	(R4), X0
   211  	XVMOVQ	(R6), X1
   212  	XVSEQB	X0, X1, X0
   213  	XVSETANYEQB	X0, FCC0
   214  	BFPT	lasx_found_0
   215  
   216  	ADDV	$-32, R14
   217  	BEQ	R14, cmp_len
   218  	ADDV	$32, R4
   219  	ADDV	$32, R6
   220  	BGE	R14, R15, lasx32_loop
   221  	JMP	tail	// fewer than 32 bytes left: finish with the scalar code
   222  
   223  lasx_found_0:
   224  	MOVV	R0, R11	// R11 = offset from R4/R6 of the 32-byte block that holds the difference
   225  	JMP	lasx_find_byte
   226  
   227  lasx_found_32:
   228  	MOVV	$32, R11
   229  	JMP	lasx_find_byte
   230  
   231  lasx_found_64:
   232  	MOVV	$64, R11
   233  	JMP	lasx_find_byte
   234  
   235  lasx_found_96:
   236  	MOVV	$96, R11
   237  
   238  lasx_find_byte:
   239  	XVMOVQ	X0.V[0], R10	// examine the equality mask one 64-bit lane at a time
   240  	CTOV	R10, R10	// trailing ones = 8 * (number of leading equal bytes in this lane)
   241  	BNE	R10, R20, find_byte
   242  	ADDV	$8, R11	// lane fully equal: move on to the next 8 bytes
   243  
   244  	XVMOVQ	X0.V[1], R10
   245  	CTOV	R10, R10
   246  	BNE	R10, R20, find_byte
   247  	ADDV	$8, R11
   248  
   249  	XVMOVQ	X0.V[2], R10
   250  	CTOV	R10, R10
   251  	BNE	R10, R20, find_byte
   252  	ADDV	$8, R11
   253  
   254  	XVMOVQ	X0.V[3], R10	// lanes 0-2 were equal, so the difference is in the last lane
   255  	CTOV	R10, R10
   256  	JMP	find_byte
   257  
   258  lsx:
   259  	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R9
   260  	BEQ	R9, generic32_loop	// no LSX either: unrolled scalar loop
   261  
   262  	MOVV	$64, R15
   263  	BLT	R14, R15, lsx16	// go through lsx16 so that R15 is reloaded with the 16-byte loop bound
   264  lsx64_loop:
   265  	VMOVQ	(R4), V0
   266  	VMOVQ	(R6), V1
   267  	VSEQB	V0, V1, V0	// per-byte mask: all ones where equal, zero where different
   268  	VSETANYEQB	V0, FCC0	// FCC0 = 1 if any mask byte is zero, i.e. some byte differs
   269  	BFPT	lsx_found_0
   270  
   271  	VMOVQ	16(R4), V0
   272  	VMOVQ	16(R6), V1
   273  	VSEQB	V0, V1, V0
   274  	VSETANYEQB	V0, FCC0
   275  	BFPT	lsx_found_16
   276  
   277  	VMOVQ	32(R4), V0
   278  	VMOVQ	32(R6), V1
   279  	VSEQB	V0, V1, V0
   280  	VSETANYEQB	V0, FCC0
   281  	BFPT	lsx_found_32
   282  
   283  	VMOVQ	48(R4), V0
   284  	VMOVQ	48(R6), V1
   285  	VSEQB	V0, V1, V0
   286  	VSETANYEQB	V0, FCC0
   287  	BFPT	lsx_found_48
   288  
   289  	ADDV	$-64, R14
   290  	BEQ	R14, cmp_len
   291  	ADDV	$64, R4
   292  	ADDV	$64, R6
   293  	BGE	R14, R15, lsx64_loop
   294  lsx16:
   295  	MOVV	$16, R15	// loop bound for lsx16_loop; must be set on every way into that loop
   296  	BLT	R14, R15, tail
   297  lsx16_loop:
   298  	VMOVQ	(R4), V0
   299  	VMOVQ	(R6), V1
   300  	VSEQB	V0, V1, V0
   301  	VSETANYEQB	V0, FCC0
   302  	BFPT	lsx_found_0
   303  
   304  	ADDV	$-16, R14
   305  	BEQ	R14, cmp_len
   306  	ADDV	$16, R4
   307  	ADDV	$16, R6
   308  	BGE	R14, R15, lsx16_loop
   309  	JMP	tail	// fewer than 16 bytes left: finish with the scalar code
   310  
   311  lsx_found_0:
   312  	MOVV	R0, R11	// R11 = offset from R4/R6 of the 16-byte block that holds the difference
   313  	JMP	lsx_find_byte
   314  
   315  lsx_found_16:
   316  	MOVV	$16, R11
   317  	JMP	lsx_find_byte
   318  
   319  lsx_found_32:
   320  	MOVV	$32, R11
   321  	JMP	lsx_find_byte
   322  
   323  lsx_found_48:
   324  	MOVV	$48, R11
   325  
   326  lsx_find_byte:
   327  	VMOVQ	V0.V[0], R10
   328  	CTOV	R10, R10	// trailing ones = 8 * (number of leading equal bytes in this lane)
   329  	BNE	R10, R20, find_byte
   330  	ADDV	$8, R11	// low lane fully equal, so the difference is in the high lane
   331  
   332  	VMOVQ	V0.V[1], R10
   333  	CTOV	R10, R10
   334  
   335  find_byte:
   336  	SRLV	$3, R10	// bit count -> byte index within the lane
   337  	ADDV	R10, R11	// R11 = offset of the first differing byte
   338  	ADDV	R11, R4
   339  	ADDV	R11, R6
   340  	MOVBU	(R4), R16	// zero-extend, matching lt_2; cmp_byte compares unsigned
   341  	MOVBU	(R6), R17
   342  	JMP	cmp_byte
   343  
   344  generic32_loop:
   345  	MOVV	(R4), R10	// scalar fallback: four 8-byte compares per iteration
   346  	MOVV	(R6), R11
   347  	BNE	R10, R11, cmp8	// cmp8 locates the differing byte within R10/R11
   348  	MOVV	8(R4), R10
   349  	MOVV	8(R6), R11
   350  	BNE	R10, R11, cmp8
   351  	MOVV	16(R4), R10
   352  	MOVV	16(R6), R11
   353  	BNE	R10, R11, cmp8
   354  	MOVV	24(R4), R10
   355  	MOVV	24(R6), R11
   356  	BNE	R10, R11, cmp8
   357  	ADDV	$-32, R14
   358  	BEQ	R14, cmp_len
   359  	ADDV	$32, R4
   360  	ADDV	$32, R6
   361  	MOVV	$32, R15
   362  	BGE	R14, R15, generic32_loop
   363  	JMP	tail
   364  

View as plain text