// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// Compare is the ABIInternal entry point for comparing two byte slices
// a and b. It does no comparison itself: it only moves b's base and length
// into the registers cmpbody reads and then jumps there, so cmpbody's RET
// returns directly to Compare's caller with the result in R4.
TEXT ·Compare<ABIInternal>(SB),NOSPLIT,$0-56
	// Incoming registers:
	// R4 = a_base
	// R5 = a_len
	// R6 = a_cap (unused)
	// R7 = b_base (want in R6)
	// R8 = b_len (want in R7)
	// R9 = b_cap (unused)

	// Order matters: R7 (b_base) must be copied out before it is
	// overwritten with b_len.
	MOVV	R7, R6
	MOVV	R8, R7
	JMP	cmpbody<>(SB)
// cmpstring is the ABIInternal entry point for comparing two operands that
// are each passed as a (base, len) pair. The pairs already arrive in the
// registers cmpbody reads, so the routine is a bare jump; cmpbody's RET
// returns directly to cmpstring's caller with the result in R4.
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT,$0-40
	// R4 = a_base
	// R5 = a_len
	// R6 = b_base
	// R7 = b_len
	JMP	cmpbody<>(SB)
// cmpbody compares the first min(len(a), len(b)) bytes of a and b and, if
// they are all equal, falls back to comparing the two lengths.
//
// input:
//    R4: points to the start of a
//    R5: length of a
//    R6: points to the start of b
//    R7: length of b
// for regabi the return value (-1/0/1) in R4
//
// registers used internally:
//    R14:      bytes still to be compared (starts at min(R5, R7))
//    R15:      chunk-size threshold of the loop currently running
//    R10, R11: data words being compared / scratch for locating a mismatch
//    R16, R17: the pair of differing bytes handed to cmp_byte
//    R20:      constant 64, the "no mismatch in this doubleword" marker
//    R8, R9:   scratch
//    X0, X1 (V0, V1) and FCC0: vector scratch on the LASX/LSX paths
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0
	BEQ	R4, R6, cmp_len	// same start of a and b, then compare lengths

	// R14 = min(R5, R7)
	SGTU	R5, R7, R9	// R9 = 1 if (R5 > R7)
	BNE	R9, b_lt_a
	MOVV	R5, R14
	JMP	entry

b_lt_a:
	MOVV	R7, R14

entry:
	BEQ	R14, cmp_len	// minlength is 0

	// 32 bytes or more: try the vector / unrolled paths first.
	MOVV	$32, R15
	BGE	R14, R15, lasx

	// Scalar tail: 8 bytes at a time, then 4, 2 and 1.
tail:
	MOVV	$8, R15
	BLT	R14, R15, lt_8
generic8_loop:
	MOVV	(R4), R10
	MOVV	(R6), R11
	BEQ	R10, R11, generic8_equal

	// R10 and R11 hold 8 bytes of a and b. Walk them from the least
	// significant byte (the lowest address) upwards and stop at the
	// first pair that differs.
cmp8:
	AND	$0xff, R10, R16
	AND	$0xff, R11, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$15, R10, $8, R16
	BSTRPICKV	$15, R11, $8, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$23, R10, $16, R16
	BSTRPICKV	$23, R11, $16, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$31, R10, $24, R16
	BSTRPICKV	$31, R11, $24, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$39, R10, $32, R16
	BSTRPICKV	$39, R11, $32, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$47, R10, $40, R16
	BSTRPICKV	$47, R11, $40, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$55, R10, $48, R16
	BSTRPICKV	$55, R11, $48, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$63, R10, $56, R16
	BSTRPICKV	$63, R11, $56, R17
	BNE	R16, R17, cmp_byte

generic8_equal:
	ADDV	$-8, R14
	BEQ	R14, cmp_len
	ADDV	$8, R4
	ADDV	$8, R6
	BGE	R14, R15, generic8_loop

	// Fewer than 8 bytes left.
lt_8:
	MOVV	$4, R15
	BLT	R14, R15, lt_4

	MOVWU	(R4), R10
	MOVWU	(R6), R11
	BEQ	R10, R11, lt_8_equal

	AND	$0xff, R10, R16
	AND	$0xff, R11, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$15, R10, $8, R16
	BSTRPICKV	$15, R11, $8, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$23, R10, $16, R16
	BSTRPICKV	$23, R11, $16, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$31, R10, $24, R16
	BSTRPICKV	$31, R11, $24, R17
	BNE	R16, R17, cmp_byte

lt_8_equal:
	ADDV	$-4, R14
	BEQ	R14, cmp_len
	ADDV	$4, R4
	ADDV	$4, R6

	// Fewer than 4 bytes left.
lt_4:
	MOVV	$2, R15
	BLT	R14, R15, lt_2

	MOVHU	(R4), R10
	MOVHU	(R6), R11
	BEQ	R10, R11, lt_4_equal

	AND	$0xff, R10, R16
	AND	$0xff, R11, R17
	BNE	R16, R17, cmp_byte

	BSTRPICKV	$15, R10, $8, R16
	BSTRPICKV	$15, R11, $8, R17
	BNE	R16, R17, cmp_byte

lt_4_equal:
	ADDV	$-2, R14
	BEQ	R14, cmp_len
	ADDV	$2, R4
	ADDV	$2, R6

	// Exactly one byte left.
lt_2:
	MOVBU	(R4), R16
	MOVBU	(R6), R17
	BNE	R16, R17, cmp_byte
	JMP	cmp_len

	// Compare 1 byte taken from R16/R17 that are known to differ.
cmp_byte:
	SGTU	R16, R17, R4	// R4 = 1 if (R16 > R17)
	BNE	R0, R4, ret
	MOVV	$-1, R4
	RET

	// All compared bytes are equal: the result is decided by the lengths.
	// R4 = (R5 > R7) - (R7 > R5), i.e. 1, 0 or -1.
cmp_len:
	SGTU	R5, R7, R8
	SGTU	R7, R5, R9
	SUBV	R9, R8, R4

ret:
	RET

	// LASX path: 32-byte vectors, unrolled to 128 bytes per iteration.
	// Reached only with R14 >= 32.
lasx:
	MOVV	$64, R20	// CTOV result meaning "all 64 bits set"
	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R9
	BEQ	R9, lsx

	MOVV	$128, R15
	BLT	R14, R15, lasx32
lasx128_loop:
	// XVSEQB leaves a zero byte in X0 for every position where a and b
	// differ; XVSETANYEQB then sets FCC0 if any such zero byte exists.
	XVMOVQ	(R4), X0
	XVMOVQ	(R6), X1
	XVSEQB	X0, X1, X0
	XVSETANYEQB	X0, FCC0
	BFPT	lasx_found_0

	XVMOVQ	32(R4), X0
	XVMOVQ	32(R6), X1
	XVSEQB	X0, X1, X0
	XVSETANYEQB	X0, FCC0
	BFPT	lasx_found_32

	XVMOVQ	64(R4), X0
	XVMOVQ	64(R6), X1
	XVSEQB	X0, X1, X0
	XVSETANYEQB	X0, FCC0
	BFPT	lasx_found_64

	XVMOVQ	96(R4), X0
	XVMOVQ	96(R6), X1
	XVSEQB	X0, X1, X0
	XVSETANYEQB	X0, FCC0
	BFPT	lasx_found_96

	ADDV	$-128, R14
	BEQ	R14, cmp_len
	ADDV	$128, R4
	ADDV	$128, R6
	BGE	R14, R15, lasx128_loop

	// Reload the threshold before the single-vector loop. Inputs shorter
	// than 128 bytes branch here directly; without the reload R15 would
	// still be 128, the loop-back test below could never succeed, and only
	// the first 32 bytes would be compared with vectors.
lasx32:
	MOVV	$32, R15
	BLT	R14, R15, tail
lasx32_loop:
	XVMOVQ	(R4), X0
	XVMOVQ	(R6), X1
	XVSEQB	X0, X1, X0
	XVSETANYEQB	X0, FCC0
	BFPT	lasx_found_0

	ADDV	$-32, R14
	BEQ	R14, cmp_len
	ADDV	$32, R4
	ADDV	$32, R6
	BGE	R14, R15, lasx32_loop
	JMP	tail

	// R11 = offset from R4/R6 of the 32-byte vector holding the mismatch.
lasx_found_0:
	MOVV	R0, R11
	JMP	lasx_find_byte

lasx_found_32:
	MOVV	$32, R11
	JMP	lasx_find_byte

lasx_found_64:
	MOVV	$64, R11
	JMP	lasx_find_byte

lasx_found_96:
	MOVV	$96, R11

	// Scan the four doublewords of the equality mask in X0. CTOV counts
	// the trailing one bits; a count of 64 (R20) means the doubleword has
	// no mismatch, so advance R11 by 8 bytes and try the next one.
lasx_find_byte:
	XVMOVQ	X0.V[0], R10
	CTOV	R10, R10
	BNE	R10, R20, find_byte
	ADDV	$8, R11

	XVMOVQ	X0.V[1], R10
	CTOV	R10, R10
	BNE	R10, R20, find_byte
	ADDV	$8, R11

	XVMOVQ	X0.V[2], R10
	CTOV	R10, R10
	BNE	R10, R20, find_byte
	ADDV	$8, R11

	// The mismatch must be in the last doubleword; no check needed.
	XVMOVQ	X0.V[3], R10
	CTOV	R10, R10
	JMP	find_byte

	// LSX path: 16-byte vectors, unrolled to 64 bytes per iteration.
	// Reached only with R14 >= 32 and R20 = 64.
lsx:
	MOVBU	internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R9
	BEQ	R9, generic32_loop

	MOVV	$64, R15
	BLT	R14, R15, lsx16
lsx64_loop:
	VMOVQ	(R4), V0
	VMOVQ	(R6), V1
	VSEQB	V0, V1, V0
	VSETANYEQB	V0, FCC0
	BFPT	lsx_found_0

	VMOVQ	16(R4), V0
	VMOVQ	16(R6), V1
	VSEQB	V0, V1, V0
	VSETANYEQB	V0, FCC0
	BFPT	lsx_found_16

	VMOVQ	32(R4), V0
	VMOVQ	32(R6), V1
	VSEQB	V0, V1, V0
	VSETANYEQB	V0, FCC0
	BFPT	lsx_found_32

	VMOVQ	48(R4), V0
	VMOVQ	48(R6), V1
	VSEQB	V0, V1, V0
	VSETANYEQB	V0, FCC0
	BFPT	lsx_found_48

	ADDV	$-64, R14
	BEQ	R14, cmp_len
	ADDV	$64, R4
	ADDV	$64, R6
	BGE	R14, R15, lsx64_loop

	// Reload the threshold before the single-vector loop. Inputs shorter
	// than 64 bytes branch here directly; without the reload R15 would
	// still be 64 and the loop below would run only once.
lsx16:
	MOVV	$16, R15
	BLT	R14, R15, tail
lsx16_loop:
	VMOVQ	(R4), V0
	VMOVQ	(R6), V1
	VSEQB	V0, V1, V0
	VSETANYEQB	V0, FCC0
	BFPT	lsx_found_0

	ADDV	$-16, R14
	BEQ	R14, cmp_len
	ADDV	$16, R4
	ADDV	$16, R6
	BGE	R14, R15, lsx16_loop
	JMP	tail

	// R11 = offset from R4/R6 of the 16-byte vector holding the mismatch.
lsx_found_0:
	MOVV	R0, R11
	JMP	lsx_find_byte

lsx_found_16:
	MOVV	$16, R11
	JMP	lsx_find_byte

lsx_found_32:
	MOVV	$32, R11
	JMP	lsx_find_byte

lsx_found_48:
	MOVV	$48, R11

	// Same scan as lasx_find_byte, over the two doublewords of V0.
lsx_find_byte:
	VMOVQ	V0.V[0], R10
	CTOV	R10, R10
	BNE	R10, R20, find_byte
	ADDV	$8, R11

	// The mismatch must be in the second doubleword; fall through.
	VMOVQ	V0.V[1], R10
	CTOV	R10, R10

	// R10 = count of trailing one bits in the equality mask, R11 = byte
	// offset of that doubleword. R10 >> 3 is the index of the first
	// differing byte inside the doubleword.
find_byte:
	SRLV	$3, R10
	ADDV	R10, R11
	ADDV	R11, R4
	ADDV	R11, R6
	// Zero-extending loads, matching lt_2 and the unsigned SGTU in
	// cmp_byte.
	MOVBU	(R4), R16
	MOVBU	(R6), R17
	JMP	cmp_byte

	// No vector unit: compare 32 bytes per iteration with four 8-byte
	// loads. Reached only with R14 >= 32. A mismatching pair is resolved
	// by cmp8, which needs only R10/R11.
generic32_loop:
	MOVV	(R4), R10
	MOVV	(R6), R11
	BNE	R10, R11, cmp8
	MOVV	8(R4), R10
	MOVV	8(R6), R11
	BNE	R10, R11, cmp8
	MOVV	16(R4), R10
	MOVV	16(R6), R11
	BNE	R10, R11, cmp8
	MOVV	24(R4), R10
	MOVV	24(R6), R11
	BNE	R10, R11, cmp8
	ADDV	$-32, R14
	BEQ	R14, cmp_len
	ADDV	$32, R4
	ADDV	$32, R6
	MOVV	$32, R15
	BGE	R14, R15, generic32_loop
	JMP	tail