Source file src/math/big/internal/asmgen/asm.go

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package asmgen
     6  
     7  import (
     8  	"bytes"
     9  	"cmp"
    10  	"fmt"
    11  	"math/bits"
    12  	"slices"
    13  	"strings"
    14  )
    15  
    16  // Note: Exported fields and methods are expected to be used
    17  // by function generators (like the ones in add.go and so on).
    18  // Unexported fields and methods should not be.
    19  
// An Asm is an assembly file being written.
// It accumulates the emitted text and tracks which of the
// architecture's registers are available for allocation and
// which optional CPU features are currently considered enabled.
type Asm struct {
	Arch     *Arch           // architecture being targeted
	out      bytes.Buffer    // output buffer holding the assembly emitted so far
	regavail uint64          // bitmap of available registers; bit i set means Arch.regs[i] is free
	enabled  map[Option]bool // enabled optional CPU features (see SetOption and Enabled)
}
    27  
    28  // NewAsm returns a new Asm preparing assembly
    29  // for the given architecture to be written to file.
    30  func NewAsm(arch *Arch) *Asm {
    31  	a := &Asm{Arch: arch, enabled: make(map[Option]bool)}
    32  	buildTag := ""
    33  	if arch.Build != "" {
    34  		buildTag = " && (" + arch.Build + ")"
    35  	}
    36  	a.Printf(asmHeader, buildTag)
    37  	return a
    38  }
    39  
    40  // Note: Using Copyright 2025, not the current year, to avoid test failures
    41  // on January 1 and spurious diffs when regenerating assembly.
    42  // The generator was written in 2025; that's good enough.
    43  // (As a matter of policy the Go project does not update copyright
    44  // notices every year, since copyright terms are so long anyway.)
    45  
// asmHeader is the format string for the header that NewAsm writes
// at the top of the generated assembly. Its single %s verb receives
// the architecture-specific build constraint suffix, which is empty
// when the architecture has no additional constraint.
var asmHeader = `// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.

//go:build !math_big_pure_go%s

#include "textflag.h"
`
    56  
    57  // Fatalf reports a fatal error by panicking.
    58  // Panicking is appropriate because there is a bug in the generator,
    59  // and panicking will show the exact source lines leading to that bug.
    60  func (a *Asm) Fatalf(format string, args ...any) {
    61  	text := a.out.String()
    62  	i := strings.LastIndex(text, "\nTEXT")
    63  	text = text[i+1:]
    64  	panic("[" + a.Arch.Name + "] asmgen internal error: " + fmt.Sprintf(format, args...) + "\n" + text)
    65  }
    66  
    67  // hint returns the register name for the given hint.
    68  func (a *Asm) hint(h Hint) string {
    69  	if h == HintCarry && a.Arch.regCarry != "" {
    70  		return a.Arch.regCarry
    71  	}
    72  	if h == HintAltCarry && a.Arch.regAltCarry != "" {
    73  		return a.Arch.regAltCarry
    74  	}
    75  	if h == HintNone || a.Arch.hint == nil {
    76  		return ""
    77  	}
    78  	return a.Arch.hint(a, h)
    79  }
    80  
    81  // ZR returns the zero register (the specific register guaranteed to hold the integer 0),
    82  // or else the zero Reg (Reg{}, which has r.Valid() == false).
    83  func (a *Asm) ZR() Reg {
    84  	return Reg{a.Arch.reg0}
    85  }
    86  
    87  // tmp returns the temporary register, or else the zero Reg.
    88  // The temporary register is one available for use implementing logical instructions
    89  // that compile into multiple actual instructions on a given system.
    90  // The assembler sometimes uses it for that purpose, as do we.
    91  // Of course, if we are using it, we'd better not emit an instruction that
    92  // will cause the assembler to smash it while we want it to be holding
    93  // a live value. In general it is the architecture implementation's responsibility
    94  // not to suggest the use of any such pseudo-instructions in situations
    95  // where they would cause problems.
    96  func (a *Asm) tmp() Reg {
    97  	return Reg{a.Arch.regTmp}
    98  }
    99  
   100  // Carry returns the carry register, or else the zero Reg.
   101  func (a *Asm) Carry() Reg {
   102  	return Reg{a.Arch.regCarry}
   103  }
   104  
   105  // AltCarry returns the secondary carry register, or else the zero Reg.
   106  func (a *Asm) AltCarry() Reg {
   107  	return Reg{a.Arch.regAltCarry}
   108  }
   109  
   110  // Imm returns a Reg representing an immediate (constant) value.
   111  func (a *Asm) Imm(x int) Reg {
   112  	if x == 0 && a.Arch.reg0 != "" {
   113  		return Reg{a.Arch.reg0}
   114  	}
   115  	return Reg{fmt.Sprintf("$%d", x)}
   116  }
   117  
   118  // IsZero reports whether r is a zero immediate or the zero register.
   119  func (a *Asm) IsZero(r Reg) bool {
   120  	return r.name == "$0" || a.Arch.reg0 != "" && r.name == a.Arch.reg0
   121  }
   122  
   123  // Reg allocates a new register.
   124  func (a *Asm) Reg() Reg {
   125  	i := bits.TrailingZeros64(a.regavail)
   126  	if i == 64 {
   127  		a.Fatalf("out of registers")
   128  	}
   129  	a.regavail ^= 1 << i
   130  	return Reg{a.Arch.regs[i]}
   131  }
   132  
   133  // RegHint allocates a new register, with a hint as to its purpose.
   134  func (a *Asm) RegHint(hint Hint) Reg {
   135  	if name := a.hint(hint); name != "" {
   136  		i := slices.Index(a.Arch.regs, name)
   137  		if i < 0 {
   138  			return Reg{name}
   139  		}
   140  		if a.regavail&(1<<i) == 0 {
   141  			a.Fatalf("hint for already allocated register %s", name)
   142  		}
   143  		a.regavail &^= 1 << i
   144  		return Reg{name}
   145  	}
   146  	return a.Reg()
   147  }
   148  
   149  // Free frees a previously allocated register.
   150  // If r is not a register (if it's an immediate or a memory reference), Free is a no-op.
   151  func (a *Asm) Free(r Reg) {
   152  	i := slices.Index(a.Arch.regs, r.name)
   153  	if i < 0 {
   154  		return
   155  	}
   156  	if a.regavail&(1<<i) != 0 {
   157  		a.Fatalf("register %s already freed", r.name)
   158  	}
   159  	a.regavail |= 1 << i
   160  }
   161  
   162  // Unfree reallocates a previously freed register r.
   163  // If r is not a register (if it's an immediate or a memory reference), Unfree is a no-op.
   164  // If r is not free for allocation, Unfree panics.
   165  // A Free paired with Unfree can release a register for use temporarily
   166  // but then reclaim it, such as at the end of a loop body when it must be restored.
   167  func (a *Asm) Unfree(r Reg) {
   168  	i := slices.Index(a.Arch.regs, r.name)
   169  	if i < 0 {
   170  		return
   171  	}
   172  	if a.regavail&(1<<i) == 0 {
   173  		a.Fatalf("register %s not free", r.name)
   174  	}
   175  	a.regavail &^= 1 << i
   176  }
   177  
// A RegsUsed is a snapshot of which registers are allocated.
// It is produced by [Asm.RegsUsed] and consumed by [Asm.SetRegsUsed].
type RegsUsed struct {
	avail uint64 // copy of the Asm's availability bitmap at snapshot time
}
   182  
   183  // RegsUsed returns a snapshot of which registers are currently allocated,
   184  // which can be passed to a future call to [Asm.SetRegsUsed].
   185  func (a *Asm) RegsUsed() RegsUsed {
   186  	return RegsUsed{a.regavail}
   187  }
   188  
// SetRegsUsed sets which registers are currently allocated,
// replacing the current allocation state wholesale.
// The argument should have been returned from a previous
// call to [Asm.RegsUsed].
func (a *Asm) SetRegsUsed(used RegsUsed) {
	a.regavail = used.avail
}
   195  
   196  // FreeAll frees all known registers.
   197  func (a *Asm) FreeAll() {
   198  	a.regavail = 1<<len(a.Arch.regs) - 1
   199  }
   200  
   201  // Printf emits to the assembly output.
   202  func (a *Asm) Printf(format string, args ...any) {
   203  	text := fmt.Sprintf(format, args...)
   204  	if strings.Contains(text, "%!") {
   205  		a.Fatalf("printf error: %s", text)
   206  	}
   207  	a.out.WriteString(text)
   208  }
   209  
   210  // Comment emits a line comment to the assembly output.
   211  func (a *Asm) Comment(format string, args ...any) {
   212  	fmt.Fprintf(&a.out, "\t// %s\n", fmt.Sprintf(format, args...))
   213  }
   214  
   215  // EOL appends an end-of-line comment to the previous line.
   216  func (a *Asm) EOL(format string, args ...any) {
   217  	bytes := a.out.Bytes()
   218  	if len(bytes) > 0 && bytes[len(bytes)-1] == '\n' {
   219  		a.out.Truncate(a.out.Len() - 1)
   220  	}
   221  	a.Comment(format, args...)
   222  }
   223  
   224  // JmpEnable emits a test for the optional CPU feature that jumps to label if the feature is present.
   225  // If JmpEnable returns false, the feature is not available on this architecture and no code was emitted.
   226  func (a *Asm) JmpEnable(option Option, label string) bool {
   227  	jmpEnable := a.Arch.options[option]
   228  	if jmpEnable == nil {
   229  		return false
   230  	}
   231  	jmpEnable(a, label)
   232  	return true
   233  }
   234  
   235  // Enabled reports whether the optional CPU feature is considered
   236  // to be enabled at this point in the assembly output.
   237  func (a *Asm) Enabled(option Option) bool {
   238  	return a.enabled[option]
   239  }
   240  
// SetOption changes whether the optional CPU feature should be
// considered to be enabled. The setting is what later calls to
// Enabled report; SetOption itself emits no code.
func (a *Asm) SetOption(option Option, on bool) {
	a.enabled[option] = on
}
   246  
   247  // op3 emits a 3-operand instruction op src1, src2, dst,
   248  // taking care to handle 2-operand machines and also
   249  // to simplify the printout when src2==dst.
   250  func (a *Asm) op3(op string, src1, src2, dst Reg) {
   251  	if op == "" {
   252  		a.Fatalf("missing instruction")
   253  	}
   254  	if src2 == dst {
   255  		// src2 and dst are same; print as 2-op form.
   256  		a.Printf("\t%s %s, %s\n", op, src1, dst)
   257  	} else if a.Arch.op3 != nil && !a.Arch.op3(op) {
   258  		// Machine does not have 3-op form for op; convert to 2-op.
   259  		if src1 == dst {
   260  			a.Fatalf("implicit mov %s, %s would smash src1", src2, dst)
   261  		}
   262  		a.Mov(src2, dst)
   263  		a.Printf("\t%s %s, %s\n", op, src1, dst)
   264  	} else {
   265  		// Full 3-op form.
   266  		a.Printf("\t%s %s, %s, %s\n", op, src1, src2, dst)
   267  	}
   268  }
   269  
   270  // Mov emits dst = src.
   271  func (a *Asm) Mov(src, dst Reg) {
   272  	if src != dst {
   273  		a.Printf("\t%s %s, %s\n", a.Arch.mov, src, dst)
   274  	}
   275  }
   276  
   277  // AddWords emits dst = src1*WordBytes + src2.
   278  // It does not set or use the carry flag.
   279  func (a *Asm) AddWords(src1 Reg, src2, dst RegPtr) {
   280  	if a.Arch.addWords == "" {
   281  		// Note: Assuming that Lsh does not clobber the carry flag.
   282  		// Architectures where this is not true (x86) need to provide Arch.addWords.
   283  		t := a.Reg()
   284  		a.Lsh(a.Imm(bits.TrailingZeros(uint(a.Arch.WordBytes))), src1, t)
   285  		a.Add(t, Reg(src2), Reg(dst), KeepCarry)
   286  		a.Free(t)
   287  		return
   288  	}
   289  	a.Printf("\t"+a.Arch.addWords+"\n", src1, src2, dst)
   290  }
   291  
   292  // And emits dst = src1 & src2
   293  // It may modify the carry flag.
   294  func (a *Asm) And(src1, src2, dst Reg) {
   295  	a.op3(a.Arch.and, src1, src2, dst)
   296  }
   297  
   298  // Or emits dst = src1 | src2
   299  // It may modify the carry flag.
   300  func (a *Asm) Or(src1, src2, dst Reg) {
   301  	a.op3(a.Arch.or, src1, src2, dst)
   302  }
   303  
   304  // Xor emits dst = src1 ^ src2
   305  // It may modify the carry flag.
   306  func (a *Asm) Xor(src1, src2, dst Reg) {
   307  	a.op3(a.Arch.xor, src1, src2, dst)
   308  }
   309  
   310  // Neg emits dst = -src.
   311  // It may modify the carry flag.
   312  func (a *Asm) Neg(src, dst Reg) {
   313  	if a.Arch.neg == "" {
   314  		if a.Arch.rsb != "" {
   315  			a.Printf("\t%s $0, %s, %s\n", a.Arch.rsb, src, dst)
   316  			return
   317  		}
   318  		if a.Arch.sub != "" && a.Arch.reg0 != "" {
   319  			a.Printf("\t%s %s, %s, %s\n", a.Arch.sub, src, a.Arch.reg0, dst)
   320  			return
   321  		}
   322  		a.Fatalf("missing neg")
   323  	}
   324  	if src == dst {
   325  		a.Printf("\t%s %s\n", a.Arch.neg, dst)
   326  	} else {
   327  		a.Printf("\t%s %s, %s\n", a.Arch.neg, src, dst)
   328  	}
   329  }
   330  
// HasRegShift reports whether the architecture can use shift expressions as operands.
// When it returns true, LshReg and RshReg may be used to build such operands.
func (a *Asm) HasRegShift() bool {
	return a.Arch.regShift
}
   335  
   336  // LshReg returns a shift-expression operand src<<shift.
   337  // If a.HasRegShift() == false, LshReg panics.
   338  func (a *Asm) LshReg(shift, src Reg) Reg {
   339  	if !a.HasRegShift() {
   340  		a.Fatalf("no reg shift")
   341  	}
   342  	return Reg{fmt.Sprintf("%s<<%s", src, strings.TrimPrefix(shift.name, "$"))}
   343  }
   344  
   345  // Lsh emits dst = src << shift.
   346  // It may modify the carry flag.
   347  func (a *Asm) Lsh(shift, src, dst Reg) {
   348  	if need := a.hint(HintShiftCount); need != "" && shift.name != need && !shift.IsImm() {
   349  		a.Fatalf("shift count not in %s", need)
   350  	}
   351  	if a.HasRegShift() {
   352  		a.Mov(a.LshReg(shift, src), dst)
   353  		return
   354  	}
   355  	a.op3(a.Arch.lsh, shift, src, dst)
   356  }
   357  
   358  // LshWide emits dst = src << shift with low bits shifted from adj.
   359  // It may modify the carry flag.
   360  func (a *Asm) LshWide(shift, adj, src, dst Reg) {
   361  	if a.Arch.lshd == "" {
   362  		a.Fatalf("no lshwide on %s", a.Arch.Name)
   363  	}
   364  	if need := a.hint(HintShiftCount); need != "" && shift.name != need && !shift.IsImm() {
   365  		a.Fatalf("shift count not in %s", need)
   366  	}
   367  	a.op3(fmt.Sprintf("%s %s,", a.Arch.lshd, shift), adj, src, dst)
   368  }
   369  
   370  // RshReg returns a shift-expression operand src>>shift.
   371  // If a.HasRegShift() == false, RshReg panics.
   372  func (a *Asm) RshReg(shift, src Reg) Reg {
   373  	if !a.HasRegShift() {
   374  		a.Fatalf("no reg shift")
   375  	}
   376  	return Reg{fmt.Sprintf("%s>>%s", src, strings.TrimPrefix(shift.name, "$"))}
   377  }
   378  
   379  // Rsh emits dst = src >> shift.
   380  // It may modify the carry flag.
   381  func (a *Asm) Rsh(shift, src, dst Reg) {
   382  	if need := a.hint(HintShiftCount); need != "" && shift.name != need && !shift.IsImm() {
   383  		a.Fatalf("shift count not in %s", need)
   384  	}
   385  	if a.HasRegShift() {
   386  		a.Mov(a.RshReg(shift, src), dst)
   387  		return
   388  	}
   389  	a.op3(a.Arch.rsh, shift, src, dst)
   390  }
   391  
   392  // RshWide emits dst = src >> shift with high bits shifted from adj.
   393  // It may modify the carry flag.
   394  func (a *Asm) RshWide(shift, adj, src, dst Reg) {
   395  	if a.Arch.lshd == "" {
   396  		a.Fatalf("no rshwide on %s", a.Arch.Name)
   397  	}
   398  	if need := a.hint(HintShiftCount); need != "" && shift.name != need && !shift.IsImm() {
   399  		a.Fatalf("shift count not in %s", need)
   400  	}
   401  	a.op3(fmt.Sprintf("%s %s,", a.Arch.rshd, shift), adj, src, dst)
   402  }
   403  
   404  // SLTU emits dst = src2 < src1 (0 or 1), using an unsigned comparison.
   405  func (a *Asm) SLTU(src1, src2, dst Reg) {
   406  	switch {
   407  	default:
   408  		a.Fatalf("arch has no sltu/sgtu")
   409  	case a.Arch.sltu != "":
   410  		a.Printf("\t%s %s, %s, %s\n", a.Arch.sltu, src1, src2, dst)
   411  	case a.Arch.sgtu != "":
   412  		a.Printf("\t%s %s, %s, %s\n", a.Arch.sgtu, src2, src1, dst)
   413  	}
   414  }
   415  
// Add emits dst = src1+src2, with the specified carry behavior.
//
// The implementation strategies are tried in order: the architecture's
// addF hook, then the plain, carry-setting, carry-using, and
// carry-using-and-setting add instructions, then an address-computation
// (lea) instruction, and finally a simulation of the carry flag in a
// dedicated carry register using SLTU. If no strategy matches the
// requested carry behavior, Add panics.
func (a *Asm) Add(src1, src2, dst Reg, carry Carry) {
	switch {
	default:
		a.Fatalf("unsupported carry behavior")
	case a.Arch.addF != nil && a.Arch.addF(a, src1, src2, dst, carry):
		// handled
	case a.Arch.add != "" && (carry == KeepCarry || carry == SmashCarry):
		a.op3(a.Arch.add, src1, src2, dst)
	case a.Arch.adds != "" && (carry == SetCarry || carry == SmashCarry):
		a.op3(a.Arch.adds, src1, src2, dst)
	case a.Arch.adc != "" && (carry == UseCarry || carry == UseCarry|SmashCarry):
		a.op3(a.Arch.adc, src1, src2, dst)
	case a.Arch.adcs != "" && (carry == UseCarry|SetCarry || carry == UseCarry|SmashCarry):
		a.op3(a.Arch.adcs, src1, src2, dst)
	case a.Arch.lea != "" && (carry == KeepCarry || carry == SmashCarry):
		// Add via address computation, annotated with the
		// logical ADD it implements.
		if src1.IsImm() {
			a.Printf("\t%s %s(%s), %s\n", a.Arch.lea, src1.name[1:], src2, dst) // name[1:] removes $
		} else {
			a.Printf("\t%s (%s)(%s), %s\n", a.Arch.lea, src1, src2, dst)
		}
		if src2 == dst {
			a.EOL("ADD %s, %s", src1, dst)
		} else {
			a.EOL("ADD %s, %s, %s", src1, src2, dst)
		}

	case a.Arch.add != "" && a.Arch.regCarry != "":
		// Machine has no carry flag; instead we've dedicated a register
		// and use SLTU/SGTU (set less-than/greater-than unsigned)
		// to compute the carry flags as needed.
		// For ADD x, y, z, SLTU x/y, z, c computes the carry (borrow) bit.
		// Either of x or y can be used as the second argument, provided
		// it is not aliased to z.
		// To make the output less of a wall of instructions,
		// we comment the "higher-level" operation, with ... marking
		// continued instructions implementing the operation.
		cr := a.Carry()
		if carry&AltCarry != 0 {
			// Caller asked for the secondary carry register.
			cr = a.AltCarry()
			if !cr.Valid() {
				a.Fatalf("alt carry not supported")
			}
			carry &^= AltCarry
		}
		tmp := a.tmp()
		if !tmp.Valid() {
			a.Fatalf("cannot simulate sub carry without regTmp")
		}
		switch carry {
		default:
			a.Fatalf("unsupported carry behavior")
		case UseCarry, UseCarry | SmashCarry:
			// Easy case, just add the carry afterward.
			if a.IsZero(src1) {
				// Only here to use the carry.
				a.Add(cr, src2, dst, KeepCarry)
				a.EOL("ADC $0, %s, %s", src2, dst)
				break
			}
			a.Add(src1, src2, dst, KeepCarry)
			a.EOL("ADC %s, %s, %s (cr=%s)", src1, src2, dst, cr)
			a.Add(cr, dst, dst, KeepCarry)
			a.EOL("...")

		case SetCarry:
			if a.IsZero(src1) && src2 == dst {
				// Only here to clear the carry flag. (Caller will comment.)
				a.Xor(cr, cr, cr)
				break
			}
			var old Reg // old is a src distinct from dst
			switch {
			case dst != src1:
				old = src1
			case dst != src2:
				old = src2
			default:
				// src1 == src2 == dst.
				// Overflows if and only if the high bit is set, so copy high bit to carry.
				// The carry must be computed before the add overwrites the operand.
				a.Rsh(a.Imm(a.Arch.WordBits-1), src1, cr)
				a.EOL("ADDS %s, %s, %s (cr=%s)", src1, src2, dst, cr)
				a.Add(src1, src2, dst, KeepCarry)
				a.EOL("...")
				return
			}
			a.Add(src1, src2, dst, KeepCarry)
			a.EOL("ADDS %s, %s, %s (cr=%s)", src1, src2, dst, cr)
			a.SLTU(old, dst, cr) // dst < old (one of the src) implies carry
			a.EOL("...")

		case UseCarry | SetCarry:
			if a.IsZero(src1) {
				// Only here to use and then set the carry.
				// Easy since carry is not aliased to dst.
				a.Add(cr, src2, dst, KeepCarry)
				a.EOL("ADCS $0, %s, %s (cr=%s)", src2, dst, cr)
				a.SLTU(cr, dst, cr) // dst < cr implies carry
				a.EOL("...")
				break
			}
			// General case. Need to do two different adds (src1 + src2 + cr),
			// computing carry bits for both, and add'ing them together.
			// Start with src1+src2.
			var old Reg // old is a src distinct from dst
			switch {
			case dst != src1:
				old = src1
			case dst != src2:
				old = src2
			}
			if old.Valid() {
				a.Add(src1, src2, dst, KeepCarry)
				a.EOL("ADCS %s, %s, %s (cr=%s)", src1, src2, dst, cr)
				a.SLTU(old, dst, tmp) // dst < old (one of the src) implies carry
				a.EOL("...")
			} else {
				// src1 == src2 == dst, like above. Sign bit is carry bit,
				// but we copy it into tmp, not cr.
				a.Rsh(a.Imm(a.Arch.WordBits-1), src1, tmp)
				a.EOL("ADCS %s, %s, %s (cr=%s)", src1, src2, dst, cr)
				a.Add(src1, src2, dst, KeepCarry)
				a.EOL("...")
			}
			// Add cr to dst.
			a.Add(cr, dst, dst, KeepCarry)
			a.EOL("...")
			a.SLTU(cr, dst, cr) // sum < cr implies carry
			a.EOL("...")
			// Add the two carry bits (at most one can be set, because (2⁶⁴-1)+(2⁶⁴-1)+1 < 2·2⁶⁴).
			a.Add(tmp, cr, cr, KeepCarry)
			a.EOL("...")
		}
	}
}
   551  
// Sub emits dst = src2-src1, with the specified carry behavior.
//
// The implementation strategies are tried in order: the architecture's
// subF hook, then the plain, carry-setting, carry-using, and
// carry-using-and-setting subtract instructions, then adding the negated
// immediate, and finally a simulation of the carry flag in the dedicated
// carry register using SLTU. If no strategy matches the requested
// carry behavior, Sub panics.
func (a *Asm) Sub(src1, src2, dst Reg, carry Carry) {
	switch {
	default:
		a.Fatalf("unsupported carry behavior")
	case a.Arch.subF != nil && a.Arch.subF(a, src1, src2, dst, carry):
		// handled
	case a.Arch.sub != "" && (carry == KeepCarry || carry == SmashCarry):
		a.op3(a.Arch.sub, src1, src2, dst)
	case a.Arch.subs != "" && (carry == SetCarry || carry == SmashCarry):
		a.op3(a.Arch.subs, src1, src2, dst)
	case a.Arch.sbc != "" && (carry == UseCarry || carry == UseCarry|SmashCarry):
		a.op3(a.Arch.sbc, src1, src2, dst)
	case a.Arch.sbcs != "" && (carry == UseCarry|SetCarry || carry == UseCarry|SmashCarry):
		a.op3(a.Arch.sbcs, src1, src2, dst)
	case strings.HasPrefix(src1.name, "$") && (carry == KeepCarry || carry == SmashCarry):
		// Running out of options; if this is an immediate
		// and we don't need to worry about carry semantics,
		// try adding the negation.
		// src1 is a local copy, so rewriting its name does not affect the caller.
		if strings.HasPrefix(src1.name, "$-") {
			src1.name = "$" + src1.name[2:]
		} else {
			src1.name = "$-" + src1.name[1:]
		}
		a.Add(src1, src2, dst, carry)

	case a.Arch.sub != "" && a.Arch.regCarry != "":
		// Machine has no carry flag; instead we've dedicated a register
		// and use SLTU/SGTU (set less-than/greater-than unsigned)
		// to compute the carry bits as needed.
		// For SUB x, y, z, SLTU x, y, c computes the carry (borrow) bit.
		// To make the output less of a wall of instructions,
		// we comment the "higher-level" operation, with ... marking
		// continued instructions implementing the operation.
		// Be careful! Subtract and add have different overflow behaviors,
		// so the details here are NOT the same as in Add above.
		cr := a.Carry()
		if carry&AltCarry != 0 {
			a.Fatalf("alt carry not supported")
		}
		tmp := a.tmp()
		if !tmp.Valid() {
			a.Fatalf("cannot simulate carry without regTmp")
		}
		switch carry {
		default:
			a.Fatalf("unsupported carry behavior")
		case UseCarry, UseCarry | SmashCarry:
			// Easy case, just subtract the carry afterward.
			if a.IsZero(src1) {
				// Only here to use the carry.
				a.Sub(cr, src2, dst, KeepCarry)
				a.EOL("SBC $0, %s, %s", src2, dst)
				break
			}
			a.Sub(src1, src2, dst, KeepCarry)
			a.EOL("SBC %s, %s, %s", src1, src2, dst)
			a.Sub(cr, dst, dst, KeepCarry)
			a.EOL("...")

		case SetCarry:
			if a.IsZero(src1) && src2 == dst {
				// Only here to clear the carry flag.
				a.Xor(cr, cr, cr)
				break
			}
			// Compute the new carry first, in case dst is src1 or src2.
			a.SLTU(src1, src2, cr) // src2 < src1 implies borrow
			a.EOL("SUBS %s, %s, %s", src1, src2, dst)
			a.Sub(src1, src2, dst, KeepCarry)
			a.EOL("...")

		case UseCarry | SetCarry:
			if a.IsZero(src1) {
				// Only here to use and then set the carry.
				if src2 == dst {
					// Unfortunate case. Using src2==dst is common (think x -= y)
					// and also more efficient on two-operand machines (like x86),
					// but here subtracting from dst will smash src2, making it
					// impossible to recover the carry information after the SUB.
					// But we want to use the carry, so we can't compute it before
					// the SUB either. Compute into a temporary and MOV.
					a.SLTU(cr, src2, tmp)
					a.EOL("SBCS $0, %s, %s", src2, dst)
					a.Sub(cr, src2, dst, KeepCarry)
					a.EOL("...")
					a.Mov(tmp, cr)
					a.EOL("...")
					break
				}
				a.Sub(cr, src2, dst, KeepCarry) // src2 not dst, so src2 preserved
				a.SLTU(cr, src2, cr)            // src2 < cr implies borrow
				break
			}
			// General case. Need to do two different subtracts (src2 - cr - src1),
			// computing carry bits for both, and add'ing them together.
			// Doing src2 - cr first frees up cr to store the carry from the sub of src1.
			a.SLTU(cr, src2, tmp)
			a.EOL("SBCS %s, %s, %s", src1, src2, dst)
			a.Sub(cr, src2, dst, KeepCarry)
			a.EOL("...")
			a.SLTU(src1, dst, cr)
			a.EOL("...")
			a.Sub(src1, dst, dst, KeepCarry)
			a.EOL("...")
			a.Add(tmp, cr, cr, KeepCarry)
			a.EOL("...")
		}
	}
}
   662  
   663  // ClearCarry clears the carry flag.
   664  // The ‘which’ parameter must be AddCarry or SubCarry to specify how the flag will be used.
   665  // (On some systems, the sub carry's actual processor bit is inverted from its usual value.)
   666  func (a *Asm) ClearCarry(which Carry) {
   667  	dst := Reg{a.Arch.regs[0]} // not actually modified
   668  	switch which & (AddCarry | SubCarry) {
   669  	default:
   670  		a.Fatalf("bad carry")
   671  	case AddCarry:
   672  		a.Add(a.Imm(0), dst, dst, SetCarry|which&AltCarry)
   673  	case SubCarry:
   674  		a.Sub(a.Imm(0), dst, dst, SetCarry|which&AltCarry)
   675  	}
   676  	a.EOL("clear carry")
   677  }
   678  
   679  // SaveCarry saves the carry flag into dst.
   680  // The meaning of the bits in dst is architecture-dependent.
   681  // The carry flag is left in an undefined state.
   682  func (a *Asm) SaveCarry(dst Reg) {
   683  	// Note: As implemented here, the carry flag is actually left unmodified,
   684  	// but we say it is in an undefined state in case that changes in the future.
   685  	// (The SmashCarry could be changed to SetCarry if so.)
   686  	if cr := a.Carry(); cr.Valid() {
   687  		if cr == dst {
   688  			return // avoid EOL
   689  		}
   690  		a.Mov(cr, dst)
   691  	} else {
   692  		a.Sub(dst, dst, dst, UseCarry|SmashCarry)
   693  	}
   694  	a.EOL("save carry")
   695  }
   696  
   697  // RestoreCarry restores the carry flag from src.
   698  // src is left in an undefined state.
   699  func (a *Asm) RestoreCarry(src Reg) {
   700  	if cr := a.Carry(); cr.Valid() {
   701  		if cr == src {
   702  			return // avoid EOL
   703  		}
   704  		a.Mov(src, cr)
   705  	} else if a.Arch.subCarryIsBorrow {
   706  		a.Add(src, src, src, SetCarry)
   707  	} else {
   708  		// SaveCarry saved the sub carry flag with an encoding of 0, 1 -> 0, ^0.
   709  		// Restore it by subtracting from a value less than ^0, which will carry if src != 0.
   710  		// If there is no zero register, the SP register is guaranteed to be less than ^0.
   711  		// (This may seem too clever, but on GOARCH=arm we have no other good options.)
   712  		a.Sub(src, cmp.Or(a.ZR(), Reg{"SP"}), src, SetCarry)
   713  	}
   714  	a.EOL("restore carry")
   715  }
   716  
   717  // ConvertCarry converts the carry flag in dst from the internal format to a 0 or 1.
   718  // The carry flag is left in an undefined state.
   719  func (a *Asm) ConvertCarry(which Carry, dst Reg) {
   720  	if a.Carry().Valid() { // already 0 or 1
   721  		return
   722  	}
   723  	switch which {
   724  	case AddCarry:
   725  		if a.Arch.subCarryIsBorrow {
   726  			a.Neg(dst, dst)
   727  		} else {
   728  			a.Add(a.Imm(1), dst, dst, SmashCarry)
   729  		}
   730  		a.EOL("convert add carry")
   731  	case SubCarry:
   732  		a.Neg(dst, dst)
   733  		a.EOL("convert sub carry")
   734  	}
   735  }
   736  
   737  // SaveConvertCarry saves and converts the carry flag into dst: 0 unset, 1 set.
   738  // The carry flag is left in an undefined state.
   739  func (a *Asm) SaveConvertCarry(which Carry, dst Reg) {
   740  	switch which {
   741  	default:
   742  		a.Fatalf("bad carry")
   743  	case AddCarry:
   744  		if (a.Arch.adc != "" || a.Arch.adcs != "") && a.ZR().Valid() {
   745  			a.Add(a.ZR(), a.ZR(), dst, UseCarry|SmashCarry)
   746  			a.EOL("save & convert add carry")
   747  			return
   748  		}
   749  	case SubCarry:
   750  		// no special cases
   751  	}
   752  	a.SaveCarry(dst)
   753  	a.ConvertCarry(which, dst)
   754  }
   755  
   756  // MulWide emits dstlo = src1 * src2 and dsthi = (src1 * src2) >> WordBits.
   757  // The carry flag is left in an undefined state.
   758  // If dstlo or dsthi is the zero Reg, then those outputs are discarded.
   759  func (a *Asm) MulWide(src1, src2, dstlo, dsthi Reg) {
   760  	switch {
   761  	default:
   762  		a.Fatalf("mulwide not available")
   763  	case a.Arch.mulWideF != nil:
   764  		a.Arch.mulWideF(a, src1, src2, dstlo, dsthi)
   765  	case a.Arch.mul != "" && !dsthi.Valid():
   766  		a.op3(a.Arch.mul, src1, src2, dstlo)
   767  	case a.Arch.mulhi != "" && !dstlo.Valid():
   768  		a.op3(a.Arch.mulhi, src1, src2, dsthi)
   769  	case a.Arch.mul != "" && a.Arch.mulhi != "" && dstlo != src1 && dstlo != src2:
   770  		a.op3(a.Arch.mul, src1, src2, dstlo)
   771  		a.op3(a.Arch.mulhi, src1, src2, dsthi)
   772  	case a.Arch.mul != "" && a.Arch.mulhi != "" && dsthi != src1 && dsthi != src2:
   773  		a.op3(a.Arch.mulhi, src1, src2, dsthi)
   774  		a.op3(a.Arch.mul, src1, src2, dstlo)
   775  	}
   776  }
   777  
// Jmp emits an unconditional jump to the label.
func (a *Asm) Jmp(label string) {
	// Note: Some systems prefer the spelling B or BR, but all accept JMP.
	a.Printf("\tJMP %s\n", label)
}
   783  
   784  // JmpZero jumps to the label if src is zero.
   785  // It may modify the carry flag unless a.Arch.CarrySafeLoop is true.
   786  func (a *Asm) JmpZero(src Reg, label string) {
   787  	a.Printf("\t"+a.Arch.jmpZero+"\n", src, label)
   788  }
   789  
   790  // JmpNonZero jumps to the label if src is non-zero.
   791  // It may modify the carry flag unless a.Arch,CarrySafeLoop is true.
   792  func (a *Asm) JmpNonZero(src Reg, label string) {
   793  	a.Printf("\t"+a.Arch.jmpNonZero+"\n", src, label)
   794  }
   795  
// Label emits a label with the given name,
// written at the start of its own line and followed by a colon.
func (a *Asm) Label(name string) {
	a.Printf("%s:\n", name)
}
   800  
// Ret emits a RET instruction, returning from the assembly function.
func (a *Asm) Ret() {
	a.Printf("\tRET\n")
}
   805  

View as plain text