new file mode 100644
@@ -0,0 +1,279 @@
+From 658d8fe9b0a5e589417d59d2b4d50c18a0f44dd0 Mon Sep 17 00:00:00 2001
+From: "Paul E. Murphy" <murp@ibm.com>
+Date: Mon, 3 May 2021 11:00:54 -0500
+Subject: [PATCH 1/2] cmd/link: use TOC-relative trampolines on PPC64 when
+ needed
+
+When linking a PIE binary with the internal linker, TOC relative
+relocations need to be generated. Update trampolines to indirect
+call using R12 to more closely match the AIX/ELFv2 regardless of
+buildmode, and work with position-indepdent code.
+
+Likewise, update the check for offseting R_CALLPOWER relocs to
+make a local call. It should be checking ldr.AttrExternal, not
+ldr.IsExternal. This offset should not be adjusted for external
+(non-go) object files, it is handled when ELF reloc are translated
+into go relocs.
+
+And, update trampoline tests to verify these are generated correctly
+and produce a working binary using -buildmode=pie on ppc64le.
+
+Fixes #52337
+
+Change-Id: I8a2dea06c3237bdf0e87888b56a17b6c4c99a7de
+Reviewed-on: https://go-review.googlesource.com/c/go/+/400234
+Reviewed-by: Than McIntosh <thanm@google.com>
+Reviewed-by: Cherry Mui <cherryyz@google.com>
+Run-TryBot: Paul Murphy <murp@ibm.com>
+TryBot-Result: Gopher Robot <gobot@golang.org>
+(cherry picked from commit caa46312eeca1275ce22ecf8985ca31ef8de7883)
+Signed-off-by: Joel Stanley <joel@jms.id.au>
+---
+ src/cmd/link/internal/ppc64/asm.go | 51 +++++++---------
+ src/cmd/link/link_test.go | 98 +++++++++++++++++-------------
+ 2 files changed, 78 insertions(+), 71 deletions(-)
+
+diff --git a/src/cmd/link/internal/ppc64/asm.go b/src/cmd/link/internal/ppc64/asm.go
+index d2b140b45d..73c2718a33 100644
+--- a/src/cmd/link/internal/ppc64/asm.go
++++ b/src/cmd/link/internal/ppc64/asm.go
+@@ -766,7 +766,7 @@ func trampoline(ctxt *ld.Link, ldr *loader.Loader, ri int, rs, s loader.Sym) {
+ // For external linking, the linker can insert a call stub to handle a long call, but depends on having the TOC address in
+ // r2. For those build modes with external linking where the TOC address is not maintained in r2, trampolines must be created.
+ if ctxt.IsExternal() && r2Valid(ctxt) {
+- // No trampolines needed since r2 contains the TOC
++ // The TOC pointer is valid. The external linker will insert trampolines.
+ return
+ }
+
+@@ -819,14 +819,9 @@ func trampoline(ctxt *ld.Link, ldr *loader.Loader, ri int, rs, s loader.Sym) {
+ }
+ }
+ if ldr.SymType(tramp) == 0 {
+- if r2Valid(ctxt) {
+- // Should have returned for above cases
+- ctxt.Errorf(s, "unexpected trampoline for shared or dynamic linking")
+- } else {
+- trampb := ldr.MakeSymbolUpdater(tramp)
+- ctxt.AddTramp(trampb)
+- gentramp(ctxt, ldr, trampb, rs, r.Add())
+- }
++ trampb := ldr.MakeSymbolUpdater(tramp)
++ ctxt.AddTramp(trampb)
++ gentramp(ctxt, ldr, trampb, rs, r.Add())
+ }
+ sb := ldr.MakeSymbolUpdater(s)
+ relocs := sb.Relocs()
+@@ -842,7 +837,6 @@ func trampoline(ctxt *ld.Link, ldr *loader.Loader, ri int, rs, s loader.Sym) {
+ func gentramp(ctxt *ld.Link, ldr *loader.Loader, tramp *loader.SymbolBuilder, target loader.Sym, offset int64) {
+ tramp.SetSize(16) // 4 instructions
+ P := make([]byte, tramp.Size())
+- t := ldr.SymValue(target) + offset
+ var o1, o2 uint32
+
+ if ctxt.IsAIX() {
+@@ -851,8 +845,8 @@ func gentramp(ctxt *ld.Link, ldr *loader.Loader, tramp *loader.SymbolBuilder, ta
+ // However, all text symbols are accessed with a TOC symbol as
+ // text relocations aren't supposed to be possible.
+ // So, keep using the external linking way to be more AIX friendly.
+- o1 = uint32(0x3fe20000) // lis r2, toctargetaddr hi
+- o2 = uint32(0xebff0000) // ld r31, toctargetaddr lo
++ o1 = uint32(0x3c000000) | 12<<21 | 2<<16 // addis r12, r2, toctargetaddr hi
++ o2 = uint32(0xe8000000) | 12<<21 | 12<<16 // ld r12, r12, toctargetaddr lo
+
+ toctramp := ldr.CreateSymForUpdate("TOC."+ldr.SymName(tramp.Sym()), 0)
+ toctramp.SetType(sym.SXCOFFTOC)
+@@ -866,31 +860,32 @@ func gentramp(ctxt *ld.Link, ldr *loader.Loader, tramp *loader.SymbolBuilder, ta
+ // Used for default build mode for an executable
+ // Address of the call target is generated using
+ // relocation and doesn't depend on r2 (TOC).
+- o1 = uint32(0x3fe00000) // lis r31,targetaddr hi
+- o2 = uint32(0x3bff0000) // addi r31,targetaddr lo
++ o1 = uint32(0x3c000000) | 12<<21 // lis r12,targetaddr hi
++ o2 = uint32(0x38000000) | 12<<21 | 12<<16 // addi r12,r12,targetaddr lo
+
+- // With external linking, the target address must be
+- // relocated using LO and HA
+- if ctxt.IsExternal() || ldr.SymValue(target) == 0 {
++ t := ldr.SymValue(target)
++ if t == 0 || r2Valid(ctxt) || ctxt.IsExternal() {
++ // Target address is unknown, generate relocations
+ r, _ := tramp.AddRel(objabi.R_ADDRPOWER)
++ if r2Valid(ctxt) {
++ // Use a TOC relative address if R2 holds the TOC pointer
++ o1 |= uint32(2 << 16) // Transform lis r31,ha into addis r31,r2,ha
++ r.SetType(objabi.R_ADDRPOWER_TOCREL)
++ }
+ r.SetOff(0)
+ r.SetSiz(8) // generates 2 relocations: HA + LO
+ r.SetSym(target)
+ r.SetAdd(offset)
+ } else {
+- // adjustment needed if lo has sign bit set
+- // when using addi to compute address
+- val := uint32((t & 0xffff0000) >> 16)
+- if t&0x8000 != 0 {
+- val += 1
+- }
+- o1 |= val // hi part of addr
+- o2 |= uint32(t & 0xffff) // lo part of addr
++ // The target address is known, resolve it
++ t += offset
++ o1 |= (uint32(t) + 0x8000) >> 16 // HA
++ o2 |= uint32(t) & 0xFFFF // LO
+ }
+ }
+
+- o3 := uint32(0x7fe903a6) // mtctr r31
+- o4 := uint32(0x4e800420) // bctr
++ o3 := uint32(0x7c0903a6) | 12<<21 // mtctr r12
++ o4 := uint32(0x4e800420) // bctr
+ ctxt.Arch.ByteOrder.PutUint32(P, o1)
+ ctxt.Arch.ByteOrder.PutUint32(P[4:], o2)
+ ctxt.Arch.ByteOrder.PutUint32(P[8:], o3)
+@@ -962,7 +957,7 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade
+ // If we are linking PIE or shared code, all golang generated object files have an extra 2 instruction prologue
+ // to regenerate the TOC pointer from R12. The exception are two special case functions tested below. Note,
+ // local call offsets for externally generated objects are accounted for when converting into golang relocs.
+- if !ldr.IsExternal(rs) && ldr.AttrShared(rs) && tgtName != "runtime.duffzero" && tgtName != "runtime.duffcopy" {
++ if !ldr.AttrExternal(rs) && ldr.AttrShared(rs) && tgtName != "runtime.duffzero" && tgtName != "runtime.duffcopy" {
+ // Furthermore, only apply the offset if the target looks like the start of a function call.
+ if r.Add() == 0 && ldr.SymType(rs) == sym.STEXT {
+ t += 8
+diff --git a/src/cmd/link/link_test.go b/src/cmd/link/link_test.go
+index ad7658bb25..d58ee8669b 100644
+--- a/src/cmd/link/link_test.go
++++ b/src/cmd/link/link_test.go
+@@ -642,8 +642,12 @@ func TestTrampoline(t *testing.T) {
+ // For stress test, we set -debugtramp=2 flag, which sets a very low
+ // threshold for trampoline generation, and essentially all cross-package
+ // calls will use trampolines.
++ buildmodes := []string{"default"}
+ switch runtime.GOARCH {
+- case "arm", "arm64", "ppc64", "ppc64le":
++ case "arm", "arm64", "ppc64":
++ case "ppc64le":
++ // Trampolines are generated differently when internal linking PIE, test them too.
++ buildmodes = append(buildmodes, "pie")
+ default:
+ t.Skipf("trampoline insertion is not implemented on %s", runtime.GOARCH)
+ }
+@@ -661,18 +665,20 @@ func TestTrampoline(t *testing.T) {
+ }
+ exe := filepath.Join(tmpdir, "hello.exe")
+
+- cmd := exec.Command(testenv.GoToolPath(t), "build", "-ldflags=-debugtramp=2", "-o", exe, src)
+- out, err := cmd.CombinedOutput()
+- if err != nil {
+- t.Fatalf("build failed: %v\n%s", err, out)
+- }
+- cmd = exec.Command(exe)
+- out, err = cmd.CombinedOutput()
+- if err != nil {
+- t.Errorf("executable failed to run: %v\n%s", err, out)
+- }
+- if string(out) != "hello\n" {
+- t.Errorf("unexpected output:\n%s", out)
++ for _, mode := range buildmodes {
++ cmd := exec.Command(testenv.GoToolPath(t), "build", "-buildmode="+mode, "-ldflags=-debugtramp=2", "-o", exe, src)
++ out, err := cmd.CombinedOutput()
++ if err != nil {
++ t.Fatalf("build (%s) failed: %v\n%s", mode, err, out)
++ }
++ cmd = exec.Command(exe)
++ out, err = cmd.CombinedOutput()
++ if err != nil {
++ t.Errorf("executable failed to run (%s): %v\n%s", mode, err, out)
++ }
++ if string(out) != "hello\n" {
++ t.Errorf("unexpected output (%s):\n%s", mode, out)
++ }
+ }
+ }
+
+@@ -693,8 +699,12 @@ func TestTrampolineCgo(t *testing.T) {
+ // For stress test, we set -debugtramp=2 flag, which sets a very low
+ // threshold for trampoline generation, and essentially all cross-package
+ // calls will use trampolines.
++ buildmodes := []string{"default"}
+ switch runtime.GOARCH {
+- case "arm", "arm64", "ppc64", "ppc64le":
++ case "arm", "arm64", "ppc64":
++ case "ppc64le":
++ // Trampolines are generated differently when internal linking PIE, test them too.
++ buildmodes = append(buildmodes, "pie")
+ default:
+ t.Skipf("trampoline insertion is not implemented on %s", runtime.GOARCH)
+ }
+@@ -713,37 +723,39 @@ func TestTrampolineCgo(t *testing.T) {
+ }
+ exe := filepath.Join(tmpdir, "hello.exe")
+
+- cmd := exec.Command(testenv.GoToolPath(t), "build", "-ldflags=-debugtramp=2", "-o", exe, src)
+- out, err := cmd.CombinedOutput()
+- if err != nil {
+- t.Fatalf("build failed: %v\n%s", err, out)
+- }
+- cmd = exec.Command(exe)
+- out, err = cmd.CombinedOutput()
+- if err != nil {
+- t.Errorf("executable failed to run: %v\n%s", err, out)
+- }
+- if string(out) != "hello\n" && string(out) != "hello\r\n" {
+- t.Errorf("unexpected output:\n%s", out)
+- }
++ for _, mode := range buildmodes {
++ cmd := exec.Command(testenv.GoToolPath(t), "build", "-buildmode="+mode, "-ldflags=-debugtramp=2", "-o", exe, src)
++ out, err := cmd.CombinedOutput()
++ if err != nil {
++ t.Fatalf("build (%s) failed: %v\n%s", mode, err, out)
++ }
++ cmd = exec.Command(exe)
++ out, err = cmd.CombinedOutput()
++ if err != nil {
++ t.Errorf("executable failed to run (%s): %v\n%s", mode, err, out)
++ }
++ if string(out) != "hello\n" && string(out) != "hello\r\n" {
++ t.Errorf("unexpected output (%s):\n%s", mode, out)
++ }
+
+- // Test internal linking mode.
++ // Test internal linking mode.
+
+- if runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || (runtime.GOARCH == "arm64" && runtime.GOOS == "windows") || !testenv.CanInternalLink() {
+- return // internal linking cgo is not supported
+- }
+- cmd = exec.Command(testenv.GoToolPath(t), "build", "-ldflags=-debugtramp=2 -linkmode=internal", "-o", exe, src)
+- out, err = cmd.CombinedOutput()
+- if err != nil {
+- t.Fatalf("build failed: %v\n%s", err, out)
+- }
+- cmd = exec.Command(exe)
+- out, err = cmd.CombinedOutput()
+- if err != nil {
+- t.Errorf("executable failed to run: %v\n%s", err, out)
+- }
+- if string(out) != "hello\n" && string(out) != "hello\r\n" {
+- t.Errorf("unexpected output:\n%s", out)
++ if runtime.GOARCH == "ppc64" || (runtime.GOARCH == "arm64" && runtime.GOOS == "windows") || !testenv.CanInternalLink() {
++ return // internal linking cgo is not supported
++ }
++ cmd = exec.Command(testenv.GoToolPath(t), "build", "-buildmode="+mode, "-ldflags=-debugtramp=2 -linkmode=internal", "-o", exe, src)
++ out, err = cmd.CombinedOutput()
++ if err != nil {
++ t.Fatalf("build (%s) failed: %v\n%s", mode, err, out)
++ }
++ cmd = exec.Command(exe)
++ out, err = cmd.CombinedOutput()
++ if err != nil {
++ t.Errorf("executable failed to run (%s): %v\n%s", mode, err, out)
++ }
++ if string(out) != "hello\n" && string(out) != "hello\r\n" {
++ t.Errorf("unexpected output (%s):\n%s", mode, out)
++ }
+ }
+ }
+
+--
+2.31.1
+
new file mode 100644
@@ -0,0 +1,549 @@
+From 282feab7f35e27749bb82603a2867e0fb03f54c2 Mon Sep 17 00:00:00 2001
+From: "Paul E. Murphy" <murp@ibm.com>
+Date: Fri, 15 Apr 2022 17:06:48 -0500
+Subject: [PATCH 2/2] cmd/link: generate PPC64 ABI register save/restore
+ functions if needed
+
+They are usually needed when internally linking gcc code
+compiled with -Os. These are typically generated by ld
+or gold, but are missing when linking internally.
+
+The PPC64 ELF ABI describes a set of functions to save/restore
+non-volatile, callee-save registers using R1/R0/R12:
+
+ _savegpr0_n: Save Rn-R31 relative to R1, save LR (in R0), return
+ _restgpr0_n: Restore Rn-R31 from R1, and return to saved LR
+ _savefpr_n: Save Fn-F31 based on R1, and save LR (in R0), return
+ _restfpr_n: Restore Fn-F31 from R1, and return to 16(R1)
+ _savegpr1_n: Save Rn-R31 based on R12, return
+ _restgpr1_n: Restore Rn-R31 based on R12, return
+ _savevr_m: Save VRm-VR31 based on R0, R12 is scratch, return
+ _restvr_m: Restore VRm-VR31 based on R0, R12 is scratch, return
+
+ m is a value 20<=m<=31
+ n is a value 14<=n<=31
+
+Add several new functions similar to those suggested by the
+PPC64 ELFv2 ABI. And update the linker to scan external relocs
+for these calls, and redirect them to runtime.elf_<func>+offset
+in runtime/asm_ppc64x.go.
+
+Similarly, code which generates plt stubs is moved into
+a dedicated function. This avoids an extra scan of relocs.
+
+fixes #52336
+
+Change-Id: I2f0f8b5b081a7b294dff5c92b4b1db8eba9a9400
+Reviewed-on: https://go-review.googlesource.com/c/go/+/400796
+Reviewed-by: Cherry Mui <cherryyz@google.com>
+Reviewed-by: David Chase <drchase@google.com>
+Run-TryBot: Paul Murphy <murp@ibm.com>
+TryBot-Result: Gopher Robot <gobot@golang.org>
+(cherry picked from commit 9c9090eb1da540c7d757df0c60423319a28759d3)
+Signed-off-by: Joel Stanley <joel@jms.id.au>
+---
+ .../script/test_ppc64_linker_funcs.txt | 49 +++++
+ src/cmd/link/internal/ppc64/asm.go | 182 ++++++++++++----
+ src/runtime/asm_ppc64x.s | 203 ++++++++++++++++++
+ 3 files changed, 392 insertions(+), 42 deletions(-)
+ create mode 100644 src/cmd/go/testdata/script/test_ppc64_linker_funcs.txt
+
+diff --git a/src/cmd/go/testdata/script/test_ppc64_linker_funcs.txt b/src/cmd/go/testdata/script/test_ppc64_linker_funcs.txt
+new file mode 100644
+index 0000000000..a33f9df724
+--- /dev/null
++++ b/src/cmd/go/testdata/script/test_ppc64_linker_funcs.txt
+@@ -0,0 +1,49 @@
++# Tests that the linker implements the PPC64 ELFv2 ABI
++# register save and restore functions as defined in
++# section 2.3.3.1 of the PPC64 ELFv2 ABI when linking
++# external objects most likely compiled with gcc's
++# -Os option.
++#
++# Verifies golang.org/issue/52366 for linux/ppc64le
++[!linux] skip
++[!gc] skip
++[!cgo] skip
++[!ppc64le] skip
++
++go build -ldflags='-linkmode=internal'
++exec ./abitest
++stdout success
++
++-- go.mod --
++module abitest
++
++-- abitest.go --
++package main
++
++/*
++#cgo CFLAGS: -Os
++
++int foo_fpr() {
++ asm volatile("":::"fr31","fr30","fr29","fr28");
++}
++int foo_gpr0() {
++ asm volatile("":::"r30","r29","r28");
++}
++int foo_gpr1() {
++ asm volatile("":::"fr31", "fr30","fr29","fr28","r30","r29","r28");
++}
++int foo_vr() {
++ asm volatile("":::"v31","v30","v29","v28");
++}
++*/
++import "C"
++
++import "fmt"
++
++func main() {
++ C.foo_fpr()
++ C.foo_gpr0()
++ C.foo_gpr1()
++ C.foo_vr()
++ fmt.Println("success")
++}
+diff --git a/src/cmd/link/internal/ppc64/asm.go b/src/cmd/link/internal/ppc64/asm.go
+index 73c2718a33..5d5fbe2a97 100644
+--- a/src/cmd/link/internal/ppc64/asm.go
++++ b/src/cmd/link/internal/ppc64/asm.go
+@@ -40,10 +40,11 @@ import (
+ "encoding/binary"
+ "fmt"
+ "log"
++ "strconv"
+ "strings"
+ )
+
+-func genplt(ctxt *ld.Link, ldr *loader.Loader) {
++func genpltstub(ctxt *ld.Link, ldr *loader.Loader, r loader.Reloc, s loader.Sym) (sym loader.Sym, firstUse bool) {
+ // The ppc64 ABI PLT has similar concepts to other
+ // architectures, but is laid out quite differently. When we
+ // see an R_PPC64_REL24 relocation to a dynamic symbol
+@@ -92,53 +93,82 @@ func genplt(ctxt *ld.Link, ldr *loader.Loader) {
+ //
+ // This assumes "case 1" from the ABI, where the caller needs
+ // us to save and restore the TOC pointer.
+- var stubs []loader.Sym
+- for _, s := range ctxt.Textp {
+- relocs := ldr.Relocs(s)
+- for i := 0; i < relocs.Count(); i++ {
+- r := relocs.At(i)
+- if r.Type() != objabi.ElfRelocOffset+objabi.RelocType(elf.R_PPC64_REL24) || ldr.SymType(r.Sym()) != sym.SDYNIMPORT {
+- continue
+- }
+
+- // Reserve PLT entry and generate symbol
+- // resolver
+- addpltsym(ctxt, ldr, r.Sym())
+-
+- // Generate call stub. Important to note that we're looking
+- // up the stub using the same version as the parent symbol (s),
+- // needed so that symtoc() will select the right .TOC. symbol
+- // when processing the stub. In older versions of the linker
+- // this was done by setting stub.Outer to the parent, but
+- // if the stub has the right version initially this is not needed.
+- n := fmt.Sprintf("%s.%s", ldr.SymName(s), ldr.SymName(r.Sym()))
+- stub := ldr.CreateSymForUpdate(n, ldr.SymVersion(s))
+- if stub.Size() == 0 {
+- stubs = append(stubs, stub.Sym())
+- gencallstub(ctxt, ldr, 1, stub, r.Sym())
+- }
++ // Reserve PLT entry and generate symbol
++ // resolver
++ addpltsym(ctxt, ldr, r.Sym())
++
++ // Generate call stub. Important to note that we're looking
++ // up the stub using the same version as the parent symbol (s),
++ // needed so that symtoc() will select the right .TOC. symbol
++ // when processing the stub. In older versions of the linker
++ // this was done by setting stub.Outer to the parent, but
++ // if the stub has the right version initially this is not needed.
++ n := fmt.Sprintf("%s.%s", ldr.SymName(s), ldr.SymName(r.Sym()))
++ stub := ldr.CreateSymForUpdate(n, ldr.SymVersion(s))
++ firstUse = stub.Size() == 0
++ if firstUse {
++ gencallstub(ctxt, ldr, 1, stub, r.Sym())
++ }
+
+- // Update the relocation to use the call stub
+- r.SetSym(stub.Sym())
++ // Update the relocation to use the call stub
++ r.SetSym(stub.Sym())
+
+- // Make the symbol writeable so we can fixup toc.
+- su := ldr.MakeSymbolUpdater(s)
+- su.MakeWritable()
+- p := su.Data()
++ // Make the symbol writeable so we can fixup toc.
++ su := ldr.MakeSymbolUpdater(s)
++ su.MakeWritable()
++ p := su.Data()
+
+- // Check for toc restore slot (a nop), and replace with toc restore.
+- var nop uint32
+- if len(p) >= int(r.Off()+8) {
+- nop = ctxt.Arch.ByteOrder.Uint32(p[r.Off()+4:])
+- }
+- if nop != 0x60000000 {
+- ldr.Errorf(s, "Symbol %s is missing toc restoration slot at offset %d", ldr.SymName(s), r.Off()+4)
++ // Check for toc restore slot (a nop), and replace with toc restore.
++ var nop uint32
++ if len(p) >= int(r.Off()+8) {
++ nop = ctxt.Arch.ByteOrder.Uint32(p[r.Off()+4:])
++ }
++ if nop != 0x60000000 {
++ ldr.Errorf(s, "Symbol %s is missing toc restoration slot at offset %d", ldr.SymName(s), r.Off()+4)
++ }
++ const o1 = 0xe8410018 // ld r2,24(r1)
++ ctxt.Arch.ByteOrder.PutUint32(p[r.Off()+4:], o1)
++
++ return stub.Sym(), firstUse
++}
++
++// Scan relocs and generate PLT stubs and generate/fixup ABI defined functions created by the linker
++func genstubs(ctxt *ld.Link, ldr *loader.Loader) {
++ var stubs []loader.Sym
++ var abifuncs []loader.Sym
++ for _, s := range ctxt.Textp {
++ relocs := ldr.Relocs(s)
++ for i := 0; i < relocs.Count(); i++ {
++ if r := relocs.At(i); r.Type() == objabi.ElfRelocOffset+objabi.RelocType(elf.R_PPC64_REL24) {
++ switch ldr.SymType(r.Sym()) {
++ case sym.SDYNIMPORT:
++ // This call goes throught the PLT, generate and call through a PLT stub.
++ if sym, firstUse := genpltstub(ctxt, ldr, r, s); firstUse {
++ stubs = append(stubs, sym)
++ }
++
++ case sym.SXREF:
++ // Is this an ELF ABI defined function which is (in practice)
++ // generated by the linker to save/restore callee save registers?
++ // These are defined similarly for both PPC64 ELF and ELFv2.
++ targName := ldr.SymName(r.Sym())
++ if strings.HasPrefix(targName, "_save") || strings.HasPrefix(targName, "_rest") {
++ if sym, firstUse := rewriteABIFuncReloc(ctxt, ldr, targName, r); firstUse {
++ abifuncs = append(abifuncs, sym)
++ }
++ }
++ }
+ }
+- const o1 = 0xe8410018 // ld r2,24(r1)
+- ctxt.Arch.ByteOrder.PutUint32(p[r.Off()+4:], o1)
+ }
+ }
+- // Put call stubs at the beginning (instead of the end).
++
++ // Append any usage of the go versions of ELF save/restore
++ // functions to the end of the callstub list to minimize
++ // chances a trampoline might be needed.
++ stubs = append(stubs, abifuncs...)
++
++ // Put stubs at the beginning (instead of the end).
+ // So when resolving the relocations to calls to the stubs,
+ // the addresses are known and trampolines can be inserted
+ // when necessary.
+@@ -202,13 +232,74 @@ func genaddmoduledata(ctxt *ld.Link, ldr *loader.Loader) {
+ o(0x4e800020)
+ }
+
++// Rewrite ELF (v1 or v2) calls to _savegpr0_n, _savegpr1_n, _savefpr_n, _restfpr_n, _savevr_m, or
++// _restvr_m (14<=n<=31, 20<=m<=31). Redirect them to runtime.elf_restgpr0+(n-14)*4,
++// runtime.elf_restvr+(m-20)*8, and similar.
++//
++// These functions are defined in the ELFv2 ABI (generated when using gcc -Os option) to save and
++// restore callee-saved registers (as defined in the PPC64 ELF ABIs) from registers n or m to 31 of
++// the named type. R12 and R0 are sometimes used in exceptional ways described in the ABI.
++//
++// Final note, this is only needed when linking internally. The external linker will generate these
++// functions if they are used.
++func rewriteABIFuncReloc(ctxt *ld.Link, ldr *loader.Loader, tname string, r loader.Reloc) (sym loader.Sym, firstUse bool) {
++ s := strings.Split(tname, "_")
++ // A valid call will split like {"", "savegpr0", "20"}
++ if len(s) != 3 {
++ return 0, false // Not an abi func.
++ }
++ minReg := 14 // _savegpr0_{n}, _savegpr1_{n}, _savefpr_{n}, 14 <= n <= 31
++ offMul := 4 // 1 instruction per register op.
++ switch s[1] {
++ case "savegpr0", "savegpr1", "savefpr":
++ case "restgpr0", "restgpr1", "restfpr":
++ case "savevr", "restvr":
++ minReg = 20 // _savevr_{n} or _restvr_{n}, 20 <= n <= 31
++ offMul = 8 // 2 instructions per register op.
++ default:
++ return 0, false // Not an abi func
++ }
++ n, e := strconv.Atoi(s[2])
++ if e != nil || n < minReg || n > 31 || r.Add() != 0 {
++ return 0, false // Invalid register number, or non-zero addend. Not an abi func.
++ }
++
++ // tname is a valid relocation to an ABI defined register save/restore function. Re-relocate
++ // them to a go version of these functions in runtime/asm_ppc64x.s
++ ts := ldr.LookupOrCreateSym("runtime.elf_"+s[1], 0)
++ r.SetSym(ts)
++ r.SetAdd(int64((n - minReg) * offMul))
++ firstUse = !ldr.AttrReachable(ts)
++ if firstUse {
++ ldr.SetAttrReachable(ts, true)
++ // This function only becomes reachable now. It has been dropped from
++ // the text section (it was unreachable until now), it needs included.
++ //
++ // Similarly, TOC regeneration should not happen for these functions,
++ // remove it from this save/restore function.
++ if ldr.AttrShared(ts) {
++ sb := ldr.MakeSymbolUpdater(ts)
++ sb.SetData(sb.Data()[8:])
++ sb.SetSize(sb.Size() - 8)
++ relocs := sb.Relocs()
++ // Only one PCREL reloc to .TOC. should be present.
++ if relocs.Count() != 1 {
++ log.Fatalf("Unexpected number of relocs in %s\n", ldr.SymName(ts))
++ }
++ sb.ResetRelocs()
++
++ }
++ }
++ return ts, firstUse
++}
++
+ func gentext(ctxt *ld.Link, ldr *loader.Loader) {
+ if ctxt.DynlinkingGo() {
+ genaddmoduledata(ctxt, ldr)
+ }
+
+ if ctxt.LinkMode == ld.LinkInternal {
+- genplt(ctxt, ldr)
++ genstubs(ctxt, ldr)
+ }
+ }
+
+@@ -863,6 +954,13 @@ func gentramp(ctxt *ld.Link, ldr *loader.Loader, tramp *loader.SymbolBuilder, ta
+ o1 = uint32(0x3c000000) | 12<<21 // lis r12,targetaddr hi
+ o2 = uint32(0x38000000) | 12<<21 | 12<<16 // addi r12,r12,targetaddr lo
+
++ // ELFv2 save/restore functions use R0/R12 in special ways, therefore trampolines
++ // as generated here will not always work correctly.
++ if strings.HasPrefix(ldr.SymName(target), "runtime.elf_") {
++ log.Fatalf("Internal linker does not support trampolines to ELFv2 ABI"+
++ " register save/restore function %s", ldr.SymName(target))
++ }
++
+ t := ldr.SymValue(target)
+ if t == 0 || r2Valid(ctxt) || ctxt.IsExternal() {
+ // Target address is unknown, generate relocations
+diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s
+index ae14213999..0344ed4a62 100644
+--- a/src/runtime/asm_ppc64x.s
++++ b/src/runtime/asm_ppc64x.s
+@@ -1143,3 +1143,206 @@ TEXT runtime·panicSliceConvert<ABIInternal>(SB),NOSPLIT,$0-16
+ MOVD R6, y+8(FP)
+ #endif
+ JMP runtime·goPanicSliceConvert<ABIInternal>(SB)
++
++// These functions are used when internal linking cgo with external
++// objects compiled with the -Os on gcc. They reduce prologue/epilogue
++// size by deferring preservation of callee-save registers to a shared
++// function. These are defined in PPC64 ELFv2 2.3.3 (but also present
++// in ELFv1)
++//
++// These appear unused, but the linker will redirect calls to functions
++// like _savegpr0_14 or _restgpr1_14 to runtime.elf_savegpr0 or
++// runtime.elf_restgpr1 with an appropriate offset based on the number
++// register operations required when linking external objects which
++// make these calls. For GPR/FPR saves, the minimum register value is
++// 14, for VR it is 20.
++//
++// These are only used when linking such cgo code internally. Note, R12
++// and R0 may be used in different ways than regular ELF compliant
++// functions.
++TEXT runtime·elf_savegpr0(SB),NOSPLIT|NOFRAME,$0
++ // R0 holds the LR of the caller's caller, R1 holds save location
++ MOVD R14, -144(R1)
++ MOVD R15, -136(R1)
++ MOVD R16, -128(R1)
++ MOVD R17, -120(R1)
++ MOVD R18, -112(R1)
++ MOVD R19, -104(R1)
++ MOVD R20, -96(R1)
++ MOVD R21, -88(R1)
++ MOVD R22, -80(R1)
++ MOVD R23, -72(R1)
++ MOVD R24, -64(R1)
++ MOVD R25, -56(R1)
++ MOVD R26, -48(R1)
++ MOVD R27, -40(R1)
++ MOVD R28, -32(R1)
++ MOVD R29, -24(R1)
++ MOVD g, -16(R1)
++ MOVD R31, -8(R1)
++ MOVD R0, 16(R1)
++ RET
++TEXT runtime·elf_restgpr0(SB),NOSPLIT|NOFRAME,$0
++ // R1 holds save location. This returns to the LR saved on stack (bypassing the caller)
++ MOVD -144(R1), R14
++ MOVD -136(R1), R15
++ MOVD -128(R1), R16
++ MOVD -120(R1), R17
++ MOVD -112(R1), R18
++ MOVD -104(R1), R19
++ MOVD -96(R1), R20
++ MOVD -88(R1), R21
++ MOVD -80(R1), R22
++ MOVD -72(R1), R23
++ MOVD -64(R1), R24
++ MOVD -56(R1), R25
++ MOVD -48(R1), R26
++ MOVD -40(R1), R27
++ MOVD -32(R1), R28
++ MOVD -24(R1), R29
++ MOVD -16(R1), g
++ MOVD -8(R1), R31
++ MOVD 16(R1), R0 // Load and return to saved LR
++ MOVD R0, LR
++ RET
++TEXT runtime·elf_savegpr1(SB),NOSPLIT|NOFRAME,$0
++ // R12 holds the save location
++ MOVD R14, -144(R12)
++ MOVD R15, -136(R12)
++ MOVD R16, -128(R12)
++ MOVD R17, -120(R12)
++ MOVD R18, -112(R12)
++ MOVD R19, -104(R12)
++ MOVD R20, -96(R12)
++ MOVD R21, -88(R12)
++ MOVD R22, -80(R12)
++ MOVD R23, -72(R12)
++ MOVD R24, -64(R12)
++ MOVD R25, -56(R12)
++ MOVD R26, -48(R12)
++ MOVD R27, -40(R12)
++ MOVD R28, -32(R12)
++ MOVD R29, -24(R12)
++ MOVD g, -16(R12)
++ MOVD R31, -8(R12)
++ RET
++TEXT runtime·elf_restgpr1(SB),NOSPLIT|NOFRAME,$0
++ // R12 holds the save location
++ MOVD -144(R12), R14
++ MOVD -136(R12), R15
++ MOVD -128(R12), R16
++ MOVD -120(R12), R17
++ MOVD -112(R12), R18
++ MOVD -104(R12), R19
++ MOVD -96(R12), R20
++ MOVD -88(R12), R21
++ MOVD -80(R12), R22
++ MOVD -72(R12), R23
++ MOVD -64(R12), R24
++ MOVD -56(R12), R25
++ MOVD -48(R12), R26
++ MOVD -40(R12), R27
++ MOVD -32(R12), R28
++ MOVD -24(R12), R29
++ MOVD -16(R12), g
++ MOVD -8(R12), R31
++ RET
++TEXT runtime·elf_savefpr(SB),NOSPLIT|NOFRAME,$0
++ // R0 holds the LR of the caller's caller, R1 holds save location
++ FMOVD F14, -144(R1)
++ FMOVD F15, -136(R1)
++ FMOVD F16, -128(R1)
++ FMOVD F17, -120(R1)
++ FMOVD F18, -112(R1)
++ FMOVD F19, -104(R1)
++ FMOVD F20, -96(R1)
++ FMOVD F21, -88(R1)
++ FMOVD F22, -80(R1)
++ FMOVD F23, -72(R1)
++ FMOVD F24, -64(R1)
++ FMOVD F25, -56(R1)
++ FMOVD F26, -48(R1)
++ FMOVD F27, -40(R1)
++ FMOVD F28, -32(R1)
++ FMOVD F29, -24(R1)
++ FMOVD F30, -16(R1)
++ FMOVD F31, -8(R1)
++ MOVD R0, 16(R1)
++ RET
++TEXT runtime·elf_restfpr(SB),NOSPLIT|NOFRAME,$0
++ // R1 holds save location. This returns to the LR saved on stack (bypassing the caller)
++ FMOVD -144(R1), F14
++ FMOVD -136(R1), F15
++ FMOVD -128(R1), F16
++ FMOVD -120(R1), F17
++ FMOVD -112(R1), F18
++ FMOVD -104(R1), F19
++ FMOVD -96(R1), F20
++ FMOVD -88(R1), F21
++ FMOVD -80(R1), F22
++ FMOVD -72(R1), F23
++ FMOVD -64(R1), F24
++ FMOVD -56(R1), F25
++ FMOVD -48(R1), F26
++ FMOVD -40(R1), F27
++ FMOVD -32(R1), F28
++ FMOVD -24(R1), F29
++ FMOVD -16(R1), F30
++ FMOVD -8(R1), F31
++ MOVD 16(R1), R0 // Load and return to saved LR
++ MOVD R0, LR
++ RET
++TEXT runtime·elf_savevr(SB),NOSPLIT|NOFRAME,$0
++ // R0 holds the save location, R12 is clobbered
++ MOVD $-192, R12
++ STVX V20, (R0+R12)
++ MOVD $-176, R12
++ STVX V21, (R0+R12)
++ MOVD $-160, R12
++ STVX V22, (R0+R12)
++ MOVD $-144, R12
++ STVX V23, (R0+R12)
++ MOVD $-128, R12
++ STVX V24, (R0+R12)
++ MOVD $-112, R12
++ STVX V25, (R0+R12)
++ MOVD $-96, R12
++ STVX V26, (R0+R12)
++ MOVD $-80, R12
++ STVX V27, (R0+R12)
++ MOVD $-64, R12
++ STVX V28, (R0+R12)
++ MOVD $-48, R12
++ STVX V29, (R0+R12)
++ MOVD $-32, R12
++ STVX V30, (R0+R12)
++ MOVD $-16, R12
++ STVX V31, (R0+R12)
++ RET
++TEXT runtime·elf_restvr(SB),NOSPLIT|NOFRAME,$0
++ // R0 holds the save location, R12 is clobbered
++ MOVD $-192, R12
++ LVX (R0+R12), V20
++ MOVD $-176, R12
++ LVX (R0+R12), V21
++ MOVD $-160, R12
++ LVX (R0+R12), V22
++ MOVD $-144, R12
++ LVX (R0+R12), V23
++ MOVD $-128, R12
++ LVX (R0+R12), V24
++ MOVD $-112, R12
++ LVX (R0+R12), V25
++ MOVD $-96, R12
++ LVX (R0+R12), V26
++ MOVD $-80, R12
++ LVX (R0+R12), V27
++ MOVD $-64, R12
++ LVX (R0+R12), V28
++ MOVD $-48, R12
++ LVX (R0+R12), V29
++ MOVD $-32, R12
++ LVX (R0+R12), V30
++ MOVD $-16, R12
++ LVX (R0+R12), V31
++ RET
+--
+2.31.1
+
The autobuilder has been reporting build failures for golang packages such as moby, containerd, and docker-cli on powerpc64le: net(.text): relocation target _savegpr0_28 not defined http://autobuild.buildroot.net/results/a4aab952acca33577822daf9541c25ccb8e68e60 http://autobuild.buildroot.net/results/07dab4c21074d9f2cfba5020eac1d07e630fa3e9 http://autobuild.buildroot.net/results/e37af28b6ea2293a5a8a3bb1a5c123bd465ff5de Backport the upstream fix for the go compiler, and a dependant patch: https://github.com/golang/go/commit/9c9090eb1da540c7d757df0c60423319a28759d3 https://github.com/golang/go/commit/caa46312eeca1275ce22ecf8985ca31ef8de7883 Signed-off-by: Joel Stanley <joel@jms.id.au> --- ...-relative-trampolines-on-PPC64-when-.patch | 279 +++++++++ ...e-PPC64-ABI-register-save-restore-fu.patch | 549 ++++++++++++++++++ 2 files changed, 828 insertions(+) create mode 100644 package/go/0002-cmd-link-use-TOC-relative-trampolines-on-PPC64-when-.patch create mode 100644 package/go/0003-cmd-link-generate-PPC64-ABI-register-save-restore-fu.patch