From patchwork Tue May 14 14:59:47 2019
X-Patchwork-Submitter: Ian Lance Taylor
X-Patchwork-Id: 1099543
From: Ian Lance Taylor
Date: Tue, 14 May 2019 07:59:47 -0700
Subject: libgo patch committed: Reduce profiling overhead
To: gcc-patches, gofrontend-dev

This libgo patch by Than McIntosh revises the gccgo version of
memory/block/mutex profiling to reduce runtime overhead.  The main
change is to collect raw stack traces while the profile is online,
then post-process the stacks just prior to the point where we are
ready to use the final product.  Memory profiling (at a very low
sampling rate) is enabled by default, and the overhead of the
symbolization / DWARF-reading from backtrace_full was slowing things
down relative to the main Go runtime.

Bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu.  Committed
to mainline.

Ian
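As a rough standalone illustration of the collect-now/symbolize-later
split (this sketch is not part of the patch; the rawSample, capture,
and symbolize names are invented for the example, and it uses only the
public runtime API rather than the gccgo-internal hooks changed below):
the hot path records raw PCs only, and the PC-to-function/file/line
resolution is deferred until the profile is actually read.

// profsketch.go -- standalone illustration, NOT part of the libgo patch.
package main

import (
	"fmt"
	"runtime"
)

// rawSample is the analogue of a raw profile bucket: PCs only, no symbols.
type rawSample struct {
	pcs []uintptr
}

// capture is the hot path: walk the stack, but do no symbolization.
func capture(skip int) rawSample {
	pcs := make([]uintptr, 32)
	n := runtime.Callers(skip+2, pcs) // +2 skips Callers and capture itself
	return rawSample{pcs: pcs[:n]}
}

// symbolize is the cold path: resolve PCs to function/file/line only when
// the profile is finally consumed (this also expands inlined frames).
func (s rawSample) symbolize() []string {
	var out []string
	frames := runtime.CallersFrames(s.pcs)
	for {
		f, more := frames.Next()
		out = append(out, fmt.Sprintf("%s (%s:%d)", f.Function, f.File, f.Line))
		if !more {
			break
		}
	}
	return out
}

func main() {
	s := capture(0)                      // cheap: done while "profiling"
	for _, line := range s.symbolize() { // expensive: done once, at report time
		fmt.Println(line)
	}
}

In the patch itself the cheap half corresponds to callersRaw /
runtime_callersRaw (backtrace_simple, raw PCs only), and the expensive
half to fixupStack / fixupBucket, which run under proflock when
MemProfile, BlockProfile, or MutexProfile is called.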
Index: gcc/go/gofrontend/MERGE
===================================================================
--- gcc/go/gofrontend/MERGE	(revision 271135)
+++ gcc/go/gofrontend/MERGE	(working copy)
@@ -1,4 +1,4 @@
-3f015e128bf6d1d9279f3d43e26f60f0927019cb
+6112f9b8fa9d57d2db8a709cc8b44a94d778d08a
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: libgo/go/runtime/heapdump.go
===================================================================
--- libgo/go/runtime/heapdump.go	(revision 270877)
+++ libgo/go/runtime/heapdump.go	(working copy)
@@ -437,17 +437,15 @@ func dumpmemstats() {
 	dumpint(uint64(memstats.numgc))
 }
 
-func dumpmemprof_callback(b *bucket, nstk uintptr, pstk *location, size, allocs, frees uintptr) {
-	stk := (*[100000]location)(unsafe.Pointer(pstk))
+func dumpmemprof_callback(b *bucket, nstk uintptr, pstk *uintptr, size, allocs, frees uintptr) {
+	stk := (*[100000]uintptr)(unsafe.Pointer(pstk))
 	dumpint(tagMemProf)
 	dumpint(uint64(uintptr(unsafe.Pointer(b))))
 	dumpint(uint64(size))
 	dumpint(uint64(nstk))
 	for i := uintptr(0); i < nstk; i++ {
-		pc := stk[i].pc
-		fn := stk[i].function
-		file := stk[i].filename
-		line := stk[i].lineno
+		pc := stk[i]
+		fn, file, line, _ := funcfileline(pc, -1)
 		if fn == "" {
 			var buf [64]byte
 			n := len(buf)
Index: libgo/go/runtime/mgcmark.go
===================================================================
--- libgo/go/runtime/mgcmark.go	(revision 270877)
+++ libgo/go/runtime/mgcmark.go	(working copy)
@@ -1085,7 +1085,7 @@ func scanstackblockwithmap(pc, b0, n0 ui
 			span != nil && span.state != mSpanManual &&
 				(obj < span.base() || obj >= span.limit || span.state != mSpanInUse) {
 				print("runtime: found in object at *(", hex(b), "+", hex(i), ") = ", hex(obj), ", pc=", hex(pc), "\n")
-				name, file, line := funcfileline(pc, -1)
+				name, file, line, _ := funcfileline(pc, -1)
 				print(name, "\n", file, ":", line, "\n")
 				//gcDumpObject("object", b, i)
 				throw("found bad pointer in Go stack (incorrect use of unsafe or cgo?)")
Index: libgo/go/runtime/mprof.go
===================================================================
--- libgo/go/runtime/mprof.go	(revision 270877)
+++ libgo/go/runtime/mprof.go	(working copy)
@@ -24,6 +24,10 @@ const (
 	blockProfile
 	mutexProfile
 
+	// a profile bucket from one of the categories above whose stack
+	// trace has been fixed up / pruned.
+	prunedProfile
+
 	// size of bucket hash table
 	buckHashSize = 179999
 
@@ -138,11 +142,13 @@ type blockRecord struct {
 }
 
 var (
-	mbuckets  *bucket // memory profile buckets
-	bbuckets  *bucket // blocking profile buckets
-	xbuckets  *bucket // mutex profile buckets
-	buckhash  *[179999]*bucket
-	bucketmem uintptr
+	mbuckets    *bucket // memory profile buckets
+	bbuckets    *bucket // blocking profile buckets
+	xbuckets    *bucket // mutex profile buckets
+	sbuckets    *bucket // pre-symbolization profile buckets (stacks fixed up)
+	freebuckets *bucket // freelist of unused fixed up profile buckets
+	buckhash    *[179999]*bucket
+	bucketmem   uintptr
 
 	mProf struct {
 		// All fields in mProf are protected by proflock.
@@ -158,12 +164,35 @@ var (
 
 const mProfCycleWrap = uint32(len(memRecord{}.future)) * (2 << 24)
 
+// payloadOffset() returns a pointer into the part of a bucket
+// containing the profile payload (skips past the bucket struct itself
+// and then the stack trace).
+func payloadOffset(typ bucketType, nstk uintptr) uintptr {
+	if typ == prunedProfile {
+		// To allow reuse of prunedProfile buckets between different
+		// collections, allocate them with the max stack size (the portion
+		// of the stack used will vary from trace to trace).
+		nstk = maxStack
+	}
+	return unsafe.Sizeof(bucket{}) + uintptr(nstk)*unsafe.Sizeof(uintptr(0))
+}
+
+func max(x, y uintptr) uintptr {
+	if x > y {
+		return x
+	}
+	return y
+}
+
 // newBucket allocates a bucket with the given type and number of stack entries.
 func newBucket(typ bucketType, nstk int) *bucket {
-	size := unsafe.Sizeof(bucket{}) + uintptr(nstk)*unsafe.Sizeof(location{})
+	size := payloadOffset(typ, uintptr(nstk))
 	switch typ {
 	default:
 		throw("invalid profile bucket type")
+	case prunedProfile:
+		// stack-fixed buckets are large enough to accommodate any payload.
+		size += max(unsafe.Sizeof(memRecord{}), unsafe.Sizeof(blockRecord{}))
 	case memProfile:
 		size += unsafe.Sizeof(memRecord{})
 	case blockProfile, mutexProfile:
@@ -178,31 +207,29 @@ func newBucket(typ bucketType, nstk int)
 }
 
 // stk returns the slice in b holding the stack.
-func (b *bucket) stk() []location {
-	stk := (*[maxStack]location)(add(unsafe.Pointer(b), unsafe.Sizeof(*b)))
+func (b *bucket) stk() []uintptr {
+	stk := (*[maxStack]uintptr)(add(unsafe.Pointer(b), unsafe.Sizeof(*b)))
 	return stk[:b.nstk:b.nstk]
 }
 
 // mp returns the memRecord associated with the memProfile bucket b.
 func (b *bucket) mp() *memRecord {
-	if b.typ != memProfile {
+	if b.typ != memProfile && b.typ != prunedProfile {
 		throw("bad use of bucket.mp")
 	}
-	data := add(unsafe.Pointer(b), unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(location{}))
-	return (*memRecord)(data)
+	return (*memRecord)(add(unsafe.Pointer(b), payloadOffset(b.typ, b.nstk)))
}
 
 // bp returns the blockRecord associated with the blockProfile bucket b.
 func (b *bucket) bp() *blockRecord {
-	if b.typ != blockProfile && b.typ != mutexProfile {
+	if b.typ != blockProfile && b.typ != mutexProfile && b.typ != prunedProfile {
 		throw("bad use of bucket.bp")
 	}
-	data := add(unsafe.Pointer(b), unsafe.Sizeof(*b)+b.nstk*unsafe.Sizeof(location{}))
-	return (*blockRecord)(data)
+	return (*blockRecord)(add(unsafe.Pointer(b), payloadOffset(b.typ, b.nstk)))
 }
 
 // Return the bucket for stk[0:nstk], allocating new bucket if needed.
-func stkbucket(typ bucketType, size uintptr, stk []location, alloc bool) *bucket {
+func stkbucket(typ bucketType, size uintptr, stk []uintptr, alloc bool) *bucket {
 	if buckhash == nil {
 		buckhash = (*[buckHashSize]*bucket)(sysAlloc(unsafe.Sizeof(*buckhash), &memstats.buckhash_sys))
 		if buckhash == nil {
@@ -212,8 +239,8 @@ func stkbucket(typ bucketType, size uint
 
 	// Hash stack.
 	var h uintptr
-	for _, loc := range stk {
-		h += loc.pc
+	for _, pc := range stk {
+		h += pc
 		h += h << 10
 		h ^= h >> 6
 	}
@@ -249,6 +276,9 @@ func stkbucket(typ bucketType, size uint
 	} else if typ == mutexProfile {
 		b.allnext = xbuckets
 		xbuckets = b
+	} else if typ == prunedProfile {
+		b.allnext = sbuckets
+		sbuckets = b
 	} else {
 		b.allnext = bbuckets
 		bbuckets = b
@@ -256,7 +286,7 @@ func stkbucket(typ bucketType, size uint
 	return b
 }
 
-func eqslice(x, y []location) bool {
+func eqslice(x, y []uintptr) bool {
 	if len(x) != len(y) {
 		return false
 	}
@@ -338,8 +368,8 @@ func mProf_PostSweep() {
 
 // Called by malloc to record a profiled block.
 func mProf_Malloc(p unsafe.Pointer, size uintptr) {
-	var stk [maxStack]location
-	nstk := callers(4, stk[:])
+	var stk [maxStack]uintptr
+	nstk := callersRaw(1, stk[:])
 	lock(&proflock)
 	b := stkbucket(memProfile, size, stk[:nstk], true)
 	c := mProf.cycle
@@ -414,13 +444,13 @@ func blocksampled(cycles int64) bool {
 func saveblockevent(cycles int64, skip int, which bucketType) {
 	gp := getg()
 	var nstk int
-	var stk [maxStack]location
+	var stk [maxStack]uintptr
 	if gp.m.curg == nil || gp.m.curg == gp {
-		nstk = callers(skip, stk[:])
+		nstk = callersRaw(skip, stk[:])
 	} else {
 		// FIXME: This should get a traceback of gp.m.curg.
 		// nstk = gcallers(gp.m.curg, skip, stk[:])
-		nstk = callers(skip, stk[:])
+		nstk = callersRaw(skip, stk[:])
 	}
 	lock(&proflock)
 	b := stkbucket(which, 0, stk[:nstk], true)
@@ -521,6 +551,150 @@ func (r *MemProfileRecord) Stack() []uin
 	return r.Stack0[0:]
 }
 
+// reusebucket tries to pick a prunedProfile bucket off
+// the freebuckets list, returning it if one is available or nil
+// if the free list is empty.
+func reusebucket(nstk int) *bucket {
+	var b *bucket
+	if freebuckets != nil {
+		b = freebuckets
+		freebuckets = freebuckets.allnext
+		b.typ = prunedProfile
+		b.nstk = uintptr(nstk)
+		mp := b.mp()
+		// Hack: rely on the fact that memprofile records are
+		// larger than blockprofile records when clearing.
+		*mp = memRecord{}
+	}
+	return b
+}
+
+// freebucket appends the specified prunedProfile bucket
+// onto the free list, and removes references to it from the hash.
+func freebucket(tofree *bucket) *bucket {
+	// Thread this bucket into the free list.
+	ret := tofree.allnext
+	tofree.allnext = freebuckets
+	freebuckets = tofree
+
+	// Clean up the hash. The hash may point directly to this bucket...
+	i := int(tofree.hash % buckHashSize)
+	if buckhash[i] == tofree {
+		buckhash[i] = tofree.next
+	} else {
+		// ... or when this bucket was inserted by stkbucket, it may have been
+		// chained off some other unrelated bucket.
+		for b := buckhash[i]; b != nil; b = b.next {
+			if b.next == tofree {
+				b.next = tofree.next
+				break
+			}
+		}
+	}
+	return ret
+}
+
+// fixupStack takes a 'raw' stack trace (stack of PCs generated by
+// callersRaw) and performs pre-symbolization fixup on it, returning
+// the results in 'canonStack'. For each frame we look at the
+// file/func/line information, then use that info to decide whether to
+// include the frame in the final symbolized stack (removing frames
+// corresponding to 'morestack' routines, for example). We also expand
+// frames if the PC values to which they refer correspond to inlined
+// functions to allow for expanded symbolic info to be filled in
+// later. Note: there is code in go-callers.c's backtrace_full callback()
+// function that performs very similar fixups; these two code paths
+// should be kept in sync.
+func fixupStack(stk []uintptr, canonStack *[maxStack]uintptr, size uintptr) int {
+	var cidx int
+	var termTrace bool
+	for _, pc := range stk {
+		// Subtract 1 from PC to undo the 1 we added in callback in
+		// go-callers.c.
+		function, file, _, frames := funcfileline(pc-1, -1)
+
+		// Skip split-stack functions (match by function name)
+		skipFrame := false
+		if hasPrefix(function, "_____morestack_") || hasPrefix(function, "__morestack_") {
+			skipFrame = true
+		}
+
+		// Skip split-stack functions (match by file)
+		if hasSuffix(file, "/morestack.S") {
+			skipFrame = true
+		}
+
+		// Skip thunks and recover functions. There is no equivalent to
+		// these functions in the gc toolchain.
+		fcn := function
+		if hasSuffix(fcn, "..r") {
+			skipFrame = true
+		} else {
+			for fcn != "" && (fcn[len(fcn)-1] >= '0' && fcn[len(fcn)-1] <= '9') {
+				fcn = fcn[:len(fcn)-1]
+			}
+			if hasSuffix(fcn, "..stub") || hasSuffix(fcn, "..thunk") {
+				skipFrame = true
+			}
+		}
+		if skipFrame {
+			continue
+		}
+
+		// Terminate the trace if we encounter a frame corresponding to
+		// runtime.main, runtime.kickoff, makecontext, etc. See the
+		// corresponding code in go-callers.c, callback function used
+		// with backtrace_full.
+		if function == "makecontext" {
+			termTrace = true
+		}
+		if hasSuffix(file, "/proc.c") && function == "runtime_mstart" {
+			termTrace = true
+		}
+		if hasSuffix(file, "/proc.go") &&
+			(function == "runtime.main" || function == "runtime.kickoff") {
+			termTrace = true
+		}
+
+		// Expand inline frames.
+		for i := 0; i < frames; i++ {
+			(*canonStack)[cidx] = pc
+			cidx++
+			if cidx >= maxStack {
+				termTrace = true
+				break
+			}
+		}
+		if termTrace {
+			break
+		}
+	}
+	return cidx
+}
+
+// fixupBucket takes a raw memprofile bucket and creates a new bucket
+// in which the stack trace has been fixed up (inline frames expanded,
+// unwanted frames stripped out). Original bucket is left unmodified;
+// a new symbolizeProfile bucket may be generated as a side effect.
+// Payload information from the original bucket is incorporated into
+// the new bucket.
+func fixupBucket(b *bucket) {
+	var canonStack [maxStack]uintptr
+	frames := fixupStack(b.stk(), &canonStack, b.size)
+	cb := stkbucket(prunedProfile, b.size, canonStack[:frames], true)
+	switch b.typ {
+	default:
+		throw("invalid profile bucket type")
+	case memProfile:
+		rawrecord := b.mp()
+		cb.mp().active.add(&rawrecord.active)
+	case blockProfile, mutexProfile:
+		bpcount := b.bp().count
+		cb.bp().count += bpcount
+		cb.bp().cycles += bpcount
+	}
+}
+
 // MemProfile returns a profile of memory allocated and freed per allocation
 // site.
 //
@@ -576,15 +750,31 @@ func MemProfile(p []MemProfileRecord, in
 		}
 	}
 	if n <= len(p) {
-		ok = true
-		idx := 0
-		for b := mbuckets; b != nil; b = b.allnext {
+		var bnext *bucket
+
+		// Post-process raw buckets to fix up their stack traces
+		for b := mbuckets; b != nil; b = bnext {
+			bnext = b.allnext
 			mp := b.mp()
 			if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
-				record(&p[idx], b)
-				idx++
+				fixupBucket(b)
 			}
 		}
+
+		// Record pruned/fixed-up buckets
+		ok = true
+		idx := 0
+		for b := sbuckets; b != nil; b = b.allnext {
+			record(&p[idx], b)
+			idx++
+		}
+		n = idx
+
+		// Free up pruned buckets for use in next round
+		for b := sbuckets; b != nil; b = bnext {
+			bnext = freebucket(b)
+		}
+		sbuckets = nil
 	}
 	unlock(&proflock)
 	return
@@ -597,18 +787,18 @@ func record(r *MemProfileRecord, b *buck
 	r.FreeBytes = int64(mp.active.free_bytes)
 	r.AllocObjects = int64(mp.active.allocs)
 	r.FreeObjects = int64(mp.active.frees)
-	for i, loc := range b.stk() {
+	for i, pc := range b.stk() {
 		if i >= len(r.Stack0) {
 			break
 		}
-		r.Stack0[i] = loc.pc
+		r.Stack0[i] = pc
 	}
 	for i := int(b.nstk); i < len(r.Stack0); i++ {
 		r.Stack0[i] = 0
 	}
 }
 
-func iterate_memprof(fn func(*bucket, uintptr, *location, uintptr, uintptr, uintptr)) {
+func iterate_memprof(fn func(*bucket, uintptr, *uintptr, uintptr, uintptr, uintptr)) {
 	lock(&proflock)
 	for b := mbuckets; b != nil; b = b.allnext {
 		mp := b.mp()
@@ -625,39 +815,59 @@ type BlockProfileRecord struct {
 	StackRecord
 }
 
-// BlockProfile returns n, the number of records in the current blocking profile.
-// If len(p) >= n, BlockProfile copies the profile into p and returns n, true.
-// If len(p) < n, BlockProfile does not change p and returns n, false.
-//
-// Most clients should use the runtime/pprof package or
-// the testing package's -test.blockprofile flag instead
-// of calling BlockProfile directly.
-func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
-	lock(&proflock)
-	for b := bbuckets; b != nil; b = b.allnext {
+func harvestBlockMutexProfile(buckets *bucket, p []BlockProfileRecord) (n int, ok bool) {
+	for b := buckets; b != nil; b = b.allnext {
 		n++
 	}
 	if n <= len(p) {
+		var bnext *bucket
+
+		// Post-process raw buckets to create pruned/fixed-up buckets
+		for b := buckets; b != nil; b = bnext {
+			bnext = b.allnext
+			fixupBucket(b)
+		}
+
+		// Record
 		ok = true
-		for b := bbuckets; b != nil; b = b.allnext {
+		for b := sbuckets; b != nil; b = b.allnext {
 			bp := b.bp()
 			r := &p[0]
 			r.Count = bp.count
 			r.Cycles = bp.cycles
 			i := 0
-			var loc location
-			for i, loc = range b.stk() {
+			var pc uintptr
+			for i, pc = range b.stk() {
 				if i >= len(r.Stack0) {
 					break
 				}
-				r.Stack0[i] = loc.pc
+				r.Stack0[i] = pc
 			}
 			for ; i < len(r.Stack0); i++ {
 				r.Stack0[i] = 0
 			}
 			p = p[1:]
 		}
+
+		// Free up pruned buckets for use in next round.
+		for b := sbuckets; b != nil; b = bnext {
+			bnext = freebucket(b)
+		}
+		sbuckets = nil
 	}
+	return
+}
+
+// BlockProfile returns n, the number of records in the current blocking profile.
+// If len(p) >= n, BlockProfile copies the profile into p and returns n, true.
+// If len(p) < n, BlockProfile does not change p and returns n, false.
+//
+// Most clients should use the runtime/pprof package or
+// the testing package's -test.blockprofile flag instead
+// of calling BlockProfile directly.
+func BlockProfile(p []BlockProfileRecord) (n int, ok bool) {
+	lock(&proflock)
+	n, ok = harvestBlockMutexProfile(bbuckets, p)
 	unlock(&proflock)
 	return
 }
@@ -670,30 +880,7 @@ func BlockProfile(p []BlockProfileRecord
 // instead of calling MutexProfile directly.
 func MutexProfile(p []BlockProfileRecord) (n int, ok bool) {
 	lock(&proflock)
-	for b := xbuckets; b != nil; b = b.allnext {
-		n++
-	}
-	if n <= len(p) {
-		ok = true
-		for b := xbuckets; b != nil; b = b.allnext {
-			bp := b.bp()
-			r := &p[0]
-			r.Count = int64(bp.count)
-			r.Cycles = bp.cycles
-			i := 0
-			var loc location
-			for i, loc = range b.stk() {
-				if i >= len(r.Stack0) {
-					break
-				}
-				r.Stack0[i] = loc.pc
-			}
-			for ; i < len(r.Stack0); i++ {
-				r.Stack0[i] = 0
-			}
-			p = p[1:]
-		}
-	}
+	n, ok = harvestBlockMutexProfile(xbuckets, p)
 	unlock(&proflock)
 	return
 }
Index: libgo/go/runtime/panic.go
===================================================================
--- libgo/go/runtime/panic.go	(revision 270877)
+++ libgo/go/runtime/panic.go	(working copy)
@@ -53,7 +53,7 @@ var indexError = error(errorString("inde
 // entire runtime stack for easier debugging.
 
 func panicindex() {
-	name, _, _ := funcfileline(getcallerpc()-1, -1)
+	name, _, _, _ := funcfileline(getcallerpc()-1, -1)
 	if hasPrefix(name, "runtime.") {
 		throw(string(indexError.(errorString)))
 	}
@@ -64,7 +64,7 @@ func panicindex() {
 var sliceError = error(errorString("slice bounds out of range"))
 
 func panicslice() {
-	name, _, _ := funcfileline(getcallerpc()-1, -1)
+	name, _, _, _ := funcfileline(getcallerpc()-1, -1)
 	if hasPrefix(name, "runtime.") {
 		throw(string(sliceError.(errorString)))
 	}
Index: libgo/go/runtime/string.go
===================================================================
--- libgo/go/runtime/string.go	(revision 270877)
+++ libgo/go/runtime/string.go	(working copy)
@@ -360,6 +360,10 @@ func hasPrefix(s, prefix string) bool {
 	return len(s) >= len(prefix) && s[:len(prefix)] == prefix
 }
 
+func hasSuffix(s, suffix string) bool {
+	return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix
+}
+
 const (
 	maxUint = ^uint(0)
 	maxInt  = int(maxUint >> 1)
Index: libgo/go/runtime/symtab.go
===================================================================
--- libgo/go/runtime/symtab.go	(revision 270877)
+++ libgo/go/runtime/symtab.go	(working copy)
@@ -79,7 +79,7 @@ func (ci *Frames) Next() (frame Frame, m
 
 	// Subtract 1 from PC to undo the 1 we added in callback in
 	// go-callers.c.
-	function, file, line := funcfileline(pc-1, int32(i))
+	function, file, line, _ := funcfileline(pc-1, int32(i))
 	if function == "" && file == "" {
 		return Frame{}, more
 	}
@@ -158,7 +158,7 @@ const (
 // the a *Func describing the innermost function, but with an entry
 // of the outermost function.
 func FuncForPC(pc uintptr) *Func {
-	name, _, _ := funcfileline(pc, -1)
+	name, _, _, _ := funcfileline(pc, -1)
 	if name == "" {
 		return nil
 	}
@@ -187,7 +187,7 @@ func (f *Func) Entry() uintptr {
 // The result will not be accurate if pc is not a program
 // counter within f.
 func (f *Func) FileLine(pc uintptr) (file string, line int) {
-	_, file, line = funcfileline(pc, -1)
+	_, file, line, _ = funcfileline(pc, -1)
 	return file, line
 }
 
@@ -261,5 +261,5 @@ func demangleSymbol(s string) string {
 }
 
 // implemented in go-caller.c
-func funcfileline(uintptr, int32) (string, string, int)
+func funcfileline(uintptr, int32) (string, string, int, int)
 func funcentry(uintptr) uintptr
Index: libgo/go/runtime/traceback_gccgo.go
===================================================================
--- libgo/go/runtime/traceback_gccgo.go	(revision 270877)
+++ libgo/go/runtime/traceback_gccgo.go	(working copy)
@@ -20,7 +20,7 @@ func printcreatedby(gp *g) {
 	if entry != 0 && tracepc > entry {
 		tracepc -= sys.PCQuantum
 	}
-	function, file, line := funcfileline(tracepc, -1)
+	function, file, line, _ := funcfileline(tracepc, -1)
 	if function != "" && showframe(function, gp, false) && gp.goid != 1 {
 		printcreatedby1(function, file, line, entry, pc)
 	}
@@ -61,6 +61,16 @@ func callers(skip int, locbuf []location
 	return int(n)
 }
 
+//go:noescape
+//extern runtime_callersRaw
+func c_callersRaw(skip int32, pcs *uintptr, max int32) int32
+
+// callersRaw returns a raw (PCs only) stack trace of the current goroutine.
+func callersRaw(skip int, pcbuf []uintptr) int {
+	n := c_callersRaw(int32(skip)+1, &pcbuf[0], int32(len(pcbuf)))
+	return int(n)
+}
+
 // traceback prints a traceback of the current goroutine.
 // This differs from the gc version, which is given pc, sp, lr and g and
 // can print a traceback of any goroutine.
@@ -83,7 +93,7 @@ func traceback(skip int32) {
 func printAncestorTraceback(ancestor ancestorInfo) {
 	print("[originating from goroutine ", ancestor.goid, "]:\n")
 	for fidx, pc := range ancestor.pcs {
-		function, file, line := funcfileline(pc, -1)
+		function, file, line, _ := funcfileline(pc, -1)
 		if showfuncinfo(function, fidx == 0) {
 			printAncestorTracebackFuncInfo(function, file, line, pc)
 		}
@@ -92,7 +102,7 @@ func printAncestorTraceback(ancestor anc
 		print("...additional frames elided...\n")
 	}
 	// Show what created goroutine, except main goroutine (goid 1).
-	function, file, line := funcfileline(ancestor.gopc, -1)
+	function, file, line, _ := funcfileline(ancestor.gopc, -1)
 	if function != "" && showfuncinfo(function, false) && ancestor.goid != 1 {
 		printcreatedby1(function, file, line, funcentry(ancestor.gopc), ancestor.gopc)
 	}
Index: libgo/runtime/go-caller.c
===================================================================
--- libgo/runtime/go-caller.c	(revision 270877)
+++ libgo/runtime/go-caller.c	(working copy)
@@ -26,11 +26,13 @@ struct caller
 {
   String file;
   intgo line;
  intgo index;
+  intgo frames;
 };
 
 /* Collect file/line information for a PC value.  If this is called
-   more than once, due to inlined functions, we use the last call, as
-   that is usually the most useful one.  */
+   more than once, due to inlined functions, we record the number of
+   inlined frames but return file/func/line for the last call, as
+   that is usually the most useful one.  */
 
 static int
 callback (void *data, uintptr_t pc __attribute__ ((unused)),
@@ -38,6 +40,8 @@ callback (void *data, uintptr_t pc __att
 {
   struct caller *c = (struct caller *) data;
 
+  c->frames++;
+
  /* The libbacktrace library says that these strings might disappear,
     but with the current implementation they won't.  We can't easily
     allocate memory here, so for now assume that we can save a
@@ -125,18 +129,19 @@ __go_get_backtrace_state ()
   return back_state;
 }
 
-/* Return function/file/line information for PC.  The index parameter
+/* Return function/file/line/nframes information for PC.  The index parameter
    is the entry on the stack of inlined functions; -1 means the last
-   one.  */
+   one, with *nframes set to the count of inlined frames for this PC.  */
 
 static _Bool
-__go_file_line (uintptr pc, int index, String *fn, String *file, intgo *line)
+__go_file_line (uintptr pc, int index, String *fn, String *file, intgo *line, intgo *nframes)
 {
   struct caller c;
   struct backtrace_state *state;
 
   runtime_memclr (&c, sizeof c);
   c.index = index;
+  c.frames = 0;
   runtime_xadd (&__go_runtime_in_callers, 1);
   state = __go_get_backtrace_state ();
   runtime_xadd (&__go_runtime_in_callers, -1);
@@ -144,6 +149,7 @@ __go_file_line (uintptr pc, int index, S
   *fn = c.fn;
   *file = c.file;
   *line = c.line;
+  *nframes = c.frames;
 
   // If backtrace_pcinfo didn't get the function name from the debug
   // info, try to get it from the symbol table.
@@ -222,7 +228,7 @@ runtime_funcfileline (uintptr targetpc, 
   struct funcfileline_return ret;
 
   if (!__go_file_line (targetpc, index, &ret.retfn, &ret.retfile,
-		       &ret.retline))
+		       &ret.retline, &ret.retframes))
     runtime_memclr (&ret, sizeof ret);
   return ret;
 }
Index: libgo/runtime/go-callers.c
===================================================================
--- libgo/runtime/go-callers.c	(revision 270877)
+++ libgo/runtime/go-callers.c	(working copy)
@@ -63,7 +63,9 @@ callback (void *data, uintptr_t pc, cons
 
   /* Skip thunks and recover functions.  There is no equivalent to
     these functions in the gc toolchain, so returning them here means
-     significantly different results for runtime.Caller(N).  */
+     significantly different results for runtime.Caller(N).  See also
+     similar code in runtime/mprof.go that strips out such functions
+     for block/mutex/memory profiles.  */
   if (function != NULL && !arg->keep_thunks)
     {
      const char *p;
@@ -262,3 +264,62 @@ Callers (intgo skip, struct __go_open_ar
 
   return ret;
 }
+
+struct callersRaw_data
+{
+  uintptr* pcbuf;
+  int skip;
+  int index;
+  int max;
+};
+
+// Callback function for backtrace_simple.  Just collect pc's.
+// Return zero to continue, non-zero to stop.
+
+static int callback_raw (void *data, uintptr_t pc)
+{
+  struct callersRaw_data *arg = (struct callersRaw_data *) data;
+
+  if (arg->skip > 0)
+    {
+      --arg->skip;
+      return 0;
+    }
+
+  /* On the call to backtrace_simple the pc value was most likely
+     decremented if there was a normal call, since the pc referred to
+     the instruction where the call returned and not the call itself.
+     This was done so that the line number referred to the call
+     instruction.  To make sure the actual pc from the call stack is
+     used, it is incremented here.
+
+     In the case of a signal, the pc was not decremented by
+     backtrace_full but still incremented here.  That doesn't really
+     hurt anything since the line number is right and the pc refers to
+     the same instruction.  */
+
+  arg->pcbuf[arg->index] = pc + 1;
+  arg->index++;
+  return arg->index >= arg->max;
+}
+
+/* runtime_callersRaw is similar to runtime_callers() above, but
+   it returns raw PC values as opposed to file/func/line locations.  */
+int32
+runtime_callersRaw (int32 skip, uintptr *pcbuf, int32 m)
+{
+  struct callersRaw_data data;
+  struct backtrace_state* state;
+
+  data.pcbuf = pcbuf;
+  data.skip = skip + 1;
+  data.index = 0;
+  data.max = m;
+  runtime_xadd (&__go_runtime_in_callers, 1);
+  state = __go_get_backtrace_state ();
+  backtrace_simple (state, 0, callback_raw, error_callback, &data);
+  runtime_xadd (&__go_runtime_in_callers, -1);
+
+  return data.index;
+}
+
Index: libgo/runtime/runtime.h
===================================================================
--- libgo/runtime/runtime.h	(revision 271088)
+++ libgo/runtime/runtime.h	(working copy)
@@ -485,6 +485,7 @@ struct funcfileline_return
   String retfn;
   String retfile;
   intgo retline;
+  intgo retframes;
 };
 
 struct funcfileline_return