mirror of
https://github.com/dutchcoders/transfer.sh.git
synced 2020-11-18 19:53:40 -08:00
463 lines
13 KiB
Go
463 lines
13 KiB
Go
// Copyright 2014 Google Inc. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
// Package binutils provides access to the GNU binutils.
|
|
package binutils
|
|
|
|
import (
|
|
"debug/elf"
|
|
"debug/macho"
|
|
"encoding/binary"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"regexp"
|
|
"runtime"
|
|
"strings"
|
|
"sync"
|
|
|
|
"github.com/google/pprof/internal/elfexec"
|
|
"github.com/google/pprof/internal/plugin"
|
|
)
|
|
|
|
// A Binutils implements plugin.ObjTool by invoking the GNU binutils.
|
|
type Binutils struct {
|
|
mu sync.Mutex
|
|
rep *binrep
|
|
}
|
|
|
|
// binrep is an immutable representation for Binutils. Every mutation
// installs a fresh copy (under the Binutils mutex) so that holders of an
// older *binrep keep a consistent view.
type binrep struct {
	// Commands to invoke. Each path is paired with a flag recording whether
	// the tool was actually located; when it was not, the path holds the
	// bare command name.
	llvmSymbolizer      string
	llvmSymbolizerFound bool
	addr2line           string
	addr2lineFound      bool
	nm                  string
	nmFound             bool
	objdump             string
	objdumpFound        bool

	// fast selects symbolization through nm (symbol names only) instead of
	// the slower addr2line, which also yields file/line detail.
	fast bool
}
|
|
|
|
// get returns the current representation for bu, initializing it if necessary.
|
|
func (bu *Binutils) get() *binrep {
|
|
bu.mu.Lock()
|
|
r := bu.rep
|
|
if r == nil {
|
|
r = &binrep{}
|
|
initTools(r, "")
|
|
bu.rep = r
|
|
}
|
|
bu.mu.Unlock()
|
|
return r
|
|
}
|
|
|
|
// update modifies the rep for bu via the supplied function.
|
|
func (bu *Binutils) update(fn func(r *binrep)) {
|
|
r := &binrep{}
|
|
bu.mu.Lock()
|
|
defer bu.mu.Unlock()
|
|
if bu.rep == nil {
|
|
initTools(r, "")
|
|
} else {
|
|
*r = *bu.rep
|
|
}
|
|
fn(r)
|
|
bu.rep = r
|
|
}
|
|
|
|
// String returns string representation of the binutils state for debug logging.
|
|
func (bu *Binutils) String() string {
|
|
r := bu.get()
|
|
var llvmSymbolizer, addr2line, nm, objdump string
|
|
if r.llvmSymbolizerFound {
|
|
llvmSymbolizer = r.llvmSymbolizer
|
|
}
|
|
if r.addr2lineFound {
|
|
addr2line = r.addr2line
|
|
}
|
|
if r.nmFound {
|
|
nm = r.nm
|
|
}
|
|
if r.objdumpFound {
|
|
objdump = r.objdump
|
|
}
|
|
return fmt.Sprintf("llvm-symbolizer=%q addr2line=%q nm=%q objdump=%q fast=%t",
|
|
llvmSymbolizer, addr2line, nm, objdump, r.fast)
|
|
}
|
|
|
|
// SetFastSymbolization sets a toggle that makes binutils use fast
|
|
// symbolization (using nm), which is much faster than addr2line but
|
|
// provides only symbol name information (no file/line).
|
|
func (bu *Binutils) SetFastSymbolization(fast bool) {
|
|
bu.update(func(r *binrep) { r.fast = fast })
|
|
}
|
|
|
|
// SetTools processes the contents of the tools option. It
|
|
// expects a set of entries separated by commas; each entry is a pair
|
|
// of the form t:path, where cmd will be used to look only for the
|
|
// tool named t. If t is not specified, the path is searched for all
|
|
// tools.
|
|
func (bu *Binutils) SetTools(config string) {
|
|
bu.update(func(r *binrep) { initTools(r, config) })
|
|
}
|
|
|
|
func initTools(b *binrep, config string) {
|
|
// paths collect paths per tool; Key "" contains the default.
|
|
paths := make(map[string][]string)
|
|
for _, t := range strings.Split(config, ",") {
|
|
name, path := "", t
|
|
if ct := strings.SplitN(t, ":", 2); len(ct) == 2 {
|
|
name, path = ct[0], ct[1]
|
|
}
|
|
paths[name] = append(paths[name], path)
|
|
}
|
|
|
|
defaultPath := paths[""]
|
|
b.llvmSymbolizer, b.llvmSymbolizerFound = findExe("llvm-symbolizer", append(paths["llvm-symbolizer"], defaultPath...))
|
|
b.addr2line, b.addr2lineFound = findExe("addr2line", append(paths["addr2line"], defaultPath...))
|
|
if !b.addr2lineFound {
|
|
// On MacOS, brew installs addr2line under gaddr2line name, so search for
|
|
// that if the tool is not found by its default name.
|
|
b.addr2line, b.addr2lineFound = findExe("gaddr2line", append(paths["addr2line"], defaultPath...))
|
|
}
|
|
b.nm, b.nmFound = findExe("nm", append(paths["nm"], defaultPath...))
|
|
b.objdump, b.objdumpFound = findExe("objdump", append(paths["objdump"], defaultPath...))
|
|
}
|
|
|
|
// findExe looks for the executable cmd in each directory of paths, in order.
// It returns the first location accepted by exec.LookPath together with true.
// If none is found it returns cmd unchanged together with false.
func findExe(cmd string, paths []string) (string, bool) {
	for i := range paths {
		candidate := filepath.Join(paths[i], cmd)
		resolved, err := exec.LookPath(candidate)
		if err != nil {
			continue
		}
		return resolved, true
	}
	return cmd, false
}
|
|
|
|
// Disasm returns the assembly instructions for the specified address range
|
|
// of a binary.
|
|
func (bu *Binutils) Disasm(file string, start, end uint64) ([]plugin.Inst, error) {
|
|
b := bu.get()
|
|
cmd := exec.Command(b.objdump, "-d", "-C", "--no-show-raw-insn", "-l",
|
|
fmt.Sprintf("--start-address=%#x", start),
|
|
fmt.Sprintf("--stop-address=%#x", end),
|
|
file)
|
|
out, err := cmd.Output()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("%v: %v", cmd.Args, err)
|
|
}
|
|
|
|
return disassemble(out)
|
|
}
|
|
|
|
// Open satisfies the plugin.ObjTool interface.
|
|
func (bu *Binutils) Open(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
|
|
b := bu.get()
|
|
|
|
// Make sure file is a supported executable.
|
|
// This uses magic numbers, mainly to provide better error messages but
|
|
// it should also help speed.
|
|
|
|
if _, err := os.Stat(name); err != nil {
|
|
// For testing, do not require file name to exist.
|
|
if strings.Contains(b.addr2line, "testdata/") {
|
|
return &fileAddr2Line{file: file{b: b, name: name}}, nil
|
|
}
|
|
return nil, err
|
|
}
|
|
|
|
// Read the first 4 bytes of the file.
|
|
|
|
f, err := os.Open(name)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error opening %s: %v", name, err)
|
|
}
|
|
defer f.Close()
|
|
|
|
var header [4]byte
|
|
if _, err = io.ReadFull(f, header[:]); err != nil {
|
|
return nil, fmt.Errorf("error reading magic number from %s: %v", name, err)
|
|
}
|
|
|
|
elfMagic := string(header[:])
|
|
|
|
// Match against supported file types.
|
|
if elfMagic == elf.ELFMAG {
|
|
f, err := b.openELF(name, start, limit, offset)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error reading ELF file %s: %v", name, err)
|
|
}
|
|
return f, nil
|
|
}
|
|
|
|
// Mach-O magic numbers can be big or little endian.
|
|
machoMagicLittle := binary.LittleEndian.Uint32(header[:])
|
|
machoMagicBig := binary.BigEndian.Uint32(header[:])
|
|
|
|
if machoMagicLittle == macho.Magic32 || machoMagicLittle == macho.Magic64 ||
|
|
machoMagicBig == macho.Magic32 || machoMagicBig == macho.Magic64 {
|
|
f, err := b.openMachO(name, start, limit, offset)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error reading Mach-O file %s: %v", name, err)
|
|
}
|
|
return f, nil
|
|
}
|
|
if machoMagicLittle == macho.MagicFat || machoMagicBig == macho.MagicFat {
|
|
f, err := b.openFatMachO(name, start, limit, offset)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error reading fat Mach-O file %s: %v", name, err)
|
|
}
|
|
return f, nil
|
|
}
|
|
|
|
return nil, fmt.Errorf("unrecognized binary format: %s", name)
|
|
}
|
|
|
|
func (b *binrep) openMachOCommon(name string, of *macho.File, start, limit, offset uint64) (plugin.ObjFile, error) {
|
|
|
|
// Subtract the load address of the __TEXT section. Usually 0 for shared
|
|
// libraries or 0x100000000 for executables. You can check this value by
|
|
// running `objdump -private-headers <file>`.
|
|
|
|
textSegment := of.Segment("__TEXT")
|
|
if textSegment == nil {
|
|
return nil, fmt.Errorf("could not identify base for %s: no __TEXT segment", name)
|
|
}
|
|
if textSegment.Addr > start {
|
|
return nil, fmt.Errorf("could not identify base for %s: __TEXT segment address (0x%x) > mapping start address (0x%x)",
|
|
name, textSegment.Addr, start)
|
|
}
|
|
|
|
base := start - textSegment.Addr
|
|
|
|
if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) {
|
|
return &fileNM{file: file{b: b, name: name, base: base}}, nil
|
|
}
|
|
return &fileAddr2Line{file: file{b: b, name: name, base: base}}, nil
|
|
}
|
|
|
|
func (b *binrep) openFatMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
|
|
of, err := macho.OpenFat(name)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error parsing %s: %v", name, err)
|
|
}
|
|
defer of.Close()
|
|
|
|
if len(of.Arches) == 0 {
|
|
return nil, fmt.Errorf("empty fat Mach-O file: %s", name)
|
|
}
|
|
|
|
var arch macho.Cpu
|
|
// Use the host architecture.
|
|
// TODO: This is not ideal because the host architecture may not be the one
|
|
// that was profiled. E.g. an amd64 host can profile a 386 program.
|
|
switch runtime.GOARCH {
|
|
case "386":
|
|
arch = macho.Cpu386
|
|
case "amd64", "amd64p32":
|
|
arch = macho.CpuAmd64
|
|
case "arm", "armbe", "arm64", "arm64be":
|
|
arch = macho.CpuArm
|
|
case "ppc":
|
|
arch = macho.CpuPpc
|
|
case "ppc64", "ppc64le":
|
|
arch = macho.CpuPpc64
|
|
default:
|
|
return nil, fmt.Errorf("unsupported host architecture for %s: %s", name, runtime.GOARCH)
|
|
}
|
|
for i := range of.Arches {
|
|
if of.Arches[i].Cpu == arch {
|
|
return b.openMachOCommon(name, of.Arches[i].File, start, limit, offset)
|
|
}
|
|
}
|
|
return nil, fmt.Errorf("architecture not found in %s: %s", name, runtime.GOARCH)
|
|
}
|
|
|
|
func (b *binrep) openMachO(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
|
|
of, err := macho.Open(name)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error parsing %s: %v", name, err)
|
|
}
|
|
defer of.Close()
|
|
|
|
return b.openMachOCommon(name, of, start, limit, offset)
|
|
}
|
|
|
|
func (b *binrep) openELF(name string, start, limit, offset uint64) (plugin.ObjFile, error) {
|
|
ef, err := elf.Open(name)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error parsing %s: %v", name, err)
|
|
}
|
|
defer ef.Close()
|
|
|
|
var stextOffset *uint64
|
|
var pageAligned = func(addr uint64) bool { return addr%4096 == 0 }
|
|
if strings.Contains(name, "vmlinux") || !pageAligned(start) || !pageAligned(limit) || !pageAligned(offset) {
|
|
// Reading all Symbols is expensive, and we only rarely need it so
|
|
// we don't want to do it every time. But if _stext happens to be
|
|
// page-aligned but isn't the same as Vaddr, we would symbolize
|
|
// wrong. So if the name the addresses aren't page aligned, or if
|
|
// the name is "vmlinux" we read _stext. We can be wrong if: (1)
|
|
// someone passes a kernel path that doesn't contain "vmlinux" AND
|
|
// (2) _stext is page-aligned AND (3) _stext is not at Vaddr
|
|
symbols, err := ef.Symbols()
|
|
if err != nil && err != elf.ErrNoSymbols {
|
|
return nil, err
|
|
}
|
|
for _, s := range symbols {
|
|
if s.Name == "_stext" {
|
|
// The kernel may use _stext as the mapping start address.
|
|
stextOffset = &s.Value
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
base, err := elfexec.GetBase(&ef.FileHeader, elfexec.FindTextProgHeader(ef), stextOffset, start, limit, offset)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("could not identify base for %s: %v", name, err)
|
|
}
|
|
|
|
buildID := ""
|
|
if f, err := os.Open(name); err == nil {
|
|
if id, err := elfexec.GetBuildID(f); err == nil {
|
|
buildID = fmt.Sprintf("%x", id)
|
|
}
|
|
}
|
|
if b.fast || (!b.addr2lineFound && !b.llvmSymbolizerFound) {
|
|
return &fileNM{file: file{b, name, base, buildID}}, nil
|
|
}
|
|
return &fileAddr2Line{file: file{b, name, base, buildID}}, nil
|
|
}
|
|
|
|
// file implements the binutils.ObjFile interface.
|
|
type file struct {
|
|
b *binrep
|
|
name string
|
|
base uint64
|
|
buildID string
|
|
}
|
|
|
|
func (f *file) Name() string {
|
|
return f.name
|
|
}
|
|
|
|
func (f *file) Base() uint64 {
|
|
return f.base
|
|
}
|
|
|
|
func (f *file) BuildID() string {
|
|
return f.buildID
|
|
}
|
|
|
|
func (f *file) SourceLine(addr uint64) ([]plugin.Frame, error) {
|
|
return []plugin.Frame{}, nil
|
|
}
|
|
|
|
func (f *file) Close() error {
|
|
return nil
|
|
}
|
|
|
|
func (f *file) Symbols(r *regexp.Regexp, addr uint64) ([]*plugin.Sym, error) {
|
|
// Get from nm a list of symbols sorted by address.
|
|
cmd := exec.Command(f.b.nm, "-n", f.name)
|
|
out, err := cmd.Output()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("%v: %v", cmd.Args, err)
|
|
}
|
|
|
|
return findSymbols(out, f.name, r, addr)
|
|
}
|
|
|
|
// fileNM implements the binutils.ObjFile interface, using 'nm' to map
|
|
// addresses to symbols (without file/line number information). It is
|
|
// faster than fileAddr2Line.
|
|
type fileNM struct {
|
|
file
|
|
addr2linernm *addr2LinerNM
|
|
}
|
|
|
|
func (f *fileNM) SourceLine(addr uint64) ([]plugin.Frame, error) {
|
|
if f.addr2linernm == nil {
|
|
addr2liner, err := newAddr2LinerNM(f.b.nm, f.name, f.base)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
f.addr2linernm = addr2liner
|
|
}
|
|
return f.addr2linernm.addrInfo(addr)
|
|
}
|
|
|
|
// fileAddr2Line implements the binutils.ObjFile interface, using
|
|
// llvm-symbolizer, if that's available, or addr2line to map addresses to
|
|
// symbols (with file/line number information). It can be slow for large
|
|
// binaries with debug information.
|
|
type fileAddr2Line struct {
|
|
once sync.Once
|
|
file
|
|
addr2liner *addr2Liner
|
|
llvmSymbolizer *llvmSymbolizer
|
|
}
|
|
|
|
func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) {
|
|
f.once.Do(f.init)
|
|
if f.llvmSymbolizer != nil {
|
|
return f.llvmSymbolizer.addrInfo(addr)
|
|
}
|
|
if f.addr2liner != nil {
|
|
return f.addr2liner.addrInfo(addr)
|
|
}
|
|
return nil, fmt.Errorf("could not find local addr2liner")
|
|
}
|
|
|
|
func (f *fileAddr2Line) init() {
|
|
if llvmSymbolizer, err := newLLVMSymbolizer(f.b.llvmSymbolizer, f.name, f.base); err == nil {
|
|
f.llvmSymbolizer = llvmSymbolizer
|
|
return
|
|
}
|
|
|
|
if addr2liner, err := newAddr2Liner(f.b.addr2line, f.name, f.base); err == nil {
|
|
f.addr2liner = addr2liner
|
|
|
|
// When addr2line encounters some gcc compiled binaries, it
|
|
// drops interesting parts of names in anonymous namespaces.
|
|
// Fallback to NM for better function names.
|
|
if nm, err := newAddr2LinerNM(f.b.nm, f.name, f.base); err == nil {
|
|
f.addr2liner.nm = nm
|
|
}
|
|
}
|
|
}
|
|
|
|
func (f *fileAddr2Line) Close() error {
|
|
if f.llvmSymbolizer != nil {
|
|
f.llvmSymbolizer.rw.close()
|
|
f.llvmSymbolizer = nil
|
|
}
|
|
if f.addr2liner != nil {
|
|
f.addr2liner.rw.close()
|
|
f.addr2liner = nil
|
|
}
|
|
return nil
|
|
}
|