modular solution, first working draft

symbolyze/ now contains a module that exposes a Finder type with a
simple API, like:

	finder := symbolyze.New("_PyRuntime", "*python3*")
	finder.Debug(true)
	finder.OnFound(mapFD.Set)
	finder.Run()

Instead of writing (pid, offset) directly to an eBPF map, it implements
the observer pattern and expects a callback.

TODOs/next steps:

	- Write documentation
	- Add tests
	- Experiment and re-evaluate design
This commit is contained in:
Özgür Kesim 2020-01-15 20:42:53 +01:00
parent a9f0f27ee2
commit fb59ca1072
2 changed files with 270 additions and 186 deletions

View File

@ -1,15 +1,11 @@
package main
import (
"bufio"
"debug/elf"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/optimyze-interviews/OezguerKesim/GetRuntimeAddresses/ebpf"
"github.com/optimyze-interviews/OezguerKesim/GetRuntimeAddresses/symbolyze"
)
func main() {
@ -25,40 +21,9 @@ func main() {
// Solution to your tasks goes here
//
proc, err := os.Open("/proc")
if err != nil {
fmt.Printf("Failed to open /proc: %v\n", err)
os.Exit(1)
}
infos, err := proc.Readdir(-1)
if err != nil {
fmt.Printf("Failed to read /proc: %v\n", err)
os.Exit(1)
}
proc.Close()
var (
ownpid = os.Getpid()
)
for _, pinfo := range infos {
var pid_s = pinfo.Name()
// The entry /proc/NNN/ must be a directory with integer name
if !pinfo.IsDir() {
continue
} else if pid, err := strconv.Atoi(pid_s); err != nil {
continue
} else if pid == ownpid { // skip our own pid
continue
} else if offset, found := searchSymbolIn(pid_s, "*python3*", "_PyRuntime"); !found {
continue
} else {
mapFD.Add(pid, uint64(offset))
}
}
finder := symbolyze.New("_PyRuntime", "*python3*")
finder.OnFound(mapFD.Set)
finder.Run()
mapContents, err := mapFD.GetMap()
if err != nil {
@ -72,150 +37,3 @@ func main() {
}
os.Exit(0)
}
// region is the address range produced by parseRegion from a string of the
// form "start-end" (both hexadecimal), such as the first column of a
// /proc/<pid>/maps line.
type region struct {
start uint64 // could be uintptr
end uint64
}
// parseRegion parses an address range of the form "start-end", where both
// parts are hexadecimal numbers without prefix, e.g.
// "7fdd8fece000-7fdd8ff74000".
//
// It returns an error when the input does not consist of exactly two parts
// separated by "-", or when either part is not a valid 64-bit hex number.
func parseRegion(in string) (r region, e error) {
	pieces := strings.Split(in, "-")
	if len(pieces) != 2 {
		return r, fmt.Errorf("[parseRegion] unrecognized format for region: %#q", in)
	}

	if r.start, e = strconv.ParseUint(pieces[0], 16, 64); e != nil {
		return r, fmt.Errorf("[parseRegion] couldn't parse start-address %#q in %#q: %w", pieces[0], in, e)
	}

	if r.end, e = strconv.ParseUint(pieces[1], 16, 64); e != nil {
		return r, fmt.Errorf("[parseRegion] couldn't parse end-address %#q in %#q: %w", pieces[1], in, e)
	}

	return r, nil
}
// searchSymbolIn looks through /proc/<pid>/maps for a writable mapping
// ("rw-p") of a file whose base name matches glob and which exports symbol
// as a dynamic symbol.
//
// For the first such mapping it returns
//
//	mapping start + offset of the symbol in the file - file offset of the mapping
//
// and true. It returns (0, false) if the maps file cannot be opened or no
// mapping qualifies.
func searchSymbolIn(pid, glob, symbol string) (offset uint64, ok bool) {
	// read the maps file for the binary and shared libraries
	path := filepath.Join("/proc", pid, "maps")
	maps, err := os.Open(path)
	if err != nil {
		// fmt.Printf("Warning: Failed to read %#q: %v\n", path, err)
		return 0, false
	}
	// This function runs once per process; without closing we would leak one
	// file descriptor per scanned pid.
	defer maps.Close()

	scanner := bufio.NewScanner(maps)
	for scanner.Scan() {
		// address                   perms offset   dev   inode    pathname
		// 7fdd8fece000-7fdd8ff74000 rw-p  00423000 fd:01 14156759 /usr/lib/x86_64-linux-gnu/libpython3.7m.so.1.0
		fields := strings.Fields(scanner.Text())

		// TODO: we assume that the pathname contains no spaces so
		// strings.Fields splits the line exactly into six fields
		if len(fields) != 6 {
			continue
		}

		pathname := fields[5]
		if !strings.HasPrefix(pathname, "/") { // Not a pathname
			continue
		}

		matched, err := filepath.Match(glob, filepath.Base(pathname))
		if err != nil || !matched {
			continue
		}

		if fields[1] != "rw-p" { // symbol needs to be writable
			continue
		}

		// Look up the symbol that was asked for, not a hard-coded name.
		sym, section, err := findSymbol(symbol, pathname)
		if err != nil || section == nil || sym == nil {
			// TODO: error handling
			continue
		}

		arange, err := parseRegion(fields[0])
		if err != nil {
			// %w is only understood by fmt.Errorf; use %v for printing.
			fmt.Printf("%v\n", err)
			continue
		}

		fileoffset, err := strconv.ParseUint(fields[2], 16, 64)
		if err != nil {
			fmt.Printf("Error while parsing fileoffset %#q: %v\n", fields[2], err)
			continue
		}

		memoff := sym.Value - section.Addr + alignedOffset(section)

		// stop when only _one_ is found
		return arange.start + memoff - fileoffset, true
	}
	if err := scanner.Err(); err != nil {
		fmt.Printf("Error while reading %#q: %v\n", path, err)
	}

	return 0, false
}
// findSymbol opens the ELF file at pathname and searches its dynamic symbol
// table for symbol.
//
// On success it returns the symbol together with the header of the section
// the symbol is defined in. If the file cannot be opened or its dynamic
// symbols cannot be read, the error is returned. If the symbol is absent,
// undefined in this file, or refers to a section index outside the section
// table, all three results are nil.
func findSymbol(symbol string, pathname string) (*elf.Symbol, *elf.SectionHeader, error) {
	// TODO: caching
	file, err := elf.Open(pathname)
	if err != nil {
		return nil, nil, err
	}
	// The returned symbol and header are plain copies of data already read,
	// so the file can be closed before returning.
	defer file.Close()

	symbols, err := file.DynamicSymbols()
	if err != nil {
		return nil, nil, err
	}

	// Index into the slice instead of taking the address of the range
	// variable, so the pointer refers to the slice element itself.
	var sym *elf.Symbol
	for i := range symbols {
		if symbols[i].Name == symbol {
			sym = &symbols[i]
			break
		}
	}
	if sym == nil {
		return nil, nil, nil
	}

	// SHN_UNDEF marks a symbol that is only referenced here and defined in
	// another object. Reserved indices (SHN_ABS, SHN_COMMON, ...) and any
	// other value >= len(file.Sections) do not name a section either.
	if sym.Section == elf.SHN_UNDEF || int(sym.Section) >= len(file.Sections) {
		return nil, nil, nil
	}

	section := file.Sections[sym.Section]
	if section == nil {
		return nil, nil, nil
	}

	return sym, &section.SectionHeader, nil
}
// alignedOffset returns the file offset of section rounded up to the
// section's alignment.
//
// An alignment of 0 or 1 means "no alignment constraint", so the offset is
// returned unchanged. Without this guard an Addralign of 0 would make the
// mask wrap around to all ones and the result collapse to 0.
func alignedOffset(section *elf.SectionHeader) uint64 {
	if section.Addralign <= 1 {
		return section.Offset
	}
	mask := section.Addralign - 1
	return (section.Offset + mask) &^ mask
}

View File

@ -0,0 +1,266 @@
package symbolyze
import (
"bufio"
"debug/elf"
"fmt"
"log"
"os"
"path/filepath"
"strconv"
"strings"
)
// Finder scans the processes listed under /proc for a dynamic symbol in
// mapped files whose base name matches a glob, and keeps the value computed
// for each process.
type Finder struct {
	// Logger is embedded so that its Printf/Println can be used directly and
	// can back the debug helpers when debugging is switched on.
	*log.Logger

	symbol   string // name of the dynamic symbol to look for
	pathglob string // glob matched against the base name of mapped files
	ownpid   int    // pid of this process; skipped while scanning

	cache map[string]uint64 // pathname -> offset computed from that ELF file
	finds map[int]uint64    // pid -> value computed for that process

	// observer, when non-nil, is called with (pid, value) for every find.
	observer func(key int, value uint64) error

	// debugf and debugln are either no-ops or the embedded logger's
	// Printf/Println, depending on Debug.
	debugf  func(format string, v ...interface{})
	debugln func(v ...interface{})

	err error // error recorded by setErrorf; checked at the start of Run
}
// nodebugf and nodebugln are no-op stand-ins for the Finder's debug helpers;
// they are installed while debug output is switched off.
var (
nodebugf = func(string, ...interface{}) {}
nodebugln = func(...interface{}) {}
)
// New returns a Finder that looks for symbol in mapped files whose base name
// matches pathglob.
//
// The Finder starts with empty cache and result maps, remembers the pid of
// the calling process, logs to standard output with the prefix "[symbolyze] "
// and has debug output switched off.
func New(symbol, pathglob string) *Finder {
	f := new(Finder)

	f.symbol = symbol
	f.pathglob = pathglob
	f.ownpid = os.Getpid()

	f.cache = make(map[string]uint64)
	f.finds = make(map[int]uint64)

	f.Logger = log.New(os.Stdout, "[symbolyze] ", log.LstdFlags)
	f.debugf = nodebugf
	f.debugln = nodebugln

	return f
}
// Debug switches debug output on or off.
//
// When switched on, debug messages go through the Finder's logger and a
// first message announcing debug mode is written immediately. When switched
// off, debug messages are discarded.
//
// Debug is not safe for concurrent use.
func (F *Finder) Debug(on bool) {
	if !on {
		F.debugf, F.debugln = nodebugf, nodebugln
		return
	}

	F.debugf, F.debugln = F.Printf, F.Println
	F.debugln("starting in debug-mode")
}
// setErrorf builds an error from format and a, stores it in F.err and writes
// it to the Finder's logger.
//
// format may contain %w to wrap an underlying error.
func (F *Finder) setErrorf(format string, a ...interface{}) {
	// a must be expanded with "..."; passing the slice itself would hand
	// fmt.Errorf a single []interface{} operand, which garbles the message
	// and prevents %w from wrapping the underlying error.
	F.err = fmt.Errorf(format, a...)

	// Log the finished error rather than re-formatting: %w is only
	// understood by fmt.Errorf, not by the Printf family.
	F.Print(F.err)
}
// Observer is the callback type accepted by OnFound. Run calls it with a pid
// and the value found for that pid.
type Observer func(int, uint64) error
// OnFound registers fun as the callback that Run invokes for every find.
// A later call replaces the previously registered callback.
//
// OnFound is not safe for concurrent use.
func (F *Finder) OnFound(fun Observer) {
	F.observer = fun
}
// Run scans /proc once.
//
// For every directory entry whose name is an integer (other than the
// Finder's own pid) it calls searchSymbolIn. Each find is stored in F.finds
// and, if an observer is registered, passed to it. Errors returned by the
// observer are only reported through debug output and do not stop the scan.
//
// Run returns a previously recorded error without scanning, an error if
// /proc cannot be opened or read, and nil otherwise.
func (F *Finder) Run() error {
	if F.err != nil {
		return F.err
	}

	proc, err := os.Open("/proc")
	if err != nil {
		F.setErrorf("Failed to open /proc: %w\n", err)
		return F.err
	}

	infos, err := proc.Readdir(-1)
	// Close before looking at the error so the handle is released on the
	// failure path as well.
	proc.Close()
	if err != nil {
		F.setErrorf("Failed to read /proc: %w\n", err)
		return F.err
	}

	for _, pinfo := range infos {
		// The entry /proc/NNN/ must be a directory with integer name
		if !pinfo.IsDir() {
			continue
		}
		pid, err := strconv.Atoi(pinfo.Name())
		if err != nil || pid == F.ownpid { // not a pid, or our own pid
			continue
		}

		offset, found := F.searchSymbolIn(pid)
		if !found {
			continue
		}
		F.finds[pid] = offset

		if F.observer == nil {
			continue
		}
		// TODO: accumulate errors?
		if err := F.observer(pid, offset); err != nil {
			// %v, not %w: the debug helpers are Printf-style.
			F.debugf("F.observer error: %v", err)
		}
	}

	return nil
}
// searchSymbolIn looks through /proc/<pid>/maps for a writable mapping
// ("rw-p") of a file whose base name matches F.pathglob and in which
// F.findSymbol locates the symbol.
//
// For the first such mapping it stores and returns
//
//	mapping start + offset reported by findSymbol - file offset of the mapping
//
// together with true. It returns (0, false) if the maps file cannot be
// opened or no mapping qualifies.
func (F *Finder) searchSymbolIn(pid int) (offset uint64, ok bool) {
	// read the maps file for the binary and shared libraries
	path := filepath.Join("/proc", strconv.Itoa(pid), "maps")
	maps, err := os.Open(path)
	if err != nil {
		// Possibly the process has exited or is not accessible to us;
		// only worth mentioning in debug mode.
		F.debugf("skipping %s: %v\n", path, err)
		return 0, false
	}
	// This method runs once per process; without closing we would leak one
	// file descriptor per scanned pid.
	defer maps.Close()

	scanner := bufio.NewScanner(maps)
	for scanner.Scan() {
		// 0                         1     2        3     4        5
		// address                   perms offset   dev   inode    pathname
		// 7fdd8fece000-7fdd8ff74000 rw-p  00423000 fd:01 14156759 /usr/lib/x86_64-linux-gnu/libpython3.7m.so.1.0
		fields := strings.Fields(scanner.Text())

		// TODO: we assume that the pathname contains no spaces so
		// strings.Fields splits the line exactly into six fields
		if len(fields) != 6 {
			continue
		}

		pathname := fields[5]
		if !strings.HasPrefix(pathname, "/") { // Not a pathname
			continue
		}

		matched, err := filepath.Match(F.pathglob, filepath.Base(pathname))
		if err != nil || !matched {
			continue
		}

		if fields[1] != "rw-p" { // symbol needs to be writable
			continue
		}

		memOffset, found := F.findSymbol(pathname)
		if !found {
			continue
		}

		start, _, err := parseRange(fields[0])
		if err != nil {
			// %w is only understood by fmt.Errorf; use %v for printing.
			F.Printf("%v\n", err)
			continue
		}

		fileoffset, err := strconv.ParseUint(fields[2], 16, 64)
		if err != nil {
			F.Printf("Error while parsing fileoffset %#q: %v\n", fields[2], err)
			continue
		}

		// stop at the first match
		offset = start + memOffset - fileoffset
		F.finds[pid] = offset
		return offset, true
	}
	if err := scanner.Err(); err != nil {
		F.debugf("reading %s: %v\n", path, err)
	}

	return 0, false
}
// findSymbol returns the offset computed for F.symbol in the ELF file at
// pathname, and whether the symbol was found.
//
// The offset is
//
//	symbol value - section address + aligned file offset of the section
//
// where the section is the one the symbol is defined in. Successful results
// are cached per pathname. If the file cannot be opened or its dynamic
// symbols cannot be read, the error is recorded via setErrorf and
// (0, false) is returned. (0, false) is also returned when the symbol is
// absent, is undefined in this file, or refers to a section index outside
// the section table.
func (F *Finder) findSymbol(pathname string) (offset uint64, found bool) {
	if offset, found = F.cache[pathname]; found {
		return offset, true
	}

	file, err := elf.Open(pathname)
	if err != nil {
		F.setErrorf("elf.Open(%s): %w", pathname, err)
		return 0, false
	}
	defer file.Close()

	symbols, err := file.DynamicSymbols()
	if err != nil {
		F.setErrorf("file.DynamicSymbols(): %w", err)
		return 0, false
	}

	// Index into the slice instead of taking the address of the range
	// variable, so the pointer refers to the slice element itself.
	var sym *elf.Symbol
	for i := range symbols {
		if symbols[i].Name == F.symbol {
			sym = &symbols[i]
			break
		}
	}
	if sym == nil {
		F.debugf("symbol %q not found in %s\n", F.symbol, pathname)
		return 0, false
	}
	F.debugf("Found symbol %q in %s: %#v\n", F.symbol, pathname, *sym)

	// SHN_UNDEF marks a symbol that is only referenced here and defined in
	// another object; using it would yield a bogus offset. Reserved indices
	// (SHN_ABS, SHN_COMMON, ...) and any other value >= len(file.Sections)
	// do not name a section either and would index out of range.
	if sym.Section == elf.SHN_UNDEF || int(sym.Section) >= len(file.Sections) {
		F.debugf("symbol %q in %s has no usable section (index %v, %d sections)\n",
			F.symbol, pathname, sym.Section, len(file.Sections))
		return 0, false
	}

	section := file.Sections[sym.Section]
	if section == nil {
		F.debugf("Section %v not found for symbol %q in %s\n", sym.Section, F.symbol, pathname)
		return 0, false
	}

	header := &section.SectionHeader
	memoffset := sym.Value - header.Addr + alignedOffset(header)

	F.cache[pathname] = memoffset
	return memoffset, true
}
// parseRange parses an address range of the form "start-end", where both
// parts are hexadecimal numbers without prefix, e.g.
// "7fdd8fece000-7fdd8ff74000".
//
// It returns (0, 0, err) when the input does not contain exactly one "-" or
// when either part is not a valid 64-bit hex number.
func parseRange(input string) (start, end uint64, e error) {
	// Exactly one separator, i.e. exactly two parts.
	if strings.Count(input, "-") != 1 {
		return 0, 0, fmt.Errorf("[parseRange] unrecognized format for region: %#q", input)
	}
	sep := strings.Index(input, "-")
	lo, hi := input[:sep], input[sep+1:]

	start, e = strconv.ParseUint(lo, 16, 64)
	if e != nil {
		return 0, 0, fmt.Errorf("[parseRange] couldn't parse start-address %#q in %#q: %w", lo, input, e)
	}

	end, e = strconv.ParseUint(hi, 16, 64)
	if e != nil {
		return 0, 0, fmt.Errorf("[parseRange] couldn't parse end-address %#q in %#q: %w", hi, input, e)
	}

	return start, end, nil
}
// alignedOffset returns the file offset of section rounded up to the
// section's alignment.
//
// An alignment of 0 or 1 means "no alignment constraint", so the offset is
// returned unchanged. Without this guard an Addralign of 0 would make the
// mask wrap around to all ones and the result collapse to 0.
func alignedOffset(section *elf.SectionHeader) uint64 {
	if section.Addralign <= 1 {
		return section.Offset
	}
	mask := section.Addralign - 1
	return (section.Offset + mask) &^ mask
}