From fb59ca10722e3d96d98a48a2a04c671804a0ca34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96zg=C3=BCr=20Kesim?= Date: Wed, 15 Jan 2020 20:42:53 +0100 Subject: [PATCH] modular solution, first working draft symbolyze/ now contains a module that exposes a Finder type with a simple API, like: finder := symbolyze.New("_PyRuntime", "*python3*") finder.Debug(true) finder.OnFound(mapFD.Set) finder.Run() Instead of writing (pid, offset) directly to an eBPF map, it implements the observer pattern and expects a callback. TODOs/next steps: - Write documentation - Add tests - Experiment and re-evaluate design --- GetRuntimeAddresses/main.go | 190 +-------------- GetRuntimeAddresses/symbolyze/symbolyze.go | 266 +++++++++++++++++++++ 2 files changed, 270 insertions(+), 186 deletions(-) create mode 100644 GetRuntimeAddresses/symbolyze/symbolyze.go diff --git a/GetRuntimeAddresses/main.go b/GetRuntimeAddresses/main.go index 90c75a0..5e5006c 100644 --- a/GetRuntimeAddresses/main.go +++ b/GetRuntimeAddresses/main.go @@ -1,15 +1,11 @@ package main import ( - "bufio" - "debug/elf" "fmt" "os" - "path/filepath" - "strconv" - "strings" "github.com/optimyze-interviews/OezguerKesim/GetRuntimeAddresses/ebpf" + "github.com/optimyze-interviews/OezguerKesim/GetRuntimeAddresses/symbolyze" ) func main() { @@ -25,40 +21,9 @@ func main() { // Solution to your tasks goes here // - proc, err := os.Open("/proc") - if err != nil { - fmt.Printf("Failed to open /proc: %v\n", err) - os.Exit(1) - } - - infos, err := proc.Readdir(-1) - if err != nil { - fmt.Printf("Failed to read /proc: %v\n", err) - os.Exit(1) - } - - proc.Close() - - var ( - ownpid = os.Getpid() - ) - - for _, pinfo := range infos { - var pid_s = pinfo.Name() - - // The entry /proc/NNN/ must be a directory with integer name - if !pinfo.IsDir() { - continue - } else if pid, err := strconv.Atoi(pid_s); err != nil { - continue - } else if pid == ownpid { // skip our own pid - continue - } else if offset, found := searchSymbolIn(pid_s, 
"*python3*", "_PyRuntime"); !found { - continue - } else { - mapFD.Add(pid, uint64(offset)) - } - } + finder := symbolyze.New("_PyRuntime", "*python3*") + finder.OnFound(mapFD.Set) + finder.Run() mapContents, err := mapFD.GetMap() if err != nil { @@ -72,150 +37,3 @@ func main() { } os.Exit(0) } - -type region struct { - start uint64 // could be uintptr - end uint64 -} - -func parseRegion(in string) (r region, e error) { - parts := strings.Split(in, "-") - - if len(parts) != 2 { - e = fmt.Errorf("[parseRegion] unrecognized format for region: %#q", in) - return - } - - r.start, e = strconv.ParseUint(parts[0], 16, 64) - if e != nil { - e = fmt.Errorf("[parseRegion] couldn't parse start-address %#q in %#q: %w", parts[0], in, e) - return - } - - r.end, e = strconv.ParseUint(parts[1], 16, 64) - if e != nil { - e = fmt.Errorf("[parseRegion] couldn't parse end-address %#q in %#q: %w", parts[1], in, e) - return - } - - return -} - -func searchSymbolIn(pid, glob, symbol string) (offset uint64, ok bool) { - // read the maps file for the binary and shared libraries - path := filepath.Join("/proc", pid, "maps") - maps, err := os.Open(path) - if err != nil { - // fmt.Printf("Warning: Failed to read %#q: %v\n", path, err) - return - } - - scanner := bufio.NewScanner(maps) - for scanner.Scan() { - // address perms offset dev inode pathname - // 7fdd8fece000-7fdd8ff74000 rw-p 00423000 fd:01 14156759 /usr/lib/x86_64-linux-gnu/libpython3.7m.so.1.0 - - fields := strings.Fields(scanner.Text()) - - // TODO: we assume that the pathname contains no spaces so - // bytes.Fields splits the line excactly into six fields - - if len(fields) != 6 { - continue - } - - pathname := fields[5] - - if !strings.HasPrefix(pathname, "/") { // Not a pathname - continue - } - - filename := filepath.Base(pathname) - ok, err := filepath.Match(glob, filename) - if err != nil || !ok { - continue - } - - if fields[1] != "rw-p" { // symbol needs to be writable - continue - } - - sym, section, err := 
findSymbol("_PyRuntime", pathname) - if err != nil || section == nil || sym == nil { - // TODO: error handling - // fmt.Printf("Warning: while reading mapped file %q: %w", pathname, err) - continue - } - - arange, err := parseRegion(fields[0]) - if err != nil { - fmt.Printf("%w\n", err) - continue - } - - fileoffset, err := strconv.ParseUint(fields[2], 16, 64) - if err != nil { - fmt.Printf("Error while parsing fileoffset %#q: %w\n", fields[2], err) - continue - } - - memoff := sym.Value - section.Addr + alignedOffset(section) - - // fmt.Printf("pid: %s\nsym: %#v\nsection: %#v\nmemoff: 0x%x\narange: %#v\nmap-fileoffset: 0x%x\npathname: %s\n", pid, sym, section, memoff, arange, fileoffset, pathname) - - // stop when only _one_ is found - return arange.start + memoff - fileoffset, true - - } - - return 0, false -} - -func findSymbol(symbol string, pathname string) (*elf.Symbol, *elf.SectionHeader, error) { - // TODO: caching - - var ( - sym *elf.Symbol - section *elf.Section - header *elf.SectionHeader - ) - - file, err := elf.Open(pathname) - if err != nil { - return nil, nil, err - } - - symbols, err := file.DynamicSymbols() - if err != nil { - return nil, nil, err - } - - for _, s := range symbols { - if s.Name == symbol { - sym = &s - break - } - } - - if sym == nil { - return nil, nil, nil - } - - if len(file.Sections) < int(sym.Section) { - return nil, nil, nil - } - - section = file.Sections[sym.Section] - if section == nil { - return nil, nil, nil - } - - header = &section.SectionHeader - - return sym, header, nil -} - -func alignedOffset(section *elf.SectionHeader) uint64 { - mask := section.Addralign - 1 - return (section.Offset + mask) & (^mask) -} diff --git a/GetRuntimeAddresses/symbolyze/symbolyze.go b/GetRuntimeAddresses/symbolyze/symbolyze.go new file mode 100644 index 0000000..3ef5b8b --- /dev/null +++ b/GetRuntimeAddresses/symbolyze/symbolyze.go @@ -0,0 +1,266 @@ +package symbolyze + +import ( + "bufio" + "debug/elf" + "fmt" + "log" + "os" + 
"path/filepath" + "strconv" + "strings" +) + +type Finder struct { + symbol string + pathglob string + cache map[string]uint64 + finds map[int]uint64 + ownpid int + observer func(key int, value uint64) error + + *log.Logger + debugf func(format string, v ...interface{}) + debugln func(v ...interface{}) + + err error +} + +var ( + nodebugf = func(string, ...interface{}) {} + nodebugln = func(...interface{}) {} +) + +func New(symbol, pathglob string) *Finder { + return &Finder{ + symbol: symbol, + pathglob: pathglob, + cache: map[string]uint64{}, + finds: map[int]uint64{}, + ownpid: os.Getpid(), + + Logger: log.New(os.Stdout, "[symbolyze] ", log.LstdFlags), + debugf: nodebugf, + debugln: nodebugln, + } +} + +// not threadsafe +func (F *Finder) Debug(on bool) { + if on { + F.debugf = F.Printf + F.debugln = F.Println + F.debugln("starting in debug-mode") + } else { + F.debugf = nodebugf + F.debugln = nodebugln + } +} + +func (F *Finder) setErrorf(format string, a ...interface{}) { + F.err = fmt.Errorf(format, a) + F.Printf(format, a) +} + +type Observer func(int, uint64) error + +// not threadsafe +func (F *Finder) OnFound(fun Observer) { + F.observer = fun + return +} + +func (F *Finder) Run() error { + if F.err != nil { + return F.err + } + + proc, err := os.Open("/proc") + if err != nil { + F.setErrorf("Failed to open /proc: %w\n", err) + return F.err + } + + infos, err := proc.Readdir(-1) + if err != nil { + F.setErrorf("Failed to read /proc: %w\n", err) + return F.err + } + + proc.Close() + + for _, pinfo := range infos { + var pid_s = pinfo.Name() + + // The entry /proc/NNN/ must be a directory with integer name + if !pinfo.IsDir() { + continue + } else if pid, err := strconv.Atoi(pid_s); err != nil { + continue + } else if pid == F.ownpid { // skip our own pid + continue + } else if offset, found := F.searchSymbolIn(pid); !found { + continue + } else { + F.finds[pid] = uint64(offset) + if F.observer != nil { + // TODO: accumulate errors? 
+ err = F.observer(pid, uint64(offset)) + if err != nil { + F.debugf("F.observer error: %w", err) + } + } + } + } + return nil +} + +func (F *Finder) searchSymbolIn(pid int) (offset uint64, ok bool) { + // read the maps file for the binary and shared libraries + path := filepath.Join("/proc", strconv.Itoa(pid), "maps") + maps, err := os.Open(path) + if err != nil { + // fmt.Printf("Warning: Failed to read %#q: %v\n", path, err) + return + } + + scanner := bufio.NewScanner(maps) + for scanner.Scan() { + // 0 1 2 3 4 5 + // address perms offset dev inode pathname + // 7fdd8fece000-7fdd8ff74000 rw-p 00423000 fd:01 14156759 /usr/lib/x86_64-linux-gnu/libpython3.7m.so.1.0 + + fields := strings.Fields(scanner.Text()) + + // TODO: we assume that the pathname contains no spaces so + // bytes.Fields splits the line excactly into six fields + + if len(fields) != 6 { + continue + } + + pathname := fields[5] + + if !strings.HasPrefix(pathname, "/") { // Not a pathname + continue + } + + filename := filepath.Base(pathname) + ok, err := filepath.Match(F.pathglob, filename) + if err != nil || !ok { + continue + } + + if fields[1] != "rw-p" { // symbol needs to be writable + continue + } + + memOffset, found := F.findSymbol(pathname) + if !found { + continue + } + + start, _, err := parseRange(fields[0]) + if err != nil { + fmt.Printf("%w\n", err) + continue + } + + fileoffset, err := strconv.ParseUint(fields[2], 16, 64) + if err != nil { + fmt.Printf("Error while parsing fileoffset %#q: %w\n", fields[2], err) + continue + } + + F.finds[pid] = start + memOffset - fileoffset + + return start + memOffset - fileoffset, true + + } + + return 0, false +} + +func (F *Finder) findSymbol(pathname string) (offset uint64, found bool) { + + if offset, found = F.cache[pathname]; found { + return offset, found + } + + file, err := elf.Open(pathname) + if err != nil { + F.setErrorf("elf.Open(%s): %w", pathname, err) + return 0, false + } + + defer file.Close() + + symbols, err := 
file.DynamicSymbols() + if err != nil { + F.setErrorf("file.DynamicSymbols(): %w", err) + return 0, false + } + + var sym *elf.Symbol + + for _, s := range symbols { + if s.Name == F.symbol { + F.debugf("Found symbol %#v in %s: %#v\n", sym, pathname, s) + sym = &s + break + } + } + + if sym == nil { + F.debugf("symbol %q not found in %s\n", sym, pathname) + return 0, false + } + + if len(file.Sections) < int(sym.Section) { + F.debugf("len(file.Section) < int(sym.Section) for symbol %q in %s\n", sym, pathname) + return 0, false + } + + section := file.Sections[sym.Section] + if section == nil { + F.debugf("Section %v not found for ELF-Header %q in %s\n", sym.Section, pathname) + return 0, false + } + + header := &section.SectionHeader + + memoffset := sym.Value - header.Addr + alignedOffset(header) + + F.cache[pathname] = memoffset + + return memoffset, true +} + +func parseRange(input string) (start, end uint64, e error) { + // 7fdd8fece000-7fdd8ff74000 + parts := strings.Split(input, "-") + + if len(parts) != 2 { + e = fmt.Errorf("[parseRange] unrecognized format for region: %#q", input) + return 0, 0, e + } + + start, e = strconv.ParseUint(parts[0], 16, 64) + if e != nil { + e = fmt.Errorf("[parseRange] couldn't parse start-address %#q in %#q: %w", parts[0], input, e) + return 0, 0, e + } + + end, e = strconv.ParseUint(parts[1], 16, 64) + if e != nil { + e = fmt.Errorf("[parseRange] couldn't parse end-address %#q in %#q: %w", parts[1], input, e) + return 0, 0, e + } + + return start, end, e +} + +func alignedOffset(section *elf.SectionHeader) uint64 { + mask := section.Addralign - 1 + return (section.Offset + mask) & (^mask) +}