ebpf-challenge/GetRuntimeAddresses/symbolyze/symbolyze.go

431 lines
11 KiB
Go

package symbolyze
import (
"bufio"
"debug/elf"
"fmt"
"log"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
)
// Scanner represents an engine for scanning for a specific symbol in all
// ELF-files matching a certain pattern. The pattern syntax is the one
// described in filepath.Match().
//
// Once a Scanner is created with NewScanner(), it should be populated with
// Observer functions using OnFound(). Optionally, the scanner can be put into
// debugging mode by a call to Debug(true) prior to a call to Run().
//
// A call to Scanner.Run() then starts the engine and it will scan all pids in
// /proc. Whenever a match is found, all observers will be called with the
// (pid, offset), concurrently.
type Scanner struct {
rwmutex // Embedded sync.RWMutex; taken by the methods that touch cache, observers, errors and the debug functions
symbol string // Name of the symbol to look up in the dynamic symbol table
pathglob string // Pattern matched against the base name of each mapped file
cache map[string]uint64 // Contains (pathname, offset)
observers []Observer // Callbacks
// Instead of using a boolean to indicate debugging, we use function
// members. This way we can populate them with noop-functions in the
// non-debug case and not pollute the code with if-statements.
debugf func(format string, v ...interface{})
debugln func(v ...interface{})
logger // Embedded logger
errors errors // Errors accumulated by setErrorf and addError
ticker *time.Ticker // Used to run the scanner repeatedly
}
// errors is a list of errors that itself satisfies the error interface.
type errors []error

// Error renders the list as a single string: "nil" for an empty list, the
// message of the sole element for a list of one, and otherwise a
// "multiple errors:" header followed by one message per line.
func (e errors) Error() string {
	if len(e) == 0 {
		return "nil"
	}
	if len(e) == 1 {
		return e[0].Error()
	}
	messages := make([]string, 0, len(e))
	for _, item := range e {
		messages = append(messages, item.Error())
	}
	return fmt.Sprintf("multiple errors:\n%s", strings.Join(messages, "\n"))
}
// We use a lowercase type alias for *log.Logger so that we can embed it in
// Scanner without exporting it.
type logger = *log.Logger
// rwmutex is a lowercase alias for sync.RWMutex so that it can be embedded in
// Scanner under an unexported field name.
type rwmutex = sync.RWMutex
// nodebugf and nodebugln are no-op stand-ins for the Scanner's debugf and
// debugln members; they are installed whenever debugging is off.
var (
nodebugf = func(format string, v ...interface{}) {}
nodebugln = func(v ...interface{}) {}
)
// An Observer is a callback that can be registered with Scanner.OnFound. It
// is called with the pid of a process in which the symbol was found and the
// offset computed for the symbol in that process. Observers are called
// concurrently, so they have to be thread-safe. A non-nil return value is
// recorded by Scanner.Run as an error.
type Observer func(pid int, offset uint64) error
// NewScanner builds a Scanner that looks for the symbol named symbol in every
// memory-mapped file whose base name matches pathglob, across all running
// processes. The returned Scanner logs to stderr with a "[symbolyze] " prefix
// and has debugging switched off. Register one or more Observer functions
// with Scanner.OnFound() and start scanning with Scanner.Run().
func NewScanner(symbol, pathglob string) *Scanner {
	S := new(Scanner)
	S.symbol = symbol
	S.pathglob = pathglob
	S.cache = make(map[string]uint64)
	S.logger = log.New(os.Stderr, "[symbolyze] ", log.Ltime|log.Lmicroseconds)
	// Debugging stays off until Debug(true) is called.
	S.debugf, S.debugln = nodebugf, nodebugln
	return S
}
// Debug switches the scanner's debugging mode on or off. It must be called
// only once before a call to Scanner.Run().
func (S *Scanner) Debug(on bool) {
	S.Lock()
	defer S.Unlock()

	if !on {
		// Silence debug output with the no-op functions.
		S.debugf, S.debugln = nodebugf, nodebugln
		return
	}
	// Route debug output through the embedded *log.Logger.
	S.debugf, S.debugln = S.Printf, S.Println
	S.debugln("starting in debug-mode")
}
// setErrorf puts the Scanner into an error state: it formats an error from
// the given format and arguments, appends it to the Scanner's error list
// (under the write lock, via addError) and then logs the same message through
// the embedded logger.
func (S *Scanner) setErrorf(format string, a ...interface{}) {
	S.addError(fmt.Errorf(format, a...))
	S.Printf(format, a...)
}
// HasErrors reports whether at least one error has been recorded on the
// Scanner. The error list is inspected under the read lock.
func (S *Scanner) HasErrors() bool {
	S.RLock()
	count := len(S.errors)
	S.RUnlock()
	return count != 0
}
// Errors returns the errors recorded so far as a single error value, or nil
// if there are none. The returned value is a copy of the internal list, so it
// is not affected by errors recorded afterwards.
func (S *Scanner) Errors() error {
	S.RLock()
	defer S.RUnlock()

	if len(S.errors) > 0 {
		return append(errors(nil), S.errors...)
	}
	// Return a literal nil so callers can compare the result against nil.
	return nil
}
// addError appends err to the Scanner's error list while holding the write
// lock.
func (S *Scanner) addError(err error) {
	S.Lock()
	S.errors = append(S.errors, err)
	S.Unlock()
}
// OnFound appends the given Observer functions to the Scanner's list of
// callbacks. Scanner.Run calls every registered Observer, each in its own
// goroutine, whenever it finds the symbol in a running program, so an
// Observer has to be thread-safe.
func (S *Scanner) OnFound(fun ...Observer) {
	S.Lock()
	S.observers = append(S.observers, fun...)
	S.Unlock()
}
// Run starts the scanning process. It scans the entries of all /proc/NNN/maps
// files for pathnames that match the provided path-glob and are executables or
// shared libraries in ELF format. It searches for the provided symbol in
// those files and calls the registered Observer functions, concurrently, with
// the pid and offset of the symbol.
//
// If the Scanner already holds errors from an earlier call, Run returns them
// immediately without scanning. Run also returns an error if it couldn't read
// the proc filesystem. Otherwise it loops over all pids, waits for every
// observer to finish and returns the accumulated observer errors, or nil if
// there were none.
func (S *Scanner) Run() error {
	if S.HasErrors() {
		return S.Errors()
	}

	proc, err := os.Open("/proc")
	if err != nil {
		S.setErrorf("Failed to open /proc: %v\n", err)
		return S.Errors()
	}
	infos, err := proc.Readdir(-1)
	// Close the directory before looking at the error so that the handle is
	// released on the failure path as well.
	proc.Close()
	if err != nil {
		S.setErrorf("Failed to read /proc: %v\n", err)
		return S.Errors()
	}

	// Take a snapshot of the observers under the read lock. OnFound mutates
	// the slice under the write lock, and the snapshot keeps the number of
	// goroutines consistent for the whole run.
	S.RLock()
	observers := make([]Observer, len(S.observers))
	copy(observers, S.observers)
	S.RUnlock()

	var wg sync.WaitGroup // To be able to wait for all the observers to finish
	for _, pinfo := range infos {
		// The entry /proc/NNN/ must be a directory with integer name
		if !pinfo.IsDir() {
			continue
		}
		pid, err := strconv.Atoi(pinfo.Name())
		if err != nil {
			continue
		}
		offset, found := S.searchSymbolInPid(pid)
		if !found {
			continue
		}

		// Call the observers with (pid, offset), in the background. The
		// index and the callback are passed as arguments so that every
		// goroutine works on its own copies, and each goroutine keeps its
		// error in a local variable instead of sharing one.
		for n, observer := range observers {
			wg.Add(1)
			go func(n int, observer Observer) {
				defer wg.Done()
				if err := observer(pid, offset); err != nil {
					S.addError(fmt.Errorf("S.observer[%d](%d, %d) error: %v", n, pid, offset, err))
				}
			}(n, observer)
		}
	}
	wg.Wait() // Wait for all observers to finish
	return S.Errors()
}
// RunEvery creates a ticker with the given step and calls Run on every tick,
// printing any error Run returns through the embedded logger.
//
// NOTE: the loop has no exit condition, so RunEvery never returns. Stop only
// stops the ticker (time.Ticker.Stop does not close the channel), which
// leaves this loop blocked on the next receive.
func (S *Scanner) RunEvery(step time.Duration) {
	S.ticker = time.NewTicker(step)
	for {
		<-S.ticker.C
		if err := S.Run(); err != nil {
			S.Println(err)
		}
	}
}
// Stop stops the ticker created by RunEvery, if there is one. It is a no-op
// when RunEvery has not been called.
func (S *Scanner) Stop() {
	if S.ticker == nil {
		return
	}
	S.debugln("Stopping ticker")
	S.ticker.Stop()
}
// searchSymbolInPid loops over the entries in /proc/<pid>/maps and searches for
// the symbol in the mapped files.
//
// The current implementation makes the following assumptions:
//  1. The pathname in an entry does not contain spaces.
//  2. The pathname starts with /.
//  3. The symbol must be in a region that has permission "rw-p".
//  4. The symbol is present at most in one mapped file at the same time.
//
// For the first entry that qualifies it returns
// start-of-region + offset-of-symbol-in-file - file-offset-of-region and
// true. It returns (0, false) if the maps file cannot be opened or no entry
// qualifies.
func (S *Scanner) searchSymbolInPid(pid int) (offset uint64, found bool) {
	path := filepath.Join("/proc", strconv.Itoa(pid), "maps")
	maps, err := os.Open(path)
	if err != nil {
		S.debugf("%v\n", err)
		return 0, false
	}
	defer maps.Close()

	// Read the entries by line
	scanner := bufio.NewScanner(maps)
	for scanner.Scan() {
		// A line of our interest in the maps file has the following
		// structure, see man proc(5).
		//
		// 0                         1    2        3     4        5
		// address                   perms offset  dev   inode    pathname
		// 7fdd8fece000-7fdd8ff74000 rw-p 00423000 fd:01 14156759 /usr/lib/x86_64-linux-gnu/libpython3.7m.so.1.0
		//
		// We assume that the pathname contains no spaces so
		// strings.Fields splits the line exactly into six fields.
		fields := strings.Fields(scanner.Text())
		if len(fields) != 6 {
			continue
		}
		pathname := fields[5]
		if !strings.HasPrefix(pathname, "/") { // Not a pathname
			continue
		}

		// The filename must match the given pattern. A malformed
		// pattern is treated like a non-match.
		filename := filepath.Base(pathname)
		ok, err := filepath.Match(S.pathglob, filename)
		if err != nil || !ok {
			continue
		}

		// The symbol needs to be writable
		if fields[1] != "rw-p" {
			continue
		}

		// Get the start address of the mapped region in memory
		startAddress, _, err := parseRange(fields[0])
		if err != nil {
			S.Printf("%v\n", err)
			continue
		}

		// Read the offset in the file that this region is mapping
		fileOffset, err := strconv.ParseUint(fields[2], 16, 64)
		if err != nil {
			S.Printf("fields[2] %#q: %v\n", fields[2], err)
			continue
		}

		// Finally, find the symbol in the binary. If found,
		// findSymbolInELF returns the offset of the symbol, taking
		// alignment into account.
		memOffset, symFound := S.findSymbolInELF(pathname)
		if !symFound {
			continue
		}

		// Hurray, we've found an entry!
		return startAddress + memOffset - fileOffset, true
	}

	// Scan returns false on a read error as well as on EOF. Report the
	// former in debug mode, like the failure to open the file above.
	if err := scanner.Err(); err != nil {
		S.debugf("reading %s: %v\n", path, err)
	}
	return 0, false
}
// findSymbolInELF searches for the provided symbol in the dynamic symbol table
// of the ELF-file at pathname. If found, it returns the offset of the symbol
// according to the formula:
//
//	vmOffset = alignedOffset(section) + offsetInSection(symbol)
//
// where offsetInSection is the symbol's value minus the section's address.
//
// It returns (0, false) if the file cannot be parsed as ELF, if it has no
// readable dynamic symbol table, if the symbol is missing or only referenced
// (undefined) in this file, or if the symbol's section index does not refer
// to a section of the file.
//
// A successful result is cached so that subsequent calls to findSymbolInELF
// with the same pathname can quickly return.
func (S *Scanner) findSymbolInELF(pathname string) (offset uint64, found bool) {
	// 0. Return the value from the cache, if found.
	S.RLock()
	offset, found = S.cache[pathname]
	S.RUnlock()
	if found {
		return offset, true
	}

	// 1. Open the file with the ELF-parser
	file, err := elf.Open(pathname)
	if err != nil {
		S.Printf("%v", err)
		return 0, false
	}
	defer file.Close()

	// 2. Find the symbol
	symbols, err := file.DynamicSymbols()
	if err != nil {
		S.Printf("%v", err)
		return 0, false
	}
	var sym *elf.Symbol
	for i := range symbols {
		if symbols[i].Name == S.symbol {
			sym = &symbols[i]
			S.debugf("Found symbol %q in %s: %#v\n", S.symbol, pathname, *sym)
			break
		}
	}
	if sym == nil {
		S.debugf("symbol %q not found in %s\n", S.symbol, pathname)
		return 0, false
	}

	// 3. Extract the information about the section.
	//
	// An undefined symbol is only imported by this file; it lives in some
	// other object, so there is no location to compute here.
	if sym.Section == elf.SHN_UNDEF {
		S.debugf("symbol %q is undefined in %s\n", S.symbol, pathname)
		return 0, false
	}
	// The index must be strictly smaller than the number of sections. This
	// also rejects the reserved indices such as SHN_ABS and SHN_COMMON.
	if int(sym.Section) >= len(file.Sections) {
		S.debugf("section index %d of symbol %q is out of range in %s\n", int(sym.Section), S.symbol, pathname)
		return 0, false
	}
	section := file.Sections[sym.Section]
	if section == nil {
		S.debugf("Section %v not found for symbol %q in %s\n", sym.Section, S.symbol, pathname)
		return 0, false
	}

	// 4. Calculate the offset of the given section, aligned according to
	// the SectionHeader.Addralign entry. The values 0 and 1 both mean "no
	// alignment constraint"; 0 in particular must not be turned into a
	// mask, because 0-1 would wrap around to all ones.
	alignedSectOff := section.Offset
	if align := section.Addralign; align > 1 {
		mask := align - 1
		alignedSectOff = (section.Offset + mask) &^ mask
	}

	// 5. The location of the symbol is finally:
	vmOffset := alignedSectOff + (sym.Value - section.Addr)

	// 6. Store this calculation in our cache so that we don't have to
	// touch this file again.
	S.Lock()
	S.cache[pathname] = vmOffset
	S.Unlock()
	return vmOffset, true
}
// parseRange parses the address field of a line in /proc/<pid>/maps, e.g.
//
//	7fdd8fece000-7fdd8ff74000 rw-p ...
//
// The input must consist of two hexadecimal numbers separated by exactly one
// dash. It returns the start and end addresses of the range. On any parse
// failure both addresses are 0 and the error describes the offending part.
func parseRange(input string) (start, end uint64, e error) {
	// Exactly one dash has to separate the two addresses.
	if strings.Count(input, "-") != 1 {
		return 0, 0, fmt.Errorf("[parseRange] unrecognized format for region: %#q", input)
	}
	dash := strings.Index(input, "-")
	lo, hi := input[:dash], input[dash+1:]

	first, err := strconv.ParseUint(lo, 16, 64)
	if err != nil {
		return 0, 0, fmt.Errorf("[parseRange] couldn't parse start-address %#q in %#q: %w", lo, input, err)
	}
	last, err := strconv.ParseUint(hi, 16, 64)
	if err != nil {
		return 0, 0, fmt.Errorf("[parseRange] couldn't parse end-address %#q in %#q: %w", hi, input, err)
	}
	return first, last, nil
}