From 7af1728eedf44e40e1d5baf1152696a33dca96b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96zg=C3=BCr=20Kesim?= Date: Wed, 15 Jan 2020 23:26:30 +0100 Subject: [PATCH] Cleanup done and documentation added symbolyze.go has been simplified and cleaned up. It is now also documented, e.g.: % go doc Scanner package symbolyze // import "." type Scanner struct { *log.Logger // Embedded logger // Has unexported fields. } Scanner represents an engine for scanning for a specific symbol in all ELF-files matching a certain pattern. The pattern is described in fileapth.Match(). Once a Scanner is created with New(), it should be populated with Observer functions using OnFound(). Optionally, the scanner can be put into debugging mode by a call to DebugOn() prior to a call to Run(). A call to Scanner.Run() then starts the engine and it will scan all pids in /proc. Whenever a match is found, all observers will be called with the (pid, offset), concurrently. func New(symbol, pathglob string) *Scanner func (S *Scanner) DebugOn() func (S *Scanner) OnFound(fun Observer) func (S *Scanner) Run() error --- GetRuntimeAddresses/main.go | 10 +- GetRuntimeAddresses/symbolyze/symbolyze.go | 273 +++++++++++++-------- 2 files changed, 176 insertions(+), 107 deletions(-) diff --git a/GetRuntimeAddresses/main.go b/GetRuntimeAddresses/main.go index 5e5006c..46e41a3 100644 --- a/GetRuntimeAddresses/main.go +++ b/GetRuntimeAddresses/main.go @@ -21,9 +21,13 @@ func main() { // Solution to your tasks goes here // - finder := symbolyze.New("_PyRuntime", "*python3*") - finder.OnFound(mapFD.Set) - finder.Run() + scanner := symbolyze.New("_PyRuntime", "*python3*") + scanner.OnFound(mapFD.Set) + err = scanner.Run() + if err != nil { + fmt.Printf("Failed to run the symbolyze scanner: %s", err) + os.Exit(1) + } mapContents, err := mapFD.GetMap() if err != nil { diff --git a/GetRuntimeAddresses/symbolyze/symbolyze.go b/GetRuntimeAddresses/symbolyze/symbolyze.go index 3ef5b8b..c5ea3b3 100644 --- 
a/GetRuntimeAddresses/symbolyze/symbolyze.go +++ b/GetRuntimeAddresses/symbolyze/symbolyze.go @@ -11,80 +11,109 @@ import ( "strings" ) -type Finder struct { - symbol string - pathglob string - cache map[string]uint64 - finds map[int]uint64 - ownpid int - observer func(key int, value uint64) error +// Scanner represents an engine for scanning for a specific symbol in all +// ELF-files matching a certain pattern. The pattern is described in +// fileapth.Match(). +// +// Once a Scanner is created with New(), it should be populated with Observer +// functions using OnFound(). Optionally, the scanner can be put into +// debugging mode by a call to DebugOn() prior to a call to Run(). +// +// A call to Scanner.Run() then starts the engine and it will scan all pids in +// /proc. Whenever a match is found, all observers will be called with the +// (pid, offset), concurrently. +type Scanner struct { + symbol string + pathglob string + cache map[string]uint64 // Contains (pathname, offset) + observers []Observer // Callbacks - *log.Logger + *log.Logger // Embedded logger + + // Instead of using a boolean to indicate debugging, we use function + // members. This way we can populate them with noop-functions in the + // non-debug case and not polute the code with if-statements. debugf func(format string, v ...interface{}) debugln func(v ...interface{}) - err error + err error // error state of the scanner. } -var ( - nodebugf = func(string, ...interface{}) {} - nodebugln = func(...interface{}) {} -) +// An Observer is a callback that can be registerd with Scanner.OnFound. It +// will be called with a pid and an offset. Observers are called concurrently. +// They have to be thread-safe. +type Observer func(pid int, offset uint64) error -func New(symbol, pathglob string) *Finder { - return &Finder{ +// New returns a new Scanner that scans all running processes for the given +// symbol name in all memory-mapped files matching the given pathglob. 
+// To be useful, one or more Observer functions should be registerd with +// Scanner.OnFound(). The scanning starts with a call of Scanner.Run(). +func New(symbol, pathglob string) *Scanner { + return &Scanner{ symbol: symbol, pathglob: pathglob, cache: map[string]uint64{}, - finds: map[int]uint64{}, - ownpid: os.Getpid(), - Logger: log.New(os.Stdout, "[symbolyze] ", log.LstdFlags), - debugf: nodebugf, - debugln: nodebugln, + Logger: log.New(os.Stderr, "[symbolyze] ", log.Ltime|log.Lmicroseconds), + + // debugging is off per default. + debugf: func(string, ...interface{}) {}, + debugln: func(...interface{}) {}, } } -// not threadsafe -func (F *Finder) Debug(on bool) { - if on { - F.debugf = F.Printf - F.debugln = F.Println - F.debugln("starting in debug-mode") - } else { - F.debugf = nodebugf - F.debugln = nodebugln - } +// Debug sets the scanner into debugging mode. It must called only once before +// a call to Scanner.Run(). +func (S *Scanner) DebugOn() { + // Use the embedded *log.Logger for debugging. + S.debugf = S.Printf + S.debugln = S.Println + S.debugln("starting in debug-mode") } -func (F *Finder) setErrorf(format string, a ...interface{}) { - F.err = fmt.Errorf(format, a) - F.Printf(format, a) +// setErrorf puts the Scanner into an error state with the given error +// statement. It also logs the error. +func (S *Scanner) setErrorf(format string, a ...interface{}) { + S.err = fmt.Errorf(format, a...) + S.Printf(format, a...) } -type Observer func(int, uint64) error - -// not threadsafe -func (F *Finder) OnFound(fun Observer) { - F.observer = fun +// OnFound puts an Observer function into the interal queue. The functions are +// called in sequence in their own goroutine whenever the scanner finds the +// symbol in the a running program. That implies that an Observer has to be +// thread-safe. Errors from the observers will be logged. +// +// Calling OnFound is not thread-safe. 
+func (S *Scanner) OnFound(fun Observer) { + S.observers = append(S.observers, fun) return } -func (F *Finder) Run() error { - if F.err != nil { - return F.err +// Run starts the scanning process. It scans the maps file all processes in +// /proc for pathnames that match the provided pathglob and that are ELF +// executables or shared libraries. It searches for the provided symbol in +// those files and calls the registered Observer functions concurrently with +// the pid and offset of the symbol. +// +// Run will return an error if it couldn't read the proc filesystem. Otherwise +// it will try to continue to loop over all pids, writing potential errors to +// the console. Errors from the observer functions are also logged and the +// final such error is the return value of Run(). +func (S *Scanner) Run() error { + if S.err != nil { + return S.err } proc, err := os.Open("/proc") if err != nil { - F.setErrorf("Failed to open /proc: %w\n", err) - return F.err + S.setErrorf("Failed to open /proc: %v\n", err) + return S.err } infos, err := proc.Readdir(-1) if err != nil { - F.setErrorf("Failed to read /proc: %w\n", err) - return F.err + S.setErrorf("Failed to read /proc: %v\n", err) + return S.err } proc.Close() @@ -97,145 +126,186 @@ func (F *Finder) Run() error { continue } else if pid, err := strconv.Atoi(pid_s); err != nil { continue - } else if pid == F.ownpid { // skip our own pid - continue - } else if offset, found := F.searchSymbolIn(pid); !found { + } else if offset, found := S.searchSymbolIn(pid); !found { continue } else { - F.finds[pid] = uint64(offset) - if F.observer != nil { - // TODO: accumulate errors? - err = F.observer(pid, uint64(offset)) - if err != nil { - F.debugf("F.observer error: %w", err) - } + // Notify the observers. + for n, observer := range S.observers { + go func() { + err = observer(pid, offset) + if err != nil { + S.setErrorf("S.observer[%d] error: %v", n, err) + // TODO: propagate errors from all Observers. 
+ } + + }() } } } - return nil + + return S.err } -func (F *Finder) searchSymbolIn(pid int) (offset uint64, ok bool) { - // read the maps file for the binary and shared libraries +// searchSymbolIn loops over the entries in /proc//maps and searches for +// the symbol in the mapped files. +// +// The current implementation makes the following assumptions: +// 1. The pathname in an entry does not contain spaces. +// 2. The pathname starts with /. +// 3. The symbol must be in a region that has permission "rw-p". +// 4. The symbol is present at most in one mapped file at the same time. +// +// It returns the offsets in memory of the running program, if found. +func (S *Scanner) searchSymbolIn(pid int) (offset uint64, found bool) { + path := filepath.Join("/proc", strconv.Itoa(pid), "maps") maps, err := os.Open(path) if err != nil { - // fmt.Printf("Warning: Failed to read %#q: %v\n", path, err) - return + S.Printf("%v\n", err) + return 0, false } + defer maps.Close() + + // Read the entries by line scanner := bufio.NewScanner(maps) for scanner.Scan() { + + // A line of our interest in the maps file has the following + // structure, see man proc(5). 
+ // // 0 1 2 3 4 5 // address perms offset dev inode pathname // 7fdd8fece000-7fdd8ff74000 rw-p 00423000 fd:01 14156759 /usr/lib/x86_64-linux-gnu/libpython3.7m.so.1.0 - fields := strings.Fields(scanner.Text()) - - // TODO: we assume that the pathname contains no spaces so + // We assume that the pathname contains no spaces so // bytes.Fields splits the line excactly into six fields - + fields := strings.Fields(scanner.Text()) if len(fields) != 6 { continue } pathname := fields[5] - if !strings.HasPrefix(pathname, "/") { // Not a pathname continue } + // The filename must match the given pattern filename := filepath.Base(pathname) - ok, err := filepath.Match(F.pathglob, filename) + ok, err := filepath.Match(S.pathglob, filename) if err != nil || !ok { continue } - if fields[1] != "rw-p" { // symbol needs to be writable + // The symbol needs to be writable + if fields[1] != "rw-p" { continue } - memOffset, found := F.findSymbol(pathname) + // Get the start address of the mapped region in memory + startAddress, _, err := parseRange(fields[0]) + if err != nil { + S.Printf("%v\n", err) + continue + } + + // Read the offset in the file that this region is mapping + fileOffset, err := strconv.ParseUint(fields[2], 16, 64) + if err != nil { + S.Printf("fields[2] %#q: %v\n", fields[2], err) + continue + } + + // Finally, find the symbol in the binary. If found, + // findSymbol returns the offset of the symbol in memory, + // taking alignment into account. + memOffset, found := S.findSymbol(pathname) if !found { continue } - start, _, err := parseRange(fields[0]) - if err != nil { - fmt.Printf("%w\n", err) - continue - } - - fileoffset, err := strconv.ParseUint(fields[2], 16, 64) - if err != nil { - fmt.Printf("Error while parsing fileoffset %#q: %w\n", fields[2], err) - continue - } - - F.finds[pid] = start + memOffset - fileoffset - - return start + memOffset - fileoffset, true - + // Hurray, we've found an entry! 
+ return startAddress + memOffset - fileOffset, true } return 0, false } -func (F *Finder) findSymbol(pathname string) (offset uint64, found bool) { +// findSymbol searches for the provided symbol in the given pathname to an +// ELF-file. If found, it returns the offset of the symbol in the virtual +// memory according to the fomula: +// +// vmOffset = alignedOffset(section) + offsetInSection(symbol) +// +// The result will be cached so that subsequent calls to findSymbol with the +// same pathname can quickly return. +func (S *Scanner) findSymbol(pathname string) (offset uint64, found bool) { - if offset, found = F.cache[pathname]; found { + // 0. Return the value from the cache, if found. + if offset, found = S.cache[pathname]; found { return offset, found } + // 1. Open the file with the ELF-parser file, err := elf.Open(pathname) if err != nil { - F.setErrorf("elf.Open(%s): %w", pathname, err) + S.Printf("%v", err) return 0, false } defer file.Close() + // 2. Find the symbol symbols, err := file.DynamicSymbols() if err != nil { - F.setErrorf("file.DynamicSymbols(): %w", err) + S.Printf("%v", err) return 0, false } var sym *elf.Symbol - for _, s := range symbols { - if s.Name == F.symbol { - F.debugf("Found symbol %#v in %s: %#v\n", sym, pathname, s) + if s.Name == S.symbol { + S.debugf("Found symbol %#v in %s: %#v\n", sym, pathname, s) sym = &s break } } - if sym == nil { - F.debugf("symbol %q not found in %s\n", sym, pathname) + S.debugf("symbol %q not found in %s\n", sym, pathname) return 0, false } + // 3. 
Extract the information about the section if len(file.Sections) < int(sym.Section) { - F.debugf("len(file.Section) < int(sym.Section) for symbol %q in %s\n", sym, pathname) + S.debugf("len(file.Section) < int(sym.Section) for symbol %q in %s\n", sym, pathname) return 0, false } section := file.Sections[sym.Section] if section == nil { - F.debugf("Section %v not found for ELF-Header %q in %s\n", sym.Section, pathname) + S.debugf("Section %v not found for ELF-Header %q in %s\n", sym.Section, pathname) return 0, false } - header := §ion.SectionHeader + // 4. Calculate the offset of the given section, aligned according to + // the SectionHeader.Addralign entry. + mask := section.SectionHeader.Addralign - 1 + alignedSectOff := (section.SectionHeader.Offset + mask) & (^mask) - memoffset := sym.Value - header.Addr + alignedOffset(header) + // 5. The location of the symbol in virtual memory is finally: + vmOffset := alignedSectOff + (sym.Value - section.SectionHeader.Addr) - F.cache[pathname] = memoffset + // 6. Store this calculation in our cache so that we don't to touch + // this file again. + S.cache[pathname] = vmOffset - return memoffset, true + return vmOffset, true } +// parseRange is a helper function that parses the first field in a line in +// /proc//maps: +// 7fdd8fece000-7fdd8ff74000 rw-p ... +// It returns the start and end addresses of the range and a potential error. func parseRange(input string) (start, end uint64, e error) { // 7fdd8fece000-7fdd8ff74000 parts := strings.Split(input, "-") @@ -259,8 +329,3 @@ func parseRange(input string) (start, end uint64, e error) { return start, end, e } - -func alignedOffset(section *elf.SectionHeader) uint64 { - mask := section.Addralign - 1 - return (section.Offset + mask) & (^mask) -}