package symbolyze import ( "bufio" "debug/elf" "fmt" "log" "os" "path/filepath" "strconv" "strings" "sync" "time" ) // Scanner represents an engine for scanning for a specific symbol in all // ELF-files matching a certain pattern. The pattern is described in // fileapth.Match(). // // Once a Scanner is created with NewScanner(), it should be populated with // Observer functions using OnFound(). Optionally, the scanner can be put into // debugging mode by a call to DebugOn() prior to a call to Run(). // // A call to Scanner.Run() then starts the engine and it will scan all pids in // /proc. Whenever a match is found, all observers will be called with the // (pid, offset), concurrently. type Scanner struct { rwmutex symbol string pathglob string cache map[string]uint64 // Contains (pathname, offset) observers []Observer // Callbacks // Instead of using a boolean to indicate debugging, we use function // members. This way we can populate them with noop-functions in the // non-debug case and not polute the code with if-statements. debugf func(format string, v ...interface{}) debugln func(v ...interface{}) logger // Embedded logger errors errors ticker *time.Ticker // Used to run the scanner repeatedly } type errors []error func (e errors) Error() string { switch len(e) { case 0: return "nil" case 1: return e[0].Error() default: list := make([]string, len(e)) for i := range e { list[i] = e[i].Error() } return fmt.Sprintf("multiple errors:\n%s", strings.Join(list, "\n")) } } // We use a lowercase type alias for *log.Logger so that we can embedd it in // Scanner without exporting it. type logger = *log.Logger type rwmutex = sync.RWMutex var ( nodebugf = func(format string, v ...interface{}) {} nodebugln = func(v ...interface{}) {} ) // An Observer is a callback that can be registered with Scanner.OnFound. It // will be called with a pid and an offset. Observers are called concurrently. // They have to be thread-safe. 
type Observer func(pid int, offset uint64) error // NewScanner returns a new Scanner that scans all running processes for the // given symbol name in all memory-mapped files matching the given pathglob. // To be useful, one or more Observer functions should be registered with // Scanner.OnFound(). The scanning starts with a call of Scanner.Run(). func NewScanner(symbol, pathglob string) *Scanner { return &Scanner{ symbol: symbol, pathglob: pathglob, cache: map[string]uint64{}, logger: log.New(os.Stderr, "[symbolyze] ", log.Ltime|log.Lmicroseconds), // debugging is off per default. debugf: nodebugf, debugln: nodebugln, } } // Debug sets the scanner into debugging mode. It must be called only once // before a call to Scanner.Run(). func (S *Scanner) Debug(on bool) { S.Lock() defer S.Unlock() if on { // Use the embedded *log.Logger for debugging. S.debugf = S.Printf S.debugln = S.Println S.debugln("starting in debug-mode") } else { S.debugf = nodebugf S.debugln = nodebugln } } // setErrorf puts the Scanner into an error state with the given error // statement. It also logs the error. setErrorf is not thread-safe. func (S *Scanner) setErrorf(format string, a ...interface{}) { S.Lock() S.errors = append(S.errors, fmt.Errorf(format, a...)) S.Unlock() S.Printf(format, a...) } func (S *Scanner) HasErrors() bool { S.RLock() defer S.RUnlock() return len(S.errors) > 0 } func (S *Scanner) Errors() error { S.RLock() defer S.RUnlock() if len(S.errors) == 0 { return nil } else { e2 := make(errors, len(S.errors)) copy(e2, S.errors) return e2 } } func (S *Scanner) addError(err error) { S.Lock() defer S.Unlock() S.errors = append(S.errors, err) } // OnFound puts Observer functions at the end of the internal queue. All // Observer functions are called in sequence in their own goroutine whenever // the scanner finds the symbol in a running program. That implies that an // Observer has to be thread-safe. Errors from the observers will be logged. 
func (S *Scanner) OnFound(fun ...Observer) { S.Lock() defer S.Unlock() S.observers = append(S.observers, fun...) return } // Run starts the scanning process. It scans the entries of all /proc/NNN/maps // files for pathnames that match the provided path-glob and are executables or // shared libraries in ELF format. It searches for the provided symbol in // those files and calls the registered Observer functions, concurrently, with // the pid and offset of the symbol. // // Run will return an error if it couldn't read the proc filesystem. Otherwise // it will try to continue to loop over all pids, writing potential errors to // the console. Errors from the observer functions are logged. func (S *Scanner) Run() error { if S.HasErrors() { return S.Errors() } proc, err := os.Open("/proc") if err != nil { S.setErrorf("Failed to open /proc: %v\n", err) return S.Errors() } infos, err := proc.Readdir(-1) if err != nil { S.setErrorf("Failed to read /proc: %v\n", err) return S.Errors() } proc.Close() var wg sync.WaitGroup // To be able to wait for all the observers to finish for _, pinfo := range infos { var pid_s = pinfo.Name() // The entry /proc/NNN/ must be a directory with integer name if !pinfo.IsDir() { continue } else if pid, err := strconv.Atoi(pid_s); err != nil { continue } else if offset, found := S.searchSymbolInPid(pid); !found { continue } else { // Call the observers with (pid, offset), in the // background. wg.Add(len(S.observers)) // Wait for this many goroutines go func() { for n, observer := range S.observers { observer := observer go func() { err = observer(pid, offset) if err != nil { S.addError(fmt.Errorf("S.observer[%d](%d, %d) error: %v", n, pid, offset, err)) } wg.Done() }() } }() } } wg.Wait() // Wait for all observers to finish return S.Errors() } // RunEvery() starts a scanning process and repeats at the given time step. 
func (S *Scanner) RunEvery(step time.Duration) { S.ticker = time.NewTicker(step) for { select { case <-S.ticker.C: err := S.Run() if err != nil { S.Println(err) } } } } func (S *Scanner) Stop() { if S.ticker != nil { S.debugln("Stopping ticker") S.ticker.Stop() } } // searchSymbolInPid loops over the entries in /proc//maps and searches for // the symbol in the mapped files. // // The current implementation makes the following assumptions: // 1. The pathname in an entry does not contain spaces. // 2. The pathname starts with /. // 3. The symbol must be in a region that has permission "rw-p". // 4. The symbol is present at most in one mapped file at the same time. // // It returns the offsets in memory of the running program, if found. func (S *Scanner) searchSymbolInPid(pid int) (offset uint64, found bool) { path := filepath.Join("/proc", strconv.Itoa(pid), "maps") maps, err := os.Open(path) if err != nil { S.debugf("%v\n", err) return 0, false } defer maps.Close() // Read the entries by line scanner := bufio.NewScanner(maps) for scanner.Scan() { // A line of our interest in the maps file has the following // structure, see man proc(5). 
// // 0 1 2 3 4 5 // address perms offset dev inode pathname // 7fdd8fece000-7fdd8ff74000 rw-p 00423000 fd:01 14156759 /usr/lib/x86_64-linux-gnu/libpython3.7m.so.1.0 // We assume that the pathname contains no spaces so // bytes.Fields splits the line excactly into six fields fields := strings.Fields(scanner.Text()) if len(fields) != 6 { continue } pathname := fields[5] if !strings.HasPrefix(pathname, "/") { // Not a pathname continue } // The filename must match the given pattern filename := filepath.Base(pathname) ok, err := filepath.Match(S.pathglob, filename) if err != nil || !ok { continue } // The symbol needs to be writable if fields[1] != "rw-p" { continue } // Get the start address of the mapped region in memory startAddress, _, err := parseRange(fields[0]) if err != nil { S.Printf("%v\n", err) continue } // Read the offset in the file that this region is mapping fileOffset, err := strconv.ParseUint(fields[2], 16, 64) if err != nil { S.Printf("fields[2] %#q: %v\n", fields[2], err) continue } // Finally, find the symbol in the binary. If found, // findSymbol returns the offset of the symbol in memory, // taking alignment into account. memOffset, found := S.findSymbolInELF(pathname) if !found { continue } // Hurray, we've found an entry! return startAddress + memOffset - fileOffset, true } return 0, false } // findSymbolInELF searches for the provided symbol in the given pathname to an // ELF-file. If found, it returns the offset of the symbol in the virtual // memory according to the fomula: // // vmOffset = alignedOffset(section) + offsetInSection(symbol) // // The result will be cached so that subsequent calls to findSymbolInELF with // the same pathname can quickly return. func (S *Scanner) findSymbolInELF(pathname string) (offset uint64, found bool) { // 0. Return the value from the cache, if found. S.RLock() if offset, found = S.cache[pathname]; found { S.RUnlock() return offset, found } S.RUnlock() // 1. 
Open the file with the ELF-parser file, err := elf.Open(pathname) if err != nil { S.Printf("%v", err) return 0, false } defer file.Close() // 2. Find the symbol symbols, err := file.DynamicSymbols() if err != nil { S.Printf("%v", err) return 0, false } var sym *elf.Symbol for _, s := range symbols { if s.Name == S.symbol { S.debugf("Found symbol %#v in %s: %#v\n", sym, pathname, s) sym = &s break } } if sym == nil { S.debugf("symbol %q not found in %s\n", sym, pathname) return 0, false } // 3. Extract the information about the section if len(file.Sections) < int(sym.Section) { S.debugf("len(file.Section) < int(sym.Section) for symbol %q in %s\n", sym, pathname) return 0, false } section := file.Sections[sym.Section] if section == nil { S.debugf("Section %v not found for ELF-Header %q in %s\n", sym.Section, pathname) return 0, false } // 4. Calculate the offset of the given section, aligned according to // the SectionHeader.Addralign entry. mask := section.SectionHeader.Addralign - 1 alignedSectOff := (section.SectionHeader.Offset + mask) & (^mask) // 5. The location of the symbol in virtual memory is finally: vmOffset := alignedSectOff + (sym.Value - section.SectionHeader.Addr) // 6. Store this calculation in our cache so that we don't to touch // this file again. S.Lock() S.cache[pathname] = vmOffset S.Unlock() return vmOffset, true } // parseRange is a helper function that parses the first field in a line in // /proc//maps: // 7fdd8fece000-7fdd8ff74000 rw-p ... // It returns the start and end addresses of the range and a potential error. 
func parseRange(input string) (start, end uint64, e error) {
	// Expected shape: 7fdd8fece000-7fdd8ff74000
	bounds := strings.Split(input, "-")
	if len(bounds) != 2 {
		return 0, 0, fmt.Errorf("[parseRange] unrecognized format for region: %#q", input)
	}

	// Both halves are hexadecimal numbers without a "0x" prefix.
	low, err := strconv.ParseUint(bounds[0], 16, 64)
	if err != nil {
		return 0, 0, fmt.Errorf("[parseRange] couldn't parse start-address %#q in %#q: %w", bounds[0], input, err)
	}

	high, err := strconv.ParseUint(bounds[1], 16, 64)
	if err != nil {
		return 0, 0, fmt.Errorf("[parseRange] couldn't parse end-address %#q in %#q: %w", bounds[1], input, err)
	}

	return low, high, nil
}