Özgür Kesim
88f1455dd7
Instead of using cgo we call the syscall for BPF directly from go. The API hasn't changed, however, and we also closely follow the C-implementation as given in bpf(2). Not sure if this pure go variant is beneficial. Manual maintenance of all constants and structs upon changes of the BPF API would be necessary and cumbersome. We would at least need to complement this with auto-generation of constants and fields from /usr/include/linux/bpf.h.
308 lines
6.6 KiB
Go
308 lines
6.6 KiB
Go
// +build !withcgo
|
|
|
|
package ebpf
|
|
|
|
import (
|
|
"fmt"
|
|
"syscall"
|
|
"unsafe"
|
|
)
|
|
|
|
// All constants are taken from /usr/include/linux/bpf.h
|
|
|
|
// BPF_SYSCALL is the syscall number handed to syscall.Syscall for every bpf
// operation in this file. 321 is __NR_bpf on x86-64 Linux; other architectures
// assign a different number, so this value is not portable.
const BPF_SYSCALL = 321
|
|
|
|
// bpf_cmd selects the operation performed by the bpf syscall; it is passed as
// the first syscall argument. The values are written out explicitly so that
// they can be compared line by line against enum bpf_cmd in
// /usr/include/linux/bpf.h.
type bpf_cmd int

const (
	BPF_MAP_CREATE                 bpf_cmd = 0
	BPF_MAP_LOOKUP_ELEM            bpf_cmd = 1
	BPF_MAP_UPDATE_ELEM            bpf_cmd = 2
	BPF_MAP_DELETE_ELEM            bpf_cmd = 3
	BPF_MAP_GET_NEXT_KEY           bpf_cmd = 4
	BPF_PROG_LOAD                  bpf_cmd = 5
	BPF_OBJ_PIN                    bpf_cmd = 6
	BPF_OBJ_GET                    bpf_cmd = 7
	BPF_PROG_ATTACH                bpf_cmd = 8
	BPF_PROG_DETACH                bpf_cmd = 9
	BPF_PROG_TEST_RUN              bpf_cmd = 10
	BPF_PROG_GET_NEXT_ID           bpf_cmd = 11
	BPF_MAP_GET_NEXT_ID            bpf_cmd = 12
	BPF_PROG_GET_FD_BY_ID          bpf_cmd = 13
	BPF_MAP_GET_FD_BY_ID           bpf_cmd = 14
	BPF_OBJ_GET_INFO_BY_FD         bpf_cmd = 15
	BPF_PROG_QUERY                 bpf_cmd = 16
	BPF_RAW_TRACEPOINT_OPEN        bpf_cmd = 17
	BPF_BTF_LOAD                   bpf_cmd = 18
	BPF_BTF_GET_FD_BY_ID           bpf_cmd = 19
	BPF_TASK_FD_QUERY              bpf_cmd = 20
	BPF_MAP_LOOKUP_AND_DELETE_ELEM bpf_cmd = 21
	BPF_MAP_FREEZE                 bpf_cmd = 22
)
|
|
|
|
// bpf_map_type identifies the kind of eBPF map requested from the kernel. The
// values are written out explicitly so that they can be compared line by line
// against enum bpf_map_type in /usr/include/linux/bpf.h.
type bpf_map_type int

const (
	BPF_MAP_TYPE_UNSPEC                bpf_map_type = 0
	BPF_MAP_TYPE_HASH                  bpf_map_type = 1
	BPF_MAP_TYPE_ARRAY                 bpf_map_type = 2
	BPF_MAP_TYPE_PROG_ARRAY            bpf_map_type = 3
	BPF_MAP_TYPE_PERF_EVENT_ARRAY      bpf_map_type = 4
	BPF_MAP_TYPE_PERCPU_HASH           bpf_map_type = 5
	BPF_MAP_TYPE_PERCPU_ARRAY          bpf_map_type = 6
	BPF_MAP_TYPE_STACK_TRACE           bpf_map_type = 7
	BPF_MAP_TYPE_CGROUP_ARRAY          bpf_map_type = 8
	BPF_MAP_TYPE_LRU_HASH              bpf_map_type = 9
	BPF_MAP_TYPE_LRU_PERCPU_HASH       bpf_map_type = 10
	BPF_MAP_TYPE_LPM_TRIE              bpf_map_type = 11
	BPF_MAP_TYPE_ARRAY_OF_MAPS         bpf_map_type = 12
	BPF_MAP_TYPE_HASH_OF_MAPS          bpf_map_type = 13
	BPF_MAP_TYPE_DEVMAP                bpf_map_type = 14
	BPF_MAP_TYPE_SOCKMAP               bpf_map_type = 15
	BPF_MAP_TYPE_CPUMAP                bpf_map_type = 16
	BPF_MAP_TYPE_XSKMAP                bpf_map_type = 17
	BPF_MAP_TYPE_SOCKHASH              bpf_map_type = 18
	BPF_MAP_TYPE_CGROUP_STORAGE        bpf_map_type = 19
	BPF_MAP_TYPE_REUSEPORT_SOCKARRAY   bpf_map_type = 20
	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE bpf_map_type = 21
	BPF_MAP_TYPE_QUEUE                 bpf_map_type = 22
	BPF_MAP_TYPE_STACK                 bpf_map_type = 23
	BPF_MAP_TYPE_SK_STORAGE            bpf_map_type = 24
)
|
|
|
|
// MapFD is a file descriptor representing an eBPF map
|
|
// MapFD values are passed to the kernel unchanged in the map_fd field of the
// attribute structs that the methods of this type hand to the bpf syscall.
type MapFD uint32
|
|
|
|
/*
|
|
|
|
All methods in this file implement a syscall to bpf one way or another. We
|
|
follow the C-API given in bpf(2) and have the corresponding C-functions
|
|
embedded as comments. Those all refer to the C-function bpf(), which is a
|
|
wrapper for the syscall:
|
|
|
|
static int bpf(enum bpf_cmd cmd, union bpf_attr *attr, unsigned int size)
|
|
{
|
|
return syscall(__NR_bpf, cmd, attr, size);
|
|
}
|
|
|
|
Each of our methods calls the syscall directly instead.
|
|
|
|
*/
|
|
|
|
// CreateMap creates an eBPF map from int->uint64. The file descriptor of the
|
|
// created map is returned.
|
|
func CreateMap() (MapFD, error) {
|
|
/*
|
|
static int bpf_create_map(
|
|
enum bpf_map_type map_type, unsigned int key_size, unsigned int value_size,
|
|
unsigned int max_entries)
|
|
{
|
|
union bpf_attr attr = {
|
|
.map_type = map_type,
|
|
.key_size = key_size,
|
|
.value_size = value_size,
|
|
.max_entries = max_entries};
|
|
|
|
return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
|
|
}
|
|
*/
|
|
|
|
create_attr := struct {
|
|
map_type uint32
|
|
key_size uint32
|
|
value_size uint32
|
|
max_entries uint32
|
|
map_flags uint32
|
|
inner_map_fd uint32
|
|
// minimum of required fields
|
|
}{
|
|
map_type: uint32(BPF_MAP_TYPE_HASH),
|
|
key_size: 4,
|
|
value_size: 8,
|
|
max_entries: 64,
|
|
}
|
|
|
|
r, _, err := syscall.Syscall(
|
|
BPF_SYSCALL,
|
|
uintptr(BPF_MAP_CREATE),
|
|
uintptr(unsafe.Pointer(&create_attr)),
|
|
unsafe.Sizeof(create_attr),
|
|
)
|
|
|
|
if err != 0 {
|
|
return 0, err
|
|
}
|
|
return MapFD(r), nil
|
|
}
|
|
|
|
func (mfd MapFD) bpf_get_next_key(key *int, next_key *int) error {
|
|
/*
|
|
int bpf_get_next_key(int fd, const void *key, void *next_key)
|
|
{
|
|
union bpf_attr attr = {
|
|
.map_fd = fd,
|
|
.key = (__u64) (unsigned long) key,
|
|
.next_key = (__u64) (unsigned long) next_key};
|
|
|
|
return bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
|
|
}
|
|
*/
|
|
|
|
next_attr := struct {
|
|
map_fd MapFD
|
|
key uint64
|
|
next_key uint64
|
|
}{
|
|
map_fd: mfd,
|
|
key: uint64(uintptr(unsafe.Pointer(key))),
|
|
next_key: uint64(uintptr(unsafe.Pointer(next_key))),
|
|
}
|
|
|
|
r, _, err := syscall.Syscall(
|
|
BPF_SYSCALL,
|
|
uintptr(BPF_MAP_GET_NEXT_KEY),
|
|
uintptr(unsafe.Pointer(&next_attr)),
|
|
unsafe.Sizeof(next_attr),
|
|
)
|
|
|
|
if r != 0 {
|
|
return err
|
|
} else {
|
|
return nil
|
|
}
|
|
}
|
|
|
|
func (mfd MapFD) bpf_lookup_elem(key *int, value *uint64) error {
|
|
/*
|
|
int bpf_lookup_elem(int fd, const void *key, void *value)
|
|
{
|
|
union bpf_attr attr = {
|
|
.map_fd = fd,
|
|
.key = (__u64) (unsigned long) key,
|
|
.value = (__u64) (unsigned long) value,
|
|
};
|
|
|
|
return bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
|
|
}
|
|
*/
|
|
|
|
lookup_attr := struct {
|
|
map_fd MapFD
|
|
key uint64
|
|
value uint64
|
|
}{
|
|
map_fd: mfd,
|
|
key: uint64(uintptr(unsafe.Pointer(key))),
|
|
value: uint64(uintptr(unsafe.Pointer(value))),
|
|
}
|
|
|
|
r, _, err := syscall.Syscall(
|
|
BPF_SYSCALL,
|
|
uintptr(BPF_MAP_LOOKUP_ELEM),
|
|
uintptr(unsafe.Pointer(&lookup_attr)),
|
|
unsafe.Sizeof(lookup_attr),
|
|
)
|
|
|
|
if r != 0 {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// GetMap gets the key/value pairs from the specified eBPF map as a Go map
|
|
func (mfd MapFD) GetMap() (map[int]uint64, error) {
|
|
retMap := make(map[int]uint64)
|
|
|
|
var (
|
|
key, next int
|
|
value uint64
|
|
)
|
|
|
|
for {
|
|
err := mfd.bpf_get_next_key(&key, &next)
|
|
if err != nil {
|
|
if err == syscall.ENOENT {
|
|
// The provided key was the last element. We're done iterating.
|
|
return retMap, nil
|
|
}
|
|
return nil, fmt.Errorf("bpf_get_next_key failed with error %v", err)
|
|
}
|
|
|
|
err = mfd.bpf_lookup_elem(&next, &value)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("bpf_lookup_elem failed with error %v", err)
|
|
}
|
|
|
|
retMap[next] = value
|
|
|
|
key = next
|
|
}
|
|
}
|
|
|
|
// Flags for BPF_MAP_UPDATE_ELEM, as defined in /usr/include/linux/bpf.h.
// BPF_ANY, BPF_NOEXIST and BPF_EXIST are consecutive values, but BPF_F_LOCK is
// a bit flag with the value 4, so the block must not be generated with iota
// (which would yield 3).
const (
	BPF_ANY     = 0 // create a new element or update an existing one
	BPF_NOEXIST = 1 // create a new element only if the key does not exist yet
	BPF_EXIST   = 2 // update an existing element only
	BPF_F_LOCK  = 4 // spin_lock-ed map_lookup/map_update
)
|
|
|
|
// Add puts the (key, value) into the eBPF map, only if the key does not exist
|
|
// yet in the map. It returns an error otherwise.
|
|
func (mfd MapFD) Add(key int, value uint64) error {
|
|
return mfd.updateElement(key, value, BPF_NOEXIST)
|
|
}
|
|
|
|
// Change changes the value to an existing key in the eBPF map. It returns an
|
|
// error otherwise.
|
|
func (mfd MapFD) Change(key int, value uint64) error {
|
|
return mfd.updateElement(key, value, BPF_EXIST)
|
|
}
|
|
|
|
// Set puts the (key, value) into the eBPF map. It will create or overwrite an
|
|
// existing entry for that key.
|
|
func (mfd MapFD) Set(key int, value uint64) error {
|
|
return mfd.updateElement(key, value, BPF_ANY)
|
|
}
|
|
|
|
// updateElement is the low level wrapper to bpf_update_elem, used from Add(),
|
|
// Set() and Change().
|
|
func (mfd MapFD) updateElement(key int, value uint64, flag uint64) error {
|
|
/*
|
|
int bpf_update_elem(int fd, const void *key, const void *value, uint64_t flags)
|
|
{
|
|
union bpf_attr attr = {
|
|
.map_fd = fd,
|
|
.key = (__u64) (unsigned long) key,
|
|
.value = (__u64) (unsigned long) value,
|
|
.flags = flags,
|
|
};
|
|
|
|
return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
|
|
}
|
|
*/
|
|
|
|
update_attr := struct {
|
|
map_fd MapFD
|
|
key uint64
|
|
value uint64
|
|
flags uint64
|
|
}{
|
|
map_fd: mfd,
|
|
key: uint64(uintptr(unsafe.Pointer(&key))),
|
|
value: uint64(uintptr(unsafe.Pointer(&value))),
|
|
flags: flag,
|
|
}
|
|
|
|
r, _, err := syscall.Syscall(
|
|
BPF_SYSCALL,
|
|
uintptr(BPF_MAP_UPDATE_ELEM),
|
|
uintptr(unsafe.Pointer(&update_attr)),
|
|
unsafe.Sizeof(update_attr),
|
|
)
|
|
|
|
if r != 0 || err != 0 {
|
|
return fmt.Errorf("couldn't update element: %s", err.Error())
|
|
}
|
|
return nil
|
|
}
|