Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: discovery.process: Detect additional metadata about various executables #6346

Draft
wants to merge 20 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions component/discovery/process/analyze/analyze.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package analyze

import (
"debug/elf"
"io"

"github.com/go-kit/log"
"github.com/go-kit/log/level"
)

// String spellings of boolean values.
// NOTE(review): not referenced in the visible code; presumably used by the
// individual analyzers as label values — confirm.
const (
labelValueTrue = "true"
labelValueFalse = "false"
)

// Results is the outcome of running the analyzers against one process.
type Results struct {
// Labels is the map handed to every analyzer; Analyze creates it empty.
// NOTE(review): presumably analyzers record detected metadata here as
// label name/value pairs — confirm against the analyzer implementations.
Labels map[string]string
}

// Input bundles what each analyzer is given about a single process.
type Input struct {
// PID is the numeric process ID.
PID uint32
// PIDs is the process ID in string form, as supplied by the caller.
PIDs string
// File gives random-access reads of the process executable.
File io.ReaderAt
// ElfFile is the parsed ELF view of the executable.
// NOTE(review): presumably parsed from the same file as File — confirm
// against callers.
ElfFile *elf.File
}

// analyzerFunc inspects the process described by input and records what it
// finds in analysis.
//
// If an error occurs analyzing the binary/process information it is returned.
// If there is strong evidence that this process has been detected, the
// analyzer can return io.EOF and it will skip all following analyzers.
type analyzerFunc func(input Input, analysis *Results) error

// Analyze runs the built-in analyzers, in a fixed order, against input and
// returns the Results they all wrote into.
//
// An analyzer that returns io.EOF stops the run immediately. Any other
// non-nil error is logged at warn level through logger and the remaining
// analyzers still run. The returned pointer is never nil.
func Analyze(logger log.Logger, input Input) *Results {
	results := &Results{Labels: map[string]string{}}

	analyzers := []analyzerFunc{
		analyzeBinary,
		analyzeGo,
		analyzePython,
		analyzeDotNet,
		analyzeJava,
	}

	for _, run := range analyzers {
		err := run(input, results)
		if err == nil {
			continue
		}
		if err == io.EOF {
			// The analyzer asked for all following analyzers to be skipped.
			return results
		}
		level.Warn(logger).Log("msg", "error during", "func", "todo", "err", err)
	}

	return results
}
85 changes: 85 additions & 0 deletions component/discovery/process/analyze/cache/buildid.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
package cache

import (
"bytes"
"debug/elf"
"encoding/hex"
"errors"
"fmt"
)

// Copied from https://github.com/grafana/pyroscope/blob/8a7fe2b80c219bfda9be685ff27ca1dee4218a42/ebpf/symtab/elf/buildid.go#L31

// ErrNoBuildIDSection is returned when an ELF file has no section carrying
// the requested build ID. Callers should test for it with errors.Is.
var ErrNoBuildIDSection = errors.New("build ID section not found")

// BuildID returns a build ID for f, preferring the GNU build ID and falling
// back to the Go build ID.
//
// A missing section in one source is not an error: the next source is tried.
// Any other extraction error is returned as-is. If neither source yields an
// ID, ErrNoBuildIDSection is returned.
func BuildID(f *elf.File) (string, error) {
	extractors := []func(*elf.File) (string, error){
		GNUBuildID,
		GoBuildID,
	}
	for _, extract := range extractors {
		id, err := extract(f)
		if err != nil && !errors.Is(err, ErrNoBuildIDSection) {
			return "", err
		}
		if id != "" {
			return id, nil
		}
	}
	return "", ErrNoBuildIDSection
}

// goBuildIDSep is the separator counted when validating a Go build ID.
var goBuildIDSep = []byte("/")

// GoBuildID returns the build ID stored in the .note.go.buildid section of f.
//
// It returns ErrNoBuildIDSection when the section is absent, and a
// descriptive error when the section cannot be read, is too short, holds the
// placeholder value "redacted", or does not look like a Go build ID (fewer
// than 40 bytes or fewer than two "/" separators).
func GoBuildID(f *elf.File) (string, error) {
	buildIDSection := f.Section(".note.go.buildid")
	if buildIDSection == nil {
		return "", ErrNoBuildIDSection
	}

	data, err := buildIDSection.Data()
	if err != nil {
		return "", fmt.Errorf("reading .note.go.buildid %w", err)
	}
	if len(data) < 17 {
		return "", fmt.Errorf(".note.go.buildid is too small")
	}

	// Keep only the payload: drop the first 16 bytes and the final byte.
	// NOTE(review): presumably the ELF note header/name and a trailing
	// terminator respectively — confirm against the note layout.
	data = data[16 : len(data)-1]

	// Reject the placeholder before the shape check below; "redacted" is
	// shorter than 40 bytes, so checking it afterwards could never match.
	if string(data) == "redacted" {
		return "", fmt.Errorf("blacklisted .note.go.buildid ")
	}
	if len(data) < 40 || bytes.Count(data, goBuildIDSep) < 2 {
		return "", fmt.Errorf("wrong .note.go.buildid ")
	}
	return string(data), nil
}

// GNUBuildID returns the hex-encoded build ID stored in the
// .note.gnu.build-id section of f.
//
// It returns ErrNoBuildIDSection when the section is absent, and a
// descriptive error when the section cannot be read, is shorter than 16
// bytes, does not carry the "GNU" name at bytes 12..14, or has a payload
// that is neither 20 nor 8 bytes long.
func GNUBuildID(f *elf.File) (string, error) {
	section := f.Section(".note.gnu.build-id")
	if section == nil {
		return "", ErrNoBuildIDSection
	}

	note, err := section.Data()
	if err != nil {
		return "", fmt.Errorf("reading .note.gnu.build-id %w", err)
	}
	if len(note) < 16 {
		return "", fmt.Errorf(".note.gnu.build-id is too small")
	}
	if string(note[12:15]) != "GNU" {
		return "", fmt.Errorf(".note.gnu.build-id is not a GNU build-id")
	}

	// Everything after the first 16 bytes is the raw ID.
	switch payload := note[16:]; len(payload) {
	case 20, 8: // 8 is xxhash, for example in Container-Optimized OS
		return hex.EncodeToString(payload), nil
	default:
		return "", fmt.Errorf(".note.gnu.build-id has wrong size ")
	}
}
146 changes: 146 additions & 0 deletions component/discovery/process/analyze/cache/cache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
//go:build linux

package cache

import (
"debug/elf"
"os"
"path/filepath"
"strconv"

"github.com/go-kit/log"
"github.com/grafana/agent/component/discovery/process/analyze"
)

// Cache remembers analysis results so that processes sharing an executable
// can share one analysis. Results are indexed by PID, by the executable's
// Stat, and by build ID.
//
// NOTE(review): the visible methods touch these maps without any locking;
// callers presumably serialize access — confirm.
type Cache struct {
// l is the logger passed on to analyze.Analyze.
l log.Logger
// pids maps a process ID to the entry stored for it by Put.
pids map[uint32]*Entry
// stats maps an executable's Stat to its entry; Put only fills it when
// both Inode and Dev are non-zero.
stats map[Stat]*Entry
// buildIDs maps a non-empty build ID to the analysis results for it.
buildIDs map[string]*analyze.Results
}

// New returns an empty Cache. logger is stored and later passed to
// analyze.Analyze whenever an executable has to be analyzed.
func New(logger log.Logger) *Cache {
	c := new(Cache)
	c.l = logger
	c.pids = map[uint32]*Entry{}
	c.stats = map[Stat]*Entry{}
	c.buildIDs = map[string]*analyze.Results{}
	return c
}

// Entry is what the Cache stores for a process.
type Entry struct {
// Results is the analysis outcome; it may be shared between entries.
Results *analyze.Results
// Stat is the Stat of the executable, as produced by StatFromFileInfo.
Stat Stat
// BuildID is the executable's build ID, or "" when none was found.
BuildID string
}

// GetPID returns the entry stored for pid, or nil if there is none.
func (c *Cache) GetPID(pid uint32) *Entry {
	if entry, ok := c.pids[pid]; ok {
		return entry
	}
	return nil
}

// Put stores a as the entry for pid and indexes it for later lookups.
//
// The entry is also indexed by its Stat, but only when both Inode and Dev
// are non-zero, and its Results are indexed by BuildID when that is
// non-empty. a must not be nil.
func (c *Cache) Put(pid uint32, a *Entry) {
	c.pids[pid] = a

	if st := a.Stat; st.Inode != 0 && st.Dev != 0 {
		c.stats[st] = a
	}
	if id := a.BuildID; id != "" {
		c.buildIDs[id] = a.Results
	}
}

// GetStat returns the entry indexed under s, or nil if there is none.
func (c *Cache) GetStat(s Stat) *Entry {
	if entry, ok := c.stats[s]; ok {
		return entry
	}
	return nil
}

// GetBuildID returns the results indexed under buildID, or nil if there are
// none. The empty build ID never matches.
func (c *Cache) GetBuildID(buildID string) *analyze.Results {
	if buildID != "" {
		return c.buildIDs[buildID]
	}
	return nil
}
// AnalyzePID analyzes the executable of the process whose ID is given in
// decimal string form, reading it through /proc/<pid>/exe.
//
// pid must parse as an unsigned 32-bit decimal number; otherwise the parse
// error is returned and nothing is looked up or cached. Rejecting bad input
// here keeps malformed strings from being treated as PID 0 and from being
// joined into the /proc path.
func (c *Cache) AnalyzePID(pid string) (*analyze.Results, error) {
	ipid, err := strconv.ParseUint(pid, 10, 32)
	if err != nil {
		return nil, err
	}
	exePath := filepath.Join("/proc", pid, "exe")
	return c.AnalyzePIDPath(uint32(ipid), pid, exePath)
}
// AnalyzePIDPath returns the analysis results for the process pid whose
// executable is at exePath, using cached results when possible.
//
// Lookups are tried from cheapest to most expensive:
//  1. by PID;
//  2. by the Stat of exePath;
//  3. by the build ID read from the ELF file;
//  4. otherwise the executable is analyzed with analyze.Analyze.
//
// Whenever results are found or produced past step 1 they are stored for
// pid. pidS is forwarded to the analyzers as the string form of pid. Errors
// from stat, open, or ELF parsing are returned and nothing is cached.
func (c *Cache) AnalyzePIDPath(pid uint32, pidS string, exePath string) (*analyze.Results, error) {
	if cached := c.GetPID(pid); cached != nil {
		return cached.Results, nil
	}

	// Stat first: this both checks that the executable exists and gives a
	// key that lets another PID's analysis be reused without opening the file.
	info, err := os.Stat(exePath)
	if err != nil {
		return nil, err
	}
	stat := StatFromFileInfo(info)
	if cached := c.GetStat(stat); cached != nil {
		c.Put(pid, cached)
		return cached.Results, nil
	}

	file, err := os.Open(exePath)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	elfFile, err := elf.NewFile(file)
	if err != nil {
		return nil, err
	}
	defer elfFile.Close()

	// The build ID is best effort: on failure it stays empty, which simply
	// disables the build-ID lookup and index for this executable.
	buildID, _ := BuildID(elfFile)

	results := c.GetBuildID(buildID)
	if results == nil {
		results = analyze.Analyze(c.l, analyze.Input{
			PID:     pid,
			PIDs:    pidS,
			File:    file,
			ElfFile: elfFile,
		})
	}

	c.Put(pid, &Entry{
		Results: results,
		Stat:    stat,
		BuildID: buildID,
	})
	return results, nil
}

// GC drops everything that is no longer reachable from a live process.
//
// active is the set of PIDs to keep. Entries for all other PIDs are removed,
// then any Stat or build ID that no remaining entry refers to is removed
// from the secondary indexes.
func (c *Cache) GC(active map[uint32]struct{}) {
	liveStats := make(map[Stat]struct{})
	liveBuildIDs := make(map[string]struct{})

	// Single pass: evict dead PIDs and note what the survivors reference.
	for pid, entry := range c.pids {
		if _, alive := active[pid]; !alive {
			delete(c.pids, pid)
			continue
		}
		liveStats[entry.Stat] = struct{}{}
		if entry.BuildID != "" {
			liveBuildIDs[entry.BuildID] = struct{}{}
		}
	}

	for stat := range c.stats {
		if _, live := liveStats[stat]; !live {
			delete(c.stats, stat)
		}
	}
	for buildID := range c.buildIDs {
		if _, live := liveBuildIDs[buildID]; !live {
			delete(c.buildIDs, buildID)
		}
	}
}
94 changes: 94 additions & 0 deletions component/discovery/process/analyze/cache/cache_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
//go:build linux

package cache

import (
"io"
"os"
"testing"

"github.com/grafana/agent/pkg/util"
"github.com/stretchr/testify/require"
)

// copyFile copies the contents of src into a newly created (or truncated)
// file at dst and fails the test immediately on any error.
//
// The destination is closed explicitly so that a write error that only
// surfaces at close time fails the test instead of being dropped.
func copyFile(t *testing.T, src, dst string) {
	t.Helper()

	s, err := os.Open(src)
	if err != nil {
		t.Fatal(err)
	}
	defer s.Close()

	d, err := os.Create(dst)
	if err != nil {
		t.Fatal(err)
	}
	if _, err := io.Copy(d, s); err != nil {
		d.Close() // best effort; the copy error is the one worth reporting
		t.Fatal(err)
	}
	if err := d.Close(); err != nil {
		t.Fatal(err)
	}
}

// TestCache exercises the three lookup levels of Cache (PID, Stat, build ID)
// and checks that GC trims each index once nothing references it.
//
// exe1 and exe2 are separate copies of the running test binary, so the test
// expects them to occupy two Stat slots but share one build ID; the symlink
// is expected to share exe1's Stat slot.
func TestCache(t *testing.T) {
	d := t.TempDir()
	exe1 := d + "/exe1"
	exe2 := d + "/exe2"
	exe1Link := d + "/exe1-symlink"

	copyFile(t, "/proc/self/exe", exe1)
	copyFile(t, "/proc/self/exe", exe2)
	require.NoError(t, os.Symlink(exe1, exe1Link))

	c := New(util.TestLogger(t))

	// requireSizes asserts the number of entries in each index.
	requireSizes := func(pids, stats, buildIDs int) {
		t.Helper()
		require.Equal(t, pids, len(c.pids))
		require.Equal(t, stats, len(c.stats))
		require.Equal(t, buildIDs, len(c.buildIDs))
	}
	// gcKeeping runs GC with exactly the given PIDs marked active.
	gcKeeping := func(pids ...uint32) {
		active := make(map[uint32]struct{}, len(pids))
		for _, pid := range pids {
			active[pid] = struct{}{}
		}
		c.GC(active)
	}

	r1, err := c.AnalyzePIDPath(1, "1", exe1)
	require.NoError(t, err)

	// requireShared analyzes a process and asserts it yields the very same
	// Results pointer as the first analysis.
	requireShared := func(pid uint32, pidS, path string) {
		t.Helper()
		r, err := c.AnalyzePIDPath(pid, pidS, path)
		require.NoError(t, err)
		require.True(t, r1 == r)
	}

	// Same PID again.
	requireShared(1, "1", exe1)

	// New PID, same file reached through a symlink.
	requireShared(2, "2", exe1Link)
	requireSizes(2, 1, 1)

	// New PID, different file with identical contents.
	requireShared(3, "3", exe2)
	requireSizes(3, 2, 1)

	// All PIDs still active: nothing is collected.
	gcKeeping(1, 2, 3)
	requireSizes(3, 2, 1)

	// PID 1 is gone, but PID 2 still references exe1's Stat.
	gcKeeping(2, 3)
	requireSizes(2, 2, 1)

	requireShared(2, "2", exe1Link)
	requireShared(3, "3", exe2)

	// Only PID 3 (exe2) remains.
	gcKeeping(3)
	requireSizes(1, 1, 1)

	// Nothing remains.
	gcKeeping()
	requireSizes(0, 0, 0)
}
Loading
Loading