Skip to content

Commit

Permalink
Merge branch 'vbpf:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
Alan-Jowett authored Jan 24, 2025
2 parents 43d7d76 + 0a5bc2f commit 5c33736
Show file tree
Hide file tree
Showing 14 changed files with 189 additions and 49 deletions.
2 changes: 2 additions & 0 deletions src/asm_cfg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,8 @@ static std::string instype(Instruction ins) {
return "arith";
} else if (std::holds_alternative<LoadMapFd>(ins)) {
return "assign";
} else if (std::holds_alternative<LoadMapAddress>(ins)) {
return "assign";
} else if (std::holds_alternative<Assume>(ins)) {
return "assume";
} else {
Expand Down
92 changes: 76 additions & 16 deletions src/asm_files.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,16 +162,20 @@ get_program_name_and_size(const ELFIO::section& sec, const ELFIO::Elf_Xword star
return {program_name, size};
}

void relocate_map(ebpf_inst& inst, const std::string& symbol_name,
// Ensures that a relocation targets a load-class instruction.
//
// instruction  The instruction the relocation entry points at.
// symbol_name  Name reported in the error message.
// offset       Byte offset of the instruction; divided by sizeof(ebpf_inst) to
//              report the instruction index in the error message.
//
// Throws UnmarshalError when the class bits of the opcode (opcode & INST_CLS_MASK)
// are not INST_CLS_LD.
void verify_load_instruction(const ebpf_inst& instruction, const std::string& symbol_name, ELFIO::Elf64_Addr offset) {
    const bool is_load_class = (instruction.opcode & INST_CLS_MASK) == INST_CLS_LD;
    if (is_load_class) {
        return;
    }
    const auto instruction_index = offset / sizeof(ebpf_inst);
    throw UnmarshalError("Illegal operation on symbol " + symbol_name + " at location " +
                         std::to_string(instruction_index));
}

void relocate_map(ebpf_inst& reloc_inst, const std::string& symbol_name,
const std::variant<size_t, std::map<std::string, size_t>>& map_record_size_or_map_offsets,
const program_info& info, const ELFIO::Elf64_Addr offset, const ELFIO::Elf_Word index,
const ELFIO::const_symbol_section_accessor& symbols) {
// Only permit loading the address of the map.
if ((inst.opcode & INST_CLS_MASK) != INST_CLS_LD) {
throw UnmarshalError("Illegal operation on symbol " + symbol_name + " at location " +
std::to_string(offset / sizeof(ebpf_inst)));
}
inst.src = 1; // magic number for LoadFd
verify_load_instruction(reloc_inst, symbol_name, offset);
reloc_inst.src = INST_LD_MODE_MAP_FD;

// Relocation value is an offset into the "maps" or ".maps" section.
size_t reloc_value = std::numeric_limits<size_t>::max();
Expand All @@ -187,13 +191,42 @@ void relocate_map(ebpf_inst& inst, const std::string& symbol_name,
const auto it = map_descriptors_offsets.find(symbol_name);
if (it != map_descriptors_offsets.end()) {
reloc_value = it->second;
} else {
throw UnmarshalError("Map descriptor not found for symbol " + symbol_name);
}
}
if (reloc_value >= info.map_descriptors.size()) {
throw UnmarshalError("Bad reloc value (" + std::to_string(reloc_value) + "). " +
"Make sure to compile with -O2.");
}
inst.imm = info.map_descriptors.at(reloc_value).original_fd;
reloc_inst.imm = info.map_descriptors.at(reloc_value).original_fd;
}

// Rewrites a two-slot load instruction that references a global variable so that it
// refers to the map descriptor registered under `symbol_name`.
//
// reloc_inst       First slot of the load. Its `src` is set to INST_LD_MODE_MAP_VALUE and
//                  its `imm` is replaced by the `original_fd` of the matching map descriptor.
// next_reloc_inst  Second slot of the load. Receives the immediate that was stored in
//                  `reloc_inst` before it is overwritten.
// symbol_name      Key looked up in the name -> descriptor-index table.
//                  NOTE(review): the call site in read_elf appears to pass the name of the
//                  section containing the symbol rather than the symbol itself - confirm.
// info             Supplies `map_descriptors`, indexed by the value found in the table.
// map_record_size_or_map_offsets
//                  Must hold the name -> index map alternative (index 1) for the lookup
//                  to succeed.
// offset           Byte offset of the relocated instruction; used only for error reporting.
//
// Throws UnmarshalError when the instruction is not a load, when no descriptor index is
// available for `symbol_name`, or when the index is out of range for `info.map_descriptors`.
void relocate_global_variable(ebpf_inst& reloc_inst, ebpf_inst& next_reloc_inst, const std::string& symbol_name,
                              const program_info& info,
                              const std::variant<size_t, std::map<std::string, size_t>>& map_record_size_or_map_offsets,
                              const ELFIO::Elf64_Addr offset) {
    // Only permit loading the address of the global variable.
    verify_load_instruction(reloc_inst, symbol_name, offset);

    // Preserve the original immediate in the second slot before the first slot's
    // immediate is replaced by the map fd below.
    next_reloc_inst.imm = reloc_inst.imm;
    reloc_inst.src = INST_LD_MODE_MAP_VALUE;

    // Use get_if rather than get<1>: if the variant holds the size_t alternative there is
    // no name table, and that must surface as an UnmarshalError like every other
    // malformed-input case here instead of escaping as std::bad_variant_access.
    const auto* map_descriptors_offsets = std::get_if<1>(&map_record_size_or_map_offsets);
    if (map_descriptors_offsets == nullptr) {
        throw UnmarshalError("Map descriptor not found for symbol " + symbol_name);
    }

    const auto it = map_descriptors_offsets->find(symbol_name);
    if (it == map_descriptors_offsets->end()) {
        throw UnmarshalError("Map descriptor not found for symbol " + symbol_name);
    }

    const size_t reloc_value = it->second;
    if (reloc_value >= info.map_descriptors.size()) {
        throw UnmarshalError("Bad reloc value (" + std::to_string(reloc_value) + "). " +
                             "Make sure to compile with -O2.");
    }
    reloc_inst.imm = info.map_descriptors.at(reloc_value).original_fd;
}

// Structure used to keep track of subprogram relocation data until any subprograms
Expand Down Expand Up @@ -319,6 +352,7 @@ vector<raw_program> read_elf(std::istream& input_stream, const std::string& path

program_info info{platform};
std::set<ELFIO::Elf_Half> map_section_indices;
std::set<ELFIO::Elf_Half> global_variable_section_indices;

auto btf = reader.sections[".BTF"];
std::optional<libbtf::btf_type_data> btf_data;
Expand All @@ -338,13 +372,11 @@ vector<raw_program> read_elf(std::istream& input_stream, const std::string& path

std::variant<size_t, std::map<std::string, size_t>> map_record_size_or_map_offsets = size_t{0};
ELFIO::const_symbol_section_accessor symbols{reader, symbol_section};
if (!reader.sections[".maps"]) {

if (std::ranges::any_of(reader.sections, [](const auto& section) { return is_map_section(section->get_name()); })) {
map_record_size_or_map_offsets =
parse_map_sections(options, platform, reader, info.map_descriptors, map_section_indices, symbols);
} else {
if (!btf_data.has_value()) {
throw UnmarshalError("No BTF section found in ELF file " + path);
}
} else if (btf_data.has_value()) {
map_record_size_or_map_offsets = parse_map_section(*btf_data, info.map_descriptors);
// Prevail requires:
// Map fds are sequential starting from 1.
Expand All @@ -366,7 +398,17 @@ vector<raw_program> read_elf(std::istream& input_stream, const std::string& path
map_descriptor.inner_map_fd = type_id_to_fd_map[map_descriptor.inner_map_fd];
}
}
map_section_indices.insert(reader.sections[".maps"]->get_index());
if (reader.sections[".maps"]) {
map_section_indices.insert(reader.sections[".maps"]->get_index());
}

for (auto section_name : {".rodata", ".data", ".bss"}) {
if (const auto section = reader.sections[section_name]) {
if (section->get_size() != 0) {
global_variable_section_indices.insert(section->get_index());
}
}
}
}

vector<raw_program> res;
Expand Down Expand Up @@ -426,12 +468,13 @@ vector<raw_program> read_elf(std::istream& input_stream, const std::string& path
if (offset / sizeof(ebpf_inst) >= prog.prog.size()) {
throw UnmarshalError("Invalid relocation data");
}
ebpf_inst& inst = prog.prog[offset / sizeof(ebpf_inst)];

ebpf_inst& reloc_inst = prog.prog[offset / sizeof(ebpf_inst)];

auto [symbol_name, symbol_section_index] = get_symbol_name_and_section_index(symbols, index);

// Queue up relocation for function symbols.
if (inst.opcode == INST_OP_CALL && inst.src == INST_CALL_LOCAL) {
if (reloc_inst.opcode == INST_OP_CALL && reloc_inst.src == INST_CALL_LOCAL) {
function_relocation fr{.prog_index = res.size(),
.source_offset = offset / sizeof(ebpf_inst),
.relocation_entry_index = index,
Expand All @@ -440,9 +483,26 @@ vector<raw_program> read_elf(std::istream& input_stream, const std::string& path
continue;
}

// Verify that this is a map or global variable relocation.
verify_load_instruction(reloc_inst, symbol_name, offset);

// Load instructions are two instructions long, so we need to check the next instruction.
if (prog.prog.size() <= offset / sizeof(ebpf_inst) + 1) {
throw UnmarshalError("Invalid relocation data");
}
ebpf_inst& next_reloc_inst = prog.prog[offset / sizeof(ebpf_inst) + 1];

// Perform relocation for symbols located in the maps section.
if (map_section_indices.contains(symbol_section_index)) {
relocate_map(inst, symbol_name, map_record_size_or_map_offsets, info, offset, index, symbols);
relocate_map(reloc_inst, symbol_name, map_record_size_or_map_offsets, info, offset, index,
symbols);
continue;
}

if (global_variable_section_indices.contains(symbol_section_index)) {
relocate_global_variable(reloc_inst, next_reloc_inst,
reader.sections[symbol_section_index]->get_name(), info,
map_record_size_or_map_offsets, offset);
continue;
}

Expand Down
17 changes: 12 additions & 5 deletions src/asm_marshal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,10 @@ static uint8_t imm_endian(const Un::Op op) {

struct MarshalVisitor {
private:
static vector<ebpf_inst> makeLddw(const Reg dst, const bool isFd, const int32_t imm, const int32_t next_imm) {
static vector<ebpf_inst> makeLddw(const Reg dst, const uint8_t type, const int32_t imm, const int32_t next_imm) {
return {ebpf_inst{.opcode = gsl::narrow<uint8_t>(INST_CLS_LD | width_to_opcode(8)),
.dst = dst.v,
.src = gsl::narrow<uint8_t>(isFd ? 1 : 0),
.src = type,
.offset = 0,
.imm = imm},
ebpf_inst{.opcode = 0, .dst = 0, .src = 0, .offset = 0, .imm = next_imm}};
Expand All @@ -105,14 +105,18 @@ struct MarshalVisitor {
return {};
}

vector<ebpf_inst> operator()(LoadMapFd const& b) const { return makeLddw(b.dst, true, b.mapfd, 0); }
vector<ebpf_inst> operator()(LoadMapFd const& b) const { return makeLddw(b.dst, INST_LD_MODE_MAP_FD, b.mapfd, 0); }

// Encodes a map-value address load via makeLddw: the map fd is passed as the immediate
// of the first slot and the offset as the immediate of the second slot.
vector<ebpf_inst> operator()(LoadMapAddress const& b) const {
    const int32_t first_slot_imm = b.mapfd;
    const int32_t second_slot_imm = b.offset;
    return makeLddw(b.dst, INST_LD_MODE_MAP_VALUE, first_slot_imm, second_slot_imm);
}

vector<ebpf_inst> operator()(Bin const& b) const {
if (b.lddw) {
const auto pimm = std::get_if<Imm>(&b.v);
assert(pimm != nullptr);
auto [imm, next_imm] = split(pimm->v);
return makeLddw(b.dst, false, imm, next_imm);
return makeLddw(b.dst, INST_LD_MODE_IMM, imm, next_imm);
}

ebpf_inst res{.opcode = gsl::narrow<uint8_t>((b.is64 ? INST_CLS_ALU64 : INST_CLS_ALU) | (op(b.op) << 4)),
Expand Down Expand Up @@ -295,7 +299,7 @@ vector<ebpf_inst> marshal(const Instruction& ins, const pc_t pc) {
return std::visit(MarshalVisitor{crab::label_to_offset16(pc), crab::label_to_offset32(pc)}, ins);
}

static int size(const Instruction& inst) {
int asm_syntax::size(const Instruction& inst) {
if (const auto pins = std::get_if<Bin>(&inst)) {
if (pins->lddw) {
return 2;
Expand All @@ -304,6 +308,9 @@ static int size(const Instruction& inst) {
if (std::holds_alternative<LoadMapFd>(inst)) {
return 2;
}
if (std::holds_alternative<LoadMapAddress>(inst)) {
return 2;
}
return 1;
}

Expand Down
14 changes: 2 additions & 12 deletions src/asm_ostream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,8 @@ struct CommandPrinterVisitor {

// Prints a map fd load as "<dst> = map_fd <mapfd>".
void operator()(LoadMapFd const& b) {
    os_ << b.dst;
    os_ << " = map_fd " << b.mapfd;
}

// Prints a map-value address load as "<dst> = map_val(<mapfd>) + <offset>".
void operator()(LoadMapAddress const& b) {
    os_ << b.dst;
    os_ << " = map_val(" << b.mapfd;
    os_ << ") + " << b.offset;
}

// Register naming follows llvm-objdump for consistency: "r<number>" denotes a 64-bit
// operation and "w<number>" a 32-bit operation.
static std::string reg_name(Reg const& a, const bool is64) {
    const char* const prefix = is64 ? "r" : "w";
    return prefix + std::to_string(a.v);
}
Expand Down Expand Up @@ -542,18 +544,6 @@ string to_string(Assertion const& constraint) {
return str.str();
}

int size(const Instruction& inst) {
if (const auto bin = std::get_if<Bin>(&inst)) {
if (bin->lddw) {
return 2;
}
}
if (std::holds_alternative<LoadMapFd>(inst)) {
return 2;
}
return 1;
}

auto get_labels(const InstructionSeq& insts) {
pc_t pc = 0;
std::map<label_t, pc_t> pc_of_label;
Expand Down
13 changes: 13 additions & 0 deletions src/asm_parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ using crab::number_t;
#define DOT "[.]"
#define TYPE R"_(\s*(shared|number|packet|stack|ctx|map_fd|map_fd_programs)\s*)_"

// Match "map_val(<fd>) + <offset>", with optional whitespace around the expression,
// around the '+', and at the end.
// Within this fragment the first capture group is the fd and the second is the offset;
// both accept only unsigned decimal digit sequences (no sign character).
#define MAP_VAL R"_(\s*map_val\((\d+)\)\s*\+\s*(\d+)\s*)_"

// Match "map_fd <fd>", requiring at least one whitespace character between the keyword
// and the fd. The single capture group in this fragment is the unsigned decimal fd.
#define MAP_FD R"_(\s*map_fd\s+(\d+)\s*)_"

static const std::map<std::string, Bin::Op> str_to_binop = {
{"", Bin::Op::MOV}, {"+", Bin::Op::ADD}, {"-", Bin::Op::SUB}, {"*", Bin::Op::MUL},
{"/", Bin::Op::UDIV}, {"%", Bin::Op::UMOD}, {"|", Bin::Op::OR}, {"&", Bin::Op::AND},
Expand Down Expand Up @@ -164,6 +170,13 @@ Instruction parse_instruction(const std::string& line, const std::map<std::strin
}
return Un{.op = str_to_unop.at(m[2]), .dst = reg(m[1]), .is64 = is64_reg(m[1])};
}
if (regex_match(text, m, regex(WREG ASSIGN MAP_VAL))) {
return LoadMapAddress{
.dst = reg(m[1]), .mapfd = boost::lexical_cast<int>(m[2]), .offset = boost::lexical_cast<int>(m[3])};
}
if (regex_match(text, m, regex(WREG ASSIGN MAP_FD))) {
return LoadMapFd{.dst = reg(m[1]), .mapfd = boost::lexical_cast<int>(m[2])};
}
if (regex_match(text, m, regex(WREG OPASSIGN IMM LONGLONG))) {
const std::string r = m[1];
const bool lddw = !m[4].str().empty();
Expand Down
13 changes: 12 additions & 1 deletion src/asm_syntax.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,15 @@ struct LoadMapFd {
constexpr bool operator==(const LoadMapFd&) const = default;
};

// Instruction that places the address of a map value into a register.
struct LoadMapAddress {
    // Destination register that receives the address of the map value.
    Reg dst;
    // File descriptor of the map to load the address from.
    int32_t mapfd = 0;
    // Offset within the map; must be within bounds.
    int32_t offset = 0;

    constexpr bool operator==(const LoadMapAddress&) const = default;
};

struct Condition {
enum class Op {
EQ,
Expand Down Expand Up @@ -249,7 +258,7 @@ struct IncrementLoopCounter {
};

using Instruction = std::variant<Undefined, Bin, Un, LoadMapFd, Call, CallLocal, Callx, Exit, Jmp, Mem, Packet, Atomic,
Assume, IncrementLoopCounter>;
Assume, IncrementLoopCounter, LoadMapAddress>;

using LabeledInstruction = std::tuple<label_t, Instruction, std::optional<btf_line_info_t>>;
using InstructionSeq = std::vector<LabeledInstruction>;
Expand Down Expand Up @@ -374,6 +383,8 @@ std::string to_string(const Assertion& constraint);
void print(const InstructionSeq& insts, std::ostream& out, const std::optional<const label_t>& label_to_print,
bool print_line_info = false);

int size(const Instruction& inst);

} // namespace asm_syntax

using namespace asm_syntax;
Expand Down
24 changes: 14 additions & 10 deletions src/asm_unmarshal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ struct Unmarshaller {
if (next.opcode != 0 || next.dst != 0 || next.src != 0 || next.offset != 0) {
throw InvalidInstruction(pc, "invalid lddw");
}
if (inst.src > 1) {
if (inst.src > INST_LD_MODE_MAP_VALUE) {
throw InvalidInstruction(pc, make_opcode_message("bad instruction", inst.opcode));
}
if (inst.offset != 0) {
Expand All @@ -439,22 +439,26 @@ struct Unmarshaller {
throw InvalidInstruction(pc, "bad register");
}

if (inst.src == 1) {
switch (inst.src) {
case INST_LD_MODE_IMM:
return Bin{
.op = Bin::Op::MOV,
.dst = Reg{inst.dst},
.v = Imm{merge(inst.imm, next_imm)},
.is64 = true,
.lddw = true,
};
case INST_LD_MODE_MAP_FD: {
// magic number, meaning we're a per-process file descriptor defining the map.
// (for details, look for BPF_PSEUDO_MAP_FD in the kernel)
if (next.imm != 0) {
throw InvalidInstruction(pc, "lddw uses reserved fields");
}
return LoadMapFd{.dst = Reg{inst.dst}, .mapfd = inst.imm};
}

return Bin{
.op = Bin::Op::MOV,
.dst = Reg{inst.dst},
.v = Imm{merge(inst.imm, next_imm)},
.is64 = true,
.lddw = true,
};
case INST_LD_MODE_MAP_VALUE: return LoadMapAddress{.dst = Reg{inst.dst}, .mapfd = inst.imm, .offset = next_imm};
default: throw InvalidInstruction(pc, make_opcode_message("bad instruction", inst.opcode));
}
}

static ArgSingle::Kind toArgSingleKind(const ebpf_argument_type_t t) {
Expand Down
1 change: 1 addition & 0 deletions src/assertions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class AssertExtractor {
// Produces a single assertion: a BoundedLoopCount built from the loop counter's name.
vector<Assertion> operator()(const IncrementLoopCounter& ipc) const { return {{BoundedLoopCount{ipc.name}}}; }

// A LoadMapFd instruction yields no assertions.
vector<Assertion> operator()(const LoadMapFd&) const {
    return vector<Assertion>{};
}
// A LoadMapAddress instruction yields no assertions.
vector<Assertion> operator()(const LoadMapAddress&) const {
    return vector<Assertion>{};
}

/// Packet access implicitly uses R6, so verify that R6 still has a pointer to the context.
vector<Assertion> operator()(const Packet&) const { return zero_offset_ctx({6}); }
Expand Down
Loading

0 comments on commit 5c33736

Please sign in to comment.