Skip to content

Commit

Permalink
Add support for 64bit immediate with type 2
Browse files Browse the repository at this point in the history
From the ISA RFC:
5.4.  64-bit immediate instructions

   Instructions with the IMM 'mode' modifier use the wide instruction
   encoding defined in Instruction encoding (Section 3), and use the
   'src_reg' field of the basic instruction to hold an opcode subtype.

   The following table defines a set of {IMM, DW, LD} instructions with
   opcode subtypes in the 'src_reg' field, using new terms such as "map"
   defined further below:

    +=========+================================+==========+==========+
    | src_reg | pseudocode                     | imm type | dst type |
    +=========+================================+==========+==========+
    | 0x0     | dst = (next_imm << 32) | imm   | integer  | integer  |
    +---------+--------------------------------+----------+----------+
    | 0x1     | dst = map_by_fd(imm)           | map fd   | map      |
    +---------+--------------------------------+----------+----------+
    | 0x2     | dst = map_val(map_by_fd(imm))  | map fd   | data     |
    |         | + next_imm                     |          | address  |
    +---------+--------------------------------+----------+----------+
    | 0x3     | dst = var_addr(imm)            | variable | data     |
    |         |                                | id       | address  |
    +---------+--------------------------------+----------+----------+
    | 0x4     | dst = code_addr(imm)           | integer  | code     |
    |         |                                |          | address  |
    +---------+--------------------------------+----------+----------+
    | 0x5     | dst = map_by_idx(imm)          | map      | map      |
    |         |                                | index    |          |
    +---------+--------------------------------+----------+----------+
    | 0x6     | dst = map_val(map_by_idx(imm)) | map      | data     |
    |         | + next_imm                     | index    | address  |
    +---------+--------------------------------+----------+----------+

                 Table 12: 64-bit immediate instructions

Signed-off-by: Alan Jowett <[email protected]>
  • Loading branch information
Alan-Jowett committed Jan 13, 2025
1 parent 46344ae commit 23edeee
Show file tree
Hide file tree
Showing 15 changed files with 187 additions and 29 deletions.
2 changes: 2 additions & 0 deletions src/asm_cfg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,8 @@ static std::string instype(Instruction ins) {
return "arith";
} else if (std::holds_alternative<LoadMapFd>(ins)) {
return "assign";
} else if (std::holds_alternative<LoadMapAddress>(ins)) {
return "assign";
} else if (std::holds_alternative<Assume>(ins)) {
return "assume";
} else {
Expand Down
102 changes: 86 additions & 16 deletions src/asm_files.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,16 +162,20 @@ get_program_name_and_size(const ELFIO::section& sec, const ELFIO::Elf_Xword star
return {program_name, size};
}

void relocate_map(ebpf_inst& inst, const std::string& symbol_name,
// Ensure that a relocation targets a load-class (INST_CLS_LD) instruction.
//
// instruction: the instruction the relocation entry points at.
// symbol_name: name reported in the error message.
// offset:      relocation offset; divided by sizeof(ebpf_inst) to report an instruction index.
//
// Throws UnmarshalError when the instruction's class bits are not INST_CLS_LD.
void verify_load_instruction(const ebpf_inst& instruction, const std::string& symbol_name, ELFIO::Elf64_Addr offset) {
    const bool is_load_class = (instruction.opcode & INST_CLS_MASK) == INST_CLS_LD;
    if (is_load_class) {
        return;
    }

    const auto instruction_index = offset / sizeof(ebpf_inst);
    throw UnmarshalError("Illegal operation on symbol " + symbol_name + " at location " +
                         std::to_string(instruction_index));
}

void relocate_map(ebpf_inst& reloc_inst, const std::string& symbol_name,
const std::variant<size_t, std::map<std::string, size_t>>& map_record_size_or_map_offsets,
const program_info& info, const ELFIO::Elf64_Addr offset, const ELFIO::Elf_Word index,
const ELFIO::const_symbol_section_accessor& symbols) {
// Only permit loading the address of the map.
if ((inst.opcode & INST_CLS_MASK) != INST_CLS_LD) {
throw UnmarshalError("Illegal operation on symbol " + symbol_name + " at location " +
std::to_string(offset / sizeof(ebpf_inst)));
}
inst.src = 1; // magic number for LoadFd
verify_load_instruction(reloc_inst, symbol_name, offset);
reloc_inst.src = INST_LD_MODE_MAP_FD;

// Relocation value is an offset into the "maps" or ".maps" section.
size_t reloc_value = std::numeric_limits<size_t>::max();
Expand All @@ -187,13 +191,42 @@ void relocate_map(ebpf_inst& inst, const std::string& symbol_name,
const auto it = map_descriptors_offsets.find(symbol_name);
if (it != map_descriptors_offsets.end()) {
reloc_value = it->second;
} else {
throw UnmarshalError("Map descriptor not found for symbol " + symbol_name);
}
}
if (reloc_value >= info.map_descriptors.size()) {
throw UnmarshalError("Bad reloc value (" + std::to_string(reloc_value) + "). " +
"Make sure to compile with -O2.");
}
inst.imm = info.map_descriptors.at(reloc_value).original_fd;
reloc_inst.imm = info.map_descriptors.at(reloc_value).original_fd;
}

// Rewrite a two-slot load instruction that references a global variable so that it loads a map value
// address (src = INST_LD_MODE_MAP_VALUE, i.e. dst = map_val(map_by_fd(imm)) + next_imm).
//
// reloc_inst:      first slot of the load; receives the map-value mode in `src` and the map's
//                  original_fd in `imm`.
// next_reloc_inst: second slot of the load; receives the immediate that reloc_inst carried on entry.
// symbol_name:     key looked up in the map-offset table and reported in error messages.
// info:            supplies the map descriptors indexed by the relocation value.
// map_record_size_or_map_offsets: must hold the name -> descriptor-index map alternative.
// offset:          relocation offset, used only for error reporting.
//
// Throws UnmarshalError if reloc_inst is not a load-class instruction, if no name -> index table is
// available, if symbol_name is not in the table, or if the index is outside info.map_descriptors.
void relocate_global_variable(ebpf_inst& reloc_inst, ebpf_inst& next_reloc_inst, const std::string& symbol_name,
                              const program_info& info,
                              const std::variant<size_t, std::map<std::string, size_t>>& map_record_size_or_map_offsets,
                              const ELFIO::Elf64_Addr offset) {
    // Only permit loading the address of the global variable.
    verify_load_instruction(reloc_inst, symbol_name, offset);

    // Copy the immediate value to the next instruction.
    next_reloc_inst.imm = reloc_inst.imm;
    reloc_inst.src = INST_LD_MODE_MAP_VALUE;

    // std::get<1> would throw std::bad_variant_access when the variant holds the size_t alternative;
    // report that case through the same UnmarshalError channel as every other relocation failure.
    const auto* map_descriptors_offsets = std::get_if<1>(&map_record_size_or_map_offsets);
    if (map_descriptors_offsets == nullptr) {
        throw UnmarshalError("Map descriptor not found for symbol " + symbol_name);
    }

    const auto it = map_descriptors_offsets->find(symbol_name);
    if (it == map_descriptors_offsets->end()) {
        throw UnmarshalError("Map descriptor not found for symbol " + symbol_name);
    }
    const size_t reloc_value = it->second;

    if (reloc_value >= info.map_descriptors.size()) {
        throw UnmarshalError("Bad reloc value (" + std::to_string(reloc_value) + "). " +
                             "Make sure to compile with -O2.");
    }
    reloc_inst.imm = info.map_descriptors.at(reloc_value).original_fd;
}

// Structure used to keep track of subprogram relocation data until any subprograms
Expand Down Expand Up @@ -319,6 +352,7 @@ vector<raw_program> read_elf(std::istream& input_stream, const std::string& path

program_info info{platform};
std::set<ELFIO::Elf_Half> map_section_indices;
std::set<ELFIO::Elf_Half> global_variable_section_indices;

auto btf = reader.sections[".BTF"];
std::optional<libbtf::btf_type_data> btf_data;
Expand All @@ -338,13 +372,17 @@ vector<raw_program> read_elf(std::istream& input_stream, const std::string& path

std::variant<size_t, std::map<std::string, size_t>> map_record_size_or_map_offsets = size_t{0};
ELFIO::const_symbol_section_accessor symbols{reader, symbol_section};
if (!reader.sections[".maps"]) {
bool contains_old_style_map_sections = false;
for (const auto& section : reader.sections) {
if (is_map_section(section->get_name())) {
contains_old_style_map_sections = true;
break;
}
}
if (contains_old_style_map_sections) {
map_record_size_or_map_offsets =
parse_map_sections(options, platform, reader, info.map_descriptors, map_section_indices, symbols);
} else {
if (!btf_data.has_value()) {
throw UnmarshalError("No BTF section found in ELF file " + path);
}
} else if (btf_data.has_value()) {
map_record_size_or_map_offsets = parse_map_section(*btf_data, info.map_descriptors);
// Prevail requires:
// Map fds are sequential starting from 1.
Expand All @@ -366,7 +404,21 @@ vector<raw_program> read_elf(std::istream& input_stream, const std::string& path
map_descriptor.inner_map_fd = type_id_to_fd_map[map_descriptor.inner_map_fd];
}
}
map_section_indices.insert(reader.sections[".maps"]->get_index());
if (reader.sections[".maps"]) {
map_section_indices.insert(reader.sections[".maps"]->get_index());
}

if (reader.sections[".data"]) {
global_variable_section_indices.insert(reader.sections[".data"]->get_index());
}

if (reader.sections[".bss"]) {
global_variable_section_indices.insert(reader.sections[".bss"]->get_index());
}

if (reader.sections[".rodata"]) {
global_variable_section_indices.insert(reader.sections[".rodata"]->get_index());
}
}

vector<raw_program> res;
Expand Down Expand Up @@ -426,12 +478,13 @@ vector<raw_program> read_elf(std::istream& input_stream, const std::string& path
if (offset / sizeof(ebpf_inst) >= prog.prog.size()) {
throw UnmarshalError("Invalid relocation data");
}
ebpf_inst& inst = prog.prog[offset / sizeof(ebpf_inst)];

ebpf_inst& reloc_inst = prog.prog[offset / sizeof(ebpf_inst)];

auto [symbol_name, symbol_section_index] = get_symbol_name_and_section_index(symbols, index);

// Queue up relocation for function symbols.
if (inst.opcode == INST_OP_CALL && inst.src == INST_CALL_LOCAL) {
if (reloc_inst.opcode == INST_OP_CALL && reloc_inst.src == INST_CALL_LOCAL) {
function_relocation fr{.prog_index = res.size(),
.source_offset = offset / sizeof(ebpf_inst),
.relocation_entry_index = index,
Expand All @@ -440,9 +493,26 @@ vector<raw_program> read_elf(std::istream& input_stream, const std::string& path
continue;
}

// Verify that this is a map or global variable relocation.
verify_load_instruction(reloc_inst, symbol_name, offset);

// Load instructions are two instructions long, so we need to check the next instruction.
if (prog.prog.size() <= offset / sizeof(ebpf_inst) + 1) {
throw UnmarshalError("Invalid relocation data");
}
ebpf_inst& next_reloc_inst = prog.prog[offset / sizeof(ebpf_inst) + 1];

// Perform relocation for symbols located in the maps section.
if (map_section_indices.contains(symbol_section_index)) {
relocate_map(inst, symbol_name, map_record_size_or_map_offsets, info, offset, index, symbols);
relocate_map(reloc_inst, symbol_name, map_record_size_or_map_offsets, info, offset, index,
symbols);
continue;
}

if (global_variable_section_indices.contains(symbol_section_index)) {
relocate_global_variable(reloc_inst, next_reloc_inst,
reader.sections[symbol_section_index]->get_name(), info,
map_record_size_or_map_offsets, offset);
continue;
}

Expand Down
15 changes: 11 additions & 4 deletions src/asm_marshal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,10 @@ static uint8_t imm_endian(const Un::Op op) {

struct MarshalVisitor {
private:
static vector<ebpf_inst> makeLddw(const Reg dst, const bool isFd, const int32_t imm, const int32_t next_imm) {
static vector<ebpf_inst> makeLddw(const Reg dst, const uint8_t type, const int32_t imm, const int32_t next_imm) {
return {ebpf_inst{.opcode = gsl::narrow<uint8_t>(INST_CLS_LD | width_to_opcode(8)),
.dst = dst.v,
.src = gsl::narrow<uint8_t>(isFd ? 1 : 0),
.src = type,
.offset = 0,
.imm = imm},
ebpf_inst{.opcode = 0, .dst = 0, .src = 0, .offset = 0, .imm = next_imm}};
Expand All @@ -105,14 +105,18 @@ struct MarshalVisitor {
return {};
}

vector<ebpf_inst> operator()(LoadMapFd const& b) const { return makeLddw(b.dst, true, b.mapfd, 0); }
vector<ebpf_inst> operator()(LoadMapFd const& b) const { return makeLddw(b.dst, INST_LD_MODE_MAP_FD, b.mapfd, 0); }

// Encode a LoadMapAddress as an lddw pair in INST_LD_MODE_MAP_VALUE mode:
// the map fd is passed as the first immediate and the offset as the second.
vector<ebpf_inst> operator()(LoadMapAddress const& b) const {
    const int32_t first_imm = b.mapfd;
    const int32_t second_imm = b.offset;
    return makeLddw(b.dst, INST_LD_MODE_MAP_VALUE, first_imm, second_imm);
}

vector<ebpf_inst> operator()(Bin const& b) const {
if (b.lddw) {
const auto pimm = std::get_if<Imm>(&b.v);
assert(pimm != nullptr);
auto [imm, next_imm] = split(pimm->v);
return makeLddw(b.dst, false, imm, next_imm);
return makeLddw(b.dst, INST_LD_MODE_IMM, imm, next_imm);
}

ebpf_inst res{.opcode = gsl::narrow<uint8_t>((b.is64 ? INST_CLS_ALU64 : INST_CLS_ALU) | (op(b.op) << 4)),
Expand Down Expand Up @@ -304,6 +308,9 @@ static int size(const Instruction& inst) {
if (std::holds_alternative<LoadMapFd>(inst)) {
return 2;
}
if (std::holds_alternative<LoadMapAddress>(inst)) {
return 2;
}
return 1;
}

Expand Down
5 changes: 5 additions & 0 deletions src/asm_ostream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,8 @@ struct CommandPrinterVisitor {

void operator()(LoadMapFd const& b) { os_ << b.dst << " = map_fd " << b.mapfd; }

// Print a LoadMapAddress as "<dst> = map_val(<mapfd>) + <offset>".
void operator()(LoadMapAddress const& b) {
    os_ << b.dst;
    os_ << " = map_val(" << b.mapfd << ") + ";
    os_ << b.offset;
}

// llvm-objdump uses "w<number>" for 32-bit operations and "r<number>" for 64-bit operations.
// We use the same convention here for consistency.
static std::string reg_name(Reg const& a, const bool is64) { return ((is64) ? "r" : "w") + std::to_string(a.v); }
Expand Down Expand Up @@ -551,6 +553,9 @@ int size(const Instruction& inst) {
if (std::holds_alternative<LoadMapFd>(inst)) {
return 2;
}
if (std::holds_alternative<LoadMapAddress>(inst)) {
return 2;
}
return 1;
}

Expand Down
13 changes: 13 additions & 0 deletions src/asm_parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ using crab::number_t;
#define DOT "[.]"
#define TYPE R"_(\s*(shared|number|packet|stack|ctx|map_fd|map_fd_programs)\s*)_"

// Match map_val(fd) + offset
#define MAP_VAL R"_(\s*map_val\((\d+)\)\s*\+\s*(\d+)\s*)_"

// Match map_fd fd
#define MAP_FD R"_(\s*map_fd\s+(\d+)\s*)_"

static const std::map<std::string, Bin::Op> str_to_binop = {
{"", Bin::Op::MOV}, {"+", Bin::Op::ADD}, {"-", Bin::Op::SUB}, {"*", Bin::Op::MUL},
{"/", Bin::Op::UDIV}, {"%", Bin::Op::UMOD}, {"|", Bin::Op::OR}, {"&", Bin::Op::AND},
Expand Down Expand Up @@ -164,6 +170,13 @@ Instruction parse_instruction(const std::string& line, const std::map<std::strin
}
return Un{.op = str_to_unop.at(m[2]), .dst = reg(m[1]), .is64 = is64_reg(m[1])};
}
if (regex_match(text, m, regex(WREG ASSIGN MAP_VAL))) {
return LoadMapAddress{
.dst = reg(m[1]), .mapfd = boost::lexical_cast<int>(m[2]), .offset = boost::lexical_cast<int>(m[3])};
}
if (regex_match(text, m, regex(WREG ASSIGN MAP_FD))) {
return LoadMapFd{.dst = reg(m[1]), .mapfd = boost::lexical_cast<int>(m[2])};
}
if (regex_match(text, m, regex(WREG OPASSIGN IMM LONGLONG))) {
const std::string r = m[1];
const bool lddw = !m[4].str().empty();
Expand Down
11 changes: 10 additions & 1 deletion src/asm_syntax.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,15 @@ struct LoadMapFd {
constexpr bool operator==(const LoadMapFd&) const = default;
};

// Load the address of a map value into a register.
// Corresponds to the 64-bit immediate load with src_reg subtype 0x2 (INST_LD_MODE_MAP_VALUE);
// printed as "dst = map_val(mapfd) + offset".
struct LoadMapAddress {
// Register that receives the address.
Reg dst;
// Map file descriptor; marshalled into the immediate of the first instruction slot.
int32_t mapfd{};
// Offset added to the map value address; marshalled into the immediate of the second slot.
int32_t offset{};

// Member-wise equality.
constexpr bool operator==(const LoadMapAddress&) const = default;
};

struct Condition {
enum class Op {
EQ,
Expand Down Expand Up @@ -249,7 +258,7 @@ struct IncrementLoopCounter {
};

using Instruction = std::variant<Undefined, Bin, Un, LoadMapFd, Call, CallLocal, Callx, Exit, Jmp, Mem, Packet, Atomic,
Assume, IncrementLoopCounter>;
Assume, IncrementLoopCounter, LoadMapAddress>;

using LabeledInstruction = std::tuple<label_t, Instruction, std::optional<btf_line_info_t>>;
using InstructionSeq = std::vector<LabeledInstruction>;
Expand Down
7 changes: 5 additions & 2 deletions src/asm_unmarshal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ struct Unmarshaller {
if (next.opcode != 0 || next.dst != 0 || next.src != 0 || next.offset != 0) {
throw InvalidInstruction(pc, "invalid lddw");
}
if (inst.src > 1) {
if (inst.src > INST_LD_MODE_MAP_VALUE) {
throw InvalidInstruction(pc, make_opcode_message("bad instruction", inst.opcode));
}
if (inst.offset != 0) {
Expand All @@ -439,14 +439,17 @@ struct Unmarshaller {
throw InvalidInstruction(pc, "bad register");
}

if (inst.src == 1) {
if (inst.src == INST_LD_MODE_MAP_FD) {
// magic number, meaning we're a per-process file descriptor defining the map.
// (for details, look for BPF_PSEUDO_MAP_FD in the kernel)
if (next.imm != 0) {
throw InvalidInstruction(pc, "lddw uses reserved fields");
}
return LoadMapFd{.dst = Reg{inst.dst}, .mapfd = inst.imm};
}
if (inst.src == INST_LD_MODE_MAP_VALUE) {
return LoadMapAddress{.dst = Reg{inst.dst}, .mapfd = inst.imm, .offset = next_imm};
}

return Bin{
.op = Bin::Op::MOV,
Expand Down
1 change: 1 addition & 0 deletions src/assertions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class AssertExtractor {
vector<Assertion> operator()(const IncrementLoopCounter& ipc) const { return {{BoundedLoopCount{ipc.name}}}; }

vector<Assertion> operator()(const LoadMapFd&) const { return {}; }
vector<Assertion> operator()(const LoadMapAddress&) const { return {}; }

/// Packet access implicitly uses R6, so verify that R6 still has a pointer to the context.
vector<Assertion> operator()(const Packet&) const { return zero_offset_ctx({6}); }
Expand Down
16 changes: 16 additions & 0 deletions src/crab/ebpf_transformer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class ebpf_transformer final {
void operator()(const IncrementLoopCounter&);
void operator()(const Jmp&) const;
void operator()(const LoadMapFd&);
void operator()(const LoadMapAddress&);
void operator()(const Mem&);
void operator()(const Packet&);
void operator()(const Un&);
Expand Down Expand Up @@ -113,6 +114,7 @@ class ebpf_transformer final {
void havoc_subprogram_stack(const std::string& prefix);
void forget_packet_pointers();
void do_load_mapfd(const Reg& dst_reg, int mapfd, bool maybe_null);
void do_load_map_address(const Reg& dst_reg, const int mapfd, int32_t offset);

void assign_valid_ptr(const Reg& dst_reg, bool maybe_null);

Expand Down Expand Up @@ -1839,6 +1841,20 @@ void ebpf_transformer::do_load_mapfd(const Reg& dst_reg, const int mapfd, const

void ebpf_transformer::operator()(const LoadMapFd& ins) { do_load_mapfd(ins.dst, ins.mapfd, false); }

// Model loading the address of a map value into dst_reg.
//
// dst_reg: register that receives the pointer.
// mapfd:   file descriptor used to look up the map descriptor on the current platform.
// offset:  value assigned to the register's shared_offset.
//
// After the call dst_reg has type T_SHARED, its shared_region_size is the map's value_size,
// and it is marked as a valid pointer with maybe_null = false.
void ebpf_transformer::do_load_map_address(const Reg& dst_reg, const int mapfd, int32_t offset) {
    const EbpfMapDescriptor& desc = thread_local_program_info->platform->get_map_descriptor(mapfd);

    // Set the shared region size and offset for the map.
    type_inv.assign_type(m_inv, dst_reg, T_SHARED);
    const reg_pack_t& dst = reg_pack(dst_reg);
    m_inv.assign(dst.shared_offset, offset);
    m_inv.assign(dst.shared_region_size, desc.value_size);
    // NOTE(review): offset is not compared against desc.value_size here — confirm whether an
    // out-of-range offset is rejected elsewhere.
    assign_valid_ptr(dst_reg, false);
}

// Apply a LoadMapAddress instruction by forwarding its fields to do_load_map_address.
void ebpf_transformer::operator()(const LoadMapAddress& ins) {
    const Reg& target = ins.dst;
    do_load_map_address(target, ins.mapfd, ins.offset);
}

void ebpf_transformer::assign_valid_ptr(const Reg& dst_reg, const bool maybe_null) {
using namespace crab::dsl_syntax;
const reg_pack_t& reg = reg_pack(dst_reg);
Expand Down
4 changes: 4 additions & 0 deletions src/ebpf_vm_isa.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ enum {
INST_ALU_OP_ARSH = 0xc0,
INST_ALU_OP_END = 0xd0,
INST_ALU_OP_MASK = 0xf0,

INST_LD_MODE_IMM = 0x0, // 64-bit immediate value
INST_LD_MODE_MAP_FD = 0x1, // Load map fd
INST_LD_MODE_MAP_VALUE = 0x2, // Load map value
};

enum {
Expand Down
Loading

0 comments on commit 23edeee

Please sign in to comment.