From 43d7d76023f7669f44b4198a616e1d48cd850013 Mon Sep 17 00:00:00 2001 From: Alan Jowett Date: Tue, 21 Jan 2025 13:47:22 -0800 Subject: [PATCH 1/5] Fix unused parameter warning Signed-off-by: Alan Jowett --- src/crab/split_dbm.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/crab/split_dbm.hpp b/src/crab/split_dbm.hpp index 82ef3189..2c9dfe38 100644 --- a/src/crab/split_dbm.hpp +++ b/src/crab/split_dbm.hpp @@ -209,8 +209,8 @@ class SplitDBM final { SplitDBM widen(const SplitDBM& o) const; [[nodiscard]] - SplitDBM widening_thresholds(const SplitDBM& o, const iterators::thresholds_t& ts) const { - // TODO: use thresholds + SplitDBM widening_thresholds(const SplitDBM& o, const iterators::thresholds_t&) const { + // TODO: use thresholds. Threshold is anonymous until used to prevent unused parameter warning. return this->widen(o); } From 9c4fe7b75985567311c1289cd4c62e09a0ddd0e0 Mon Sep 17 00:00:00 2001 From: Alan Jowett Date: Tue, 7 Jan 2025 11:07:54 -0800 Subject: [PATCH 2/5] Add support for 64bit immediate with type 2 From the ISA RFC: 5.4. 64-bit immediate instructions Instructions with the IMM 'mode' modifier use the wide instruction encoding defined in Instruction encoding (Section 3), and use the 'src_reg' field of the basic instruction to hold an opcode subtype. The following table defines a set of {IMM, DW, LD} instructions with opcode subtypes in the 'src_reg' field, using new terms such as "map" defined further below: +=========+================================+==========+==========+ | src_reg | pseudocode | imm type | dst type | +=========+================================+==========+==========+ | 0x0 | dst = (next_imm << 32) | imm | integer | integer | +---------+--------------------------------+----------+----------+ | 0x1 | dst = map_by_fd(imm) | map fd | map | +---------+--------------------------------+----------+----------+ | 0x2 | dst = map_val(map_by_fd(imm)) | map fd | data | | | + next_imm | | address | +---------+--------------------------------+----------+----------+ | 0x3 | dst = var_addr(imm) | variable | data | | | | id | address | +---------+--------------------------------+----------+----------+ | 0x4 | dst = code_addr(imm) | integer | code | | | | | address | +---------+--------------------------------+----------+----------+ | 0x5 | dst = map_by_idx(imm) | map | map | | | | index | | +---------+--------------------------------+----------+----------+ | 0x6 | dst = map_val(map_by_idx(imm)) | map | data | | | + next_imm | index | address | +---------+--------------------------------+----------+----------+ Table 12: 64-bit immediate instructions Signed-off-by: Alan Jowett --- src/asm_cfg.cpp | 2 + src/asm_files.cpp | 102 ++++++++++++++++++++++++++++------ src/asm_marshal.cpp | 15 +++-- src/asm_ostream.cpp | 5 ++ src/asm_parse.cpp | 13 +++++ src/asm_syntax.hpp | 11 +++- src/asm_unmarshal.cpp | 7 ++- src/assertions.cpp | 1 + src/crab/ebpf_transformer.cpp | 16 ++++++ src/ebpf_vm_isa.hpp | 4 ++ src/test/test_marshal.cpp | 7 ++- src/test/test_verify.cpp | 4 +- src/test/test_yaml.cpp | 1 + test-data/map.yaml | 26 +++++++++ 14 files changed, 186 insertions(+), 28 deletions(-) create mode 100644 test-data/map.yaml diff --git a/src/asm_cfg.cpp b/src/asm_cfg.cpp index acaf689f..acbbc169 100644 --- a/src/asm_cfg.cpp +++ b/src/asm_cfg.cpp @@ -399,6 +399,8 @@ static std::string instype(Instruction ins) { return "arith"; } else if (std::holds_alternative(ins)) { return "assign"; + } else if (std::holds_alternative(ins)) { + return "assign"; } else if (std::holds_alternative(ins)) { return "assume"; } else { diff --git a/src/asm_files.cpp b/src/asm_files.cpp index 551a8881..c17920df 100644 --- a/src/asm_files.cpp +++ b/src/asm_files.cpp @@ -162,16 +162,20 @@ get_program_name_and_size(const ELFIO::section& sec, const ELFIO::Elf_Xword star return {program_name, size}; } -void relocate_map(ebpf_inst& inst, const std::string& symbol_name, +void verify_load_instruction(const ebpf_inst& instruction, const std::string& symbol_name, ELFIO::Elf64_Addr offset) { + if ((instruction.opcode & INST_CLS_MASK) != INST_CLS_LD) { + throw UnmarshalError("Illegal operation on symbol " + symbol_name + " at location " + + std::to_string(offset / sizeof(ebpf_inst))); + } +} + +void relocate_map(ebpf_inst& reloc_inst, const std::string& symbol_name, const std::variant>& map_record_size_or_map_offsets, const program_info& info, const ELFIO::Elf64_Addr offset, const ELFIO::Elf_Word index, const ELFIO::const_symbol_section_accessor& symbols) { // Only permit loading the address of the map. - if ((inst.opcode & INST_CLS_MASK) != INST_CLS_LD) { - throw UnmarshalError("Illegal operation on symbol " + symbol_name + " at location " + - std::to_string(offset / sizeof(ebpf_inst))); - } - inst.src = 1; // magic number for LoadFd + verify_load_instruction(reloc_inst, symbol_name, offset); + reloc_inst.src = INST_LD_MODE_MAP_FD; // Relocation value is an offset into the "maps" or ".maps" section. size_t reloc_value = std::numeric_limits::max(); @@ -187,13 +191,42 @@ void relocate_map(ebpf_inst& inst, const std::string& symbol_name, const auto it = map_descriptors_offsets.find(symbol_name); if (it != map_descriptors_offsets.end()) { reloc_value = it->second; + } else { + throw UnmarshalError("Map descriptor not found for symbol " + symbol_name); } } if (reloc_value >= info.map_descriptors.size()) { throw UnmarshalError("Bad reloc value (" + std::to_string(reloc_value) + "). " + "Make sure to compile with -O2."); } - inst.imm = info.map_descriptors.at(reloc_value).original_fd; + reloc_inst.imm = info.map_descriptors.at(reloc_value).original_fd; +} + +void relocate_global_variable(ebpf_inst& reloc_inst, ebpf_inst& next_reloc_inst, const std::string& symbol_name, + const program_info& info, + const std::variant>& map_record_size_or_map_offsets, + const ELFIO::Elf64_Addr offset) { + // Only permit loading the address of the global variable. + verify_load_instruction(reloc_inst, symbol_name, offset); + + // Copy the immediate value to the next instruction. + next_reloc_inst.imm = reloc_inst.imm; + reloc_inst.src = INST_LD_MODE_MAP_VALUE; + + size_t reloc_value = std::numeric_limits::max(); + auto& map_descriptors_offsets = std::get<1>(map_record_size_or_map_offsets); + const auto it = map_descriptors_offsets.find(symbol_name); + if (it != map_descriptors_offsets.end()) { + reloc_value = it->second; + } else { + throw UnmarshalError("Map descriptor not found for symbol " + symbol_name); + } + + if (reloc_value >= info.map_descriptors.size()) { + throw UnmarshalError("Bad reloc value (" + std::to_string(reloc_value) + "). " + + "Make sure to compile with -O2."); + } + reloc_inst.imm = info.map_descriptors.at(reloc_value).original_fd; } // Structure used to keep track of subprogram relocation data until any subprograms @@ -319,6 +352,7 @@ vector read_elf(std::istream& input_stream, const std::string& path program_info info{platform}; std::set map_section_indices; + std::set global_variable_section_indices; auto btf = reader.sections[".BTF"]; std::optional btf_data; @@ -338,13 +372,17 @@ vector read_elf(std::istream& input_stream, const std::string& path std::variant> map_record_size_or_map_offsets = size_t{0}; ELFIO::const_symbol_section_accessor symbols{reader, symbol_section}; - if (!reader.sections[".maps"]) { + bool contains_old_style_map_sections = false; + for (const auto& section : reader.sections) { + if (is_map_section(section->get_name())) { + contains_old_style_map_sections = true; + break; + } + } + if (contains_old_style_map_sections) { map_record_size_or_map_offsets = parse_map_sections(options, platform, reader, info.map_descriptors, map_section_indices, symbols); - } else { - if (!btf_data.has_value()) { - throw UnmarshalError("No BTF section found in ELF file " + path); - } + } else if (btf_data.has_value()) { map_record_size_or_map_offsets = parse_map_section(*btf_data, info.map_descriptors); // Prevail requires: // Map fds are sequential starting from 1. @@ -366,7 +404,21 @@ vector read_elf(std::istream& input_stream, const std::string& path map_descriptor.inner_map_fd = type_id_to_fd_map[map_descriptor.inner_map_fd]; } } - map_section_indices.insert(reader.sections[".maps"]->get_index()); + if (reader.sections[".maps"]) { + map_section_indices.insert(reader.sections[".maps"]->get_index()); + } + + if (reader.sections[".data"]) { + global_variable_section_indices.insert(reader.sections[".data"]->get_index()); + } + + if (reader.sections[".bss"]) { + global_variable_section_indices.insert(reader.sections[".bss"]->get_index()); + } + + if (reader.sections[".rodata"]) { + global_variable_section_indices.insert(reader.sections[".rodata"]->get_index()); + } } vector res; @@ -426,12 +478,13 @@ vector read_elf(std::istream& input_stream, const std::string& path if (offset / sizeof(ebpf_inst) >= prog.prog.size()) { throw UnmarshalError("Invalid relocation data"); } - ebpf_inst& inst = prog.prog[offset / sizeof(ebpf_inst)]; + + ebpf_inst& reloc_inst = prog.prog[offset / sizeof(ebpf_inst)]; auto [symbol_name, symbol_section_index] = get_symbol_name_and_section_index(symbols, index); // Queue up relocation for function symbols. - if (inst.opcode == INST_OP_CALL && inst.src == INST_CALL_LOCAL) { + if (reloc_inst.opcode == INST_OP_CALL && reloc_inst.src == INST_CALL_LOCAL) { function_relocation fr{.prog_index = res.size(), .source_offset = offset / sizeof(ebpf_inst), .relocation_entry_index = index, @@ -440,9 +493,26 @@ vector read_elf(std::istream& input_stream, const std::string& path continue; } + // Verify that this is a map or global variable relocation. + verify_load_instruction(reloc_inst, symbol_name, offset); + + // Load instructions are two instructions long, so we need to check the next instruction. + if (prog.prog.size() <= offset / sizeof(ebpf_inst) + 1) { + throw UnmarshalError("Invalid relocation data"); + } + ebpf_inst& next_reloc_inst = prog.prog[offset / sizeof(ebpf_inst) + 1]; + // Perform relocation for symbols located in the maps section. if (map_section_indices.contains(symbol_section_index)) { - relocate_map(inst, symbol_name, map_record_size_or_map_offsets, info, offset, index, symbols); + relocate_map(reloc_inst, symbol_name, map_record_size_or_map_offsets, info, offset, index, + symbols); + continue; + } + + if (global_variable_section_indices.contains(symbol_section_index)) { + relocate_global_variable(reloc_inst, next_reloc_inst, + reader.sections[symbol_section_index]->get_name(), info, + map_record_size_or_map_offsets, offset); continue; } diff --git a/src/asm_marshal.cpp b/src/asm_marshal.cpp index 953435b7..8ed5d3ef 100644 --- a/src/asm_marshal.cpp +++ b/src/asm_marshal.cpp @@ -87,10 +87,10 @@ static uint8_t imm_endian(const Un::Op op) { struct MarshalVisitor { private: - static vector makeLddw(const Reg dst, const bool isFd, const int32_t imm, const int32_t next_imm) { + static vector makeLddw(const Reg dst, const uint8_t type, const int32_t imm, const int32_t next_imm) { return {ebpf_inst{.opcode = gsl::narrow(INST_CLS_LD | width_to_opcode(8)), .dst = dst.v, - .src = gsl::narrow(isFd ? 1 : 0), + .src = type, .offset = 0, .imm = imm}, ebpf_inst{.opcode = 0, .dst = 0, .src = 0, .offset = 0, .imm = next_imm}}; @@ -105,14 +105,18 @@ struct MarshalVisitor { return {}; } - vector operator()(LoadMapFd const& b) const { return makeLddw(b.dst, true, b.mapfd, 0); } + vector operator()(LoadMapFd const& b) const { return makeLddw(b.dst, INST_LD_MODE_MAP_FD, b.mapfd, 0); } + + vector operator()(LoadMapAddress const& b) const { + return makeLddw(b.dst, INST_LD_MODE_MAP_VALUE, b.mapfd, b.offset); + } vector operator()(Bin const& b) const { if (b.lddw) { const auto pimm = std::get_if(&b.v); assert(pimm != nullptr); auto [imm, next_imm] = split(pimm->v); - return makeLddw(b.dst, false, imm, next_imm); + return makeLddw(b.dst, INST_LD_MODE_IMM, imm, next_imm); } ebpf_inst res{.opcode = gsl::narrow((b.is64 ? INST_CLS_ALU64 : INST_CLS_ALU) | (op(b.op) << 4)), @@ -304,6 +308,9 @@ static int size(const Instruction& inst) { if (std::holds_alternative(inst)) { return 2; } + if (std::holds_alternative(inst)) { + return 2; + } return 1; } diff --git a/src/asm_ostream.cpp b/src/asm_ostream.cpp index 1fb9fab4..941932f4 100644 --- a/src/asm_ostream.cpp +++ b/src/asm_ostream.cpp @@ -359,6 +359,8 @@ struct CommandPrinterVisitor { void operator()(LoadMapFd const& b) { os_ << b.dst << " = map_fd " << b.mapfd; } + void operator()(LoadMapAddress const& b) { os_ << b.dst << " = map_val(" << b.mapfd << ") + " << b.offset; } + // llvm-objdump uses "w" for 32-bit operations and "r" for 64-bit operations. // We use the same convention here for consistency. static std::string reg_name(Reg const& a, const bool is64) { return ((is64) ? "r" : "w") + std::to_string(a.v); } @@ -551,6 +553,9 @@ int size(const Instruction& inst) { if (std::holds_alternative(inst)) { return 2; } + if (std::holds_alternative(inst)) { + return 2; + } return 1; } diff --git a/src/asm_parse.cpp b/src/asm_parse.cpp index 31ec6b9a..3d3e0376 100644 --- a/src/asm_parse.cpp +++ b/src/asm_parse.cpp @@ -54,6 +54,12 @@ using crab::number_t; #define DOT "[.]" #define TYPE R"_(\s*(shared|number|packet|stack|ctx|map_fd|map_fd_programs)\s*)_" +// Match map_val(fd) + offset +#define MAP_VAL R"_(\s*map_val\((\d+)\)\s*\+\s*(\d+)\s*)_" + +// Match map_fd fd +#define MAP_FD R"_(\s*map_fd\s+(\d+)\s*)_" + static const std::map str_to_binop = { {"", Bin::Op::MOV}, {"+", Bin::Op::ADD}, {"-", Bin::Op::SUB}, {"*", Bin::Op::MUL}, {"/", Bin::Op::UDIV}, {"%", Bin::Op::UMOD}, {"|", Bin::Op::OR}, {"&", Bin::Op::AND}, @@ -164,6 +170,13 @@ Instruction parse_instruction(const std::string& line, const std::map(m[2]), .offset = boost::lexical_cast(m[3])}; + } + if (regex_match(text, m, regex(WREG ASSIGN MAP_FD))) { + return LoadMapFd{.dst = reg(m[1]), .mapfd = boost::lexical_cast(m[2])}; + } if (regex_match(text, m, regex(WREG OPASSIGN IMM LONGLONG))) { const std::string r = m[1]; const bool lddw = !m[4].str().empty(); diff --git a/src/asm_syntax.hpp b/src/asm_syntax.hpp index 3c257919..645cec66 100644 --- a/src/asm_syntax.hpp +++ b/src/asm_syntax.hpp @@ -93,6 +93,15 @@ struct LoadMapFd { constexpr bool operator==(const LoadMapFd&) const = default; }; +// Load the address of a map value into a register. +struct LoadMapAddress { + Reg dst; + int32_t mapfd{}; + int32_t offset{}; + + constexpr bool operator==(const LoadMapAddress&) const = default; +}; + struct Condition { enum class Op { EQ, @@ -249,7 +258,7 @@ struct IncrementLoopCounter { }; using Instruction = std::variant; + Assume, IncrementLoopCounter, LoadMapAddress>; using LabeledInstruction = std::tuple>; using InstructionSeq = std::vector; diff --git a/src/asm_unmarshal.cpp b/src/asm_unmarshal.cpp index e35c851c..e7e999dc 100644 --- a/src/asm_unmarshal.cpp +++ b/src/asm_unmarshal.cpp @@ -429,7 +429,7 @@ struct Unmarshaller { if (next.opcode != 0 || next.dst != 0 || next.src != 0 || next.offset != 0) { throw InvalidInstruction(pc, "invalid lddw"); } - if (inst.src > 1) { + if (inst.src > INST_LD_MODE_MAP_VALUE) { throw InvalidInstruction(pc, make_opcode_message("bad instruction", inst.opcode)); } if (inst.offset != 0) { @@ -439,7 +439,7 @@ struct Unmarshaller { throw InvalidInstruction(pc, "bad register"); } - if (inst.src == 1) { + if (inst.src == INST_LD_MODE_MAP_FD) { // magic number, meaning we're a per-process file descriptor defining the map. // (for details, look for BPF_PSEUDO_MAP_FD in the kernel) if (next.imm != 0) { @@ -447,6 +447,9 @@ struct Unmarshaller { } return LoadMapFd{.dst = Reg{inst.dst}, .mapfd = inst.imm}; } + if (inst.src == INST_LD_MODE_MAP_VALUE) { + return LoadMapAddress{.dst = Reg{inst.dst}, .mapfd = inst.imm, .offset = next_imm}; + } return Bin{ .op = Bin::Op::MOV, diff --git a/src/assertions.cpp b/src/assertions.cpp index 3b82b292..0d8f7f7c 100644 --- a/src/assertions.cpp +++ b/src/assertions.cpp @@ -45,6 +45,7 @@ class AssertExtractor { vector operator()(const IncrementLoopCounter& ipc) const { return {{BoundedLoopCount{ipc.name}}}; } vector operator()(const LoadMapFd&) const { return {}; } + vector operator()(const LoadMapAddress&) const { return {}; } /// Packet access implicitly uses R6, so verify that R6 still has a pointer to the context. vector operator()(const Packet&) const { return zero_offset_ctx({6}); } diff --git a/src/crab/ebpf_transformer.cpp b/src/crab/ebpf_transformer.cpp index c3eec5d4..424fabf7 100644 --- a/src/crab/ebpf_transformer.cpp +++ b/src/crab/ebpf_transformer.cpp @@ -44,6 +44,7 @@ class ebpf_transformer final { void operator()(const IncrementLoopCounter&); void operator()(const Jmp&) const; void operator()(const LoadMapFd&); + void operator()(const LoadMapAddress&); void operator()(const Mem&); void operator()(const Packet&); void operator()(const Un&); @@ -113,6 +114,7 @@ class ebpf_transformer final { void havoc_subprogram_stack(const std::string& prefix); void forget_packet_pointers(); void do_load_mapfd(const Reg& dst_reg, int mapfd, bool maybe_null); + void do_load_map_address(const Reg& dst_reg, const int mapfd, int32_t offset); void assign_valid_ptr(const Reg& dst_reg, bool maybe_null); @@ -1839,6 +1841,20 @@ void ebpf_transformer::do_load_mapfd(const Reg& dst_reg, const int mapfd, const void ebpf_transformer::operator()(const LoadMapFd& ins) { do_load_mapfd(ins.dst, ins.mapfd, false); } +void ebpf_transformer::do_load_map_address(const Reg& dst_reg, const int mapfd, int32_t offset) { + const EbpfMapDescriptor& desc = thread_local_program_info->platform->get_map_descriptor(mapfd); + const EbpfMapType& type = thread_local_program_info->platform->get_map_type(desc.type); + + // Set the shared region size and offset for the map. + type_inv.assign_type(m_inv, dst_reg, T_SHARED); + const reg_pack_t& dst = reg_pack(dst_reg); + m_inv.assign(dst.shared_offset, offset); + m_inv.assign(dst.shared_region_size, desc.value_size); + assign_valid_ptr(dst_reg, false); +} + +void ebpf_transformer::operator()(const LoadMapAddress& ins) { do_load_map_address(ins.dst, ins.mapfd, ins.offset); } + void ebpf_transformer::assign_valid_ptr(const Reg& dst_reg, const bool maybe_null) { using namespace crab::dsl_syntax; const reg_pack_t& reg = reg_pack(dst_reg); diff --git a/src/ebpf_vm_isa.hpp b/src/ebpf_vm_isa.hpp index 09f204e5..1248564f 100644 --- a/src/ebpf_vm_isa.hpp +++ b/src/ebpf_vm_isa.hpp @@ -87,6 +87,10 @@ enum { INST_ALU_OP_ARSH = 0xc0, INST_ALU_OP_END = 0xd0, INST_ALU_OP_MASK = 0xf0, + + INST_LD_MODE_IMM = 0x0, // 64-bit immediate value + INST_LD_MODE_MAP_FD = 0x1, // Load map fd + INST_LD_MODE_MAP_VALUE = 0x2, // Load map value }; enum { diff --git a/src/test/test_marshal.cpp b/src/test/test_marshal.cpp index 7dcae9bd..8cba191d 100644 --- a/src/test/test_marshal.cpp +++ b/src/test/test_marshal.cpp @@ -41,8 +41,8 @@ static const ebpf_instruction_template_t instruction_template[] = { {{0x17, DST, 0, 0, IMM}, bpf_conformance_groups_t::base64}, {{0x18, DST, 0, 0, IMM}, bpf_conformance_groups_t::base64}, {{0x18, DST, 1, 0, IMM}, bpf_conformance_groups_t::base64}, - // TODO(issue #533): add support for LDDW with src_reg > 1. - // {{0x18, DST, 2, 0, IMM}, bpf_conformance_groups_t::base64}, + {{0x18, DST, 2, 0, IMM}, bpf_conformance_groups_t::base64}, + // TODO(issue #533): add support for LDDW with src_reg > 2. // {{0x18, DST, 3, 0, IMM}, bpf_conformance_groups_t::base64}, // {{0x18, DST, 4, 0, IMM}, bpf_conformance_groups_t::base64}, // {{0x18, DST, 5, 0, IMM}, bpf_conformance_groups_t::base64}, @@ -392,6 +392,9 @@ TEST_CASE("disasm_marshal", "[disasm][marshal]") { } SECTION("LoadMapFd") { compare_marshal_unmarshal(LoadMapFd{.dst = Reg{1}, .mapfd = 1}, true); } + SECTION("LoadMapAddress") { + compare_marshal_unmarshal(LoadMapAddress{.dst = Reg{1}, .mapfd = 1, .offset = 4}, true); + } SECTION("Jmp") { auto ops = {Condition::Op::EQ, Condition::Op::GT, Condition::Op::GE, Condition::Op::SET, diff --git a/src/test/test_verify.cpp b/src/test/test_verify.cpp index 50d428b9..d7bfd058 100644 --- a/src/test/test_verify.cpp +++ b/src/test/test_verify.cpp @@ -424,9 +424,6 @@ TEST_SECTION("falco", "probe.o", "raw_tracepoint/signal_deliver") TEST_SECTION_REJECT_IF_STRICT("build", "mapoverflow.o", ".text") TEST_SECTION_REJECT_IF_STRICT("build", "mapunderflow.o", ".text") -// Test uses global variables, which are not supported yet by the verifier. -TEST_SECTION_FAIL("build", "global_variable.o", ".text") - /* * These programs contain "call -1" instruction and cannot be verified: TEST_SECTION("raw_tracepoint/filler/sys_access_e") @@ -528,6 +525,7 @@ TEST_SECTION("build", "store_map_value_in_map.o", ".text") TEST_SECTION("build", "twomaps.o", ".text"); TEST_SECTION("build", "twostackvars.o", ".text"); TEST_SECTION("build", "twotypes.o", ".text"); +TEST_SECTION("build", "global_variable.o", ".text") TEST_PROGRAM("build", "prog_array.o", ".text", "func", 5); TEST_PROGRAM("build", "prog_array.o", ".text", "func0", 5); TEST_PROGRAM("build", "prog_array.o", ".text", "func1", 5); diff --git a/src/test/test_yaml.cpp b/src/test/test_yaml.cpp index a29ab173..3b786b72 100644 --- a/src/test/test_yaml.cpp +++ b/src/test/test_yaml.cpp @@ -31,6 +31,7 @@ YAML_CASE("test-data/sdivmod.yaml") YAML_CASE("test-data/full64.yaml") YAML_CASE("test-data/jump.yaml") YAML_CASE("test-data/loop.yaml") +YAML_CASE("test-data/map.yaml") YAML_CASE("test-data/movsx.yaml") YAML_CASE("test-data/muldiv.yaml") YAML_CASE("test-data/packet.yaml") diff --git a/test-data/map.yaml b/test-data/map.yaml new file mode 100644 index 00000000..750cff09 --- /dev/null +++ b/test-data/map.yaml @@ -0,0 +1,26 @@ +# Copyright (c) Prevail Verifier contributors. +# SPDX-License-Identifier: MIT +--- +test-case: load_map_address +pre: [] +code: + : | + r2 = map_val(1) + 8 +post: + - r2.shared_offset=8 + - r2.shared_region_size=4 + - r2.svalue=[1, 2147418112] + - r2.type=shared + - r2.uvalue=r2.svalue +--- +test-case: load_map_fd +pre: [] +code: + : | + r1 = map_fd 1 + +post: + - r1.map_fd=1 + - r1.type=map_fd + - r1.svalue=[1, 2147418112] + - r1.uvalue=r1.svalue From e3dd2736e676e09db07258001c0333ef8006197d Mon Sep 17 00:00:00 2001 From: Alan Jowett Date: Sun, 19 Jan 2025 14:14:38 -0800 Subject: [PATCH 3/5] PR feedback Signed-off-by: Alan Jowett --- src/asm_syntax.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/asm_syntax.hpp b/src/asm_syntax.hpp index 645cec66..53b2ba49 100644 --- a/src/asm_syntax.hpp +++ b/src/asm_syntax.hpp @@ -95,9 +95,9 @@ struct LoadMapFd { // Load the address of a map value into a register. struct LoadMapAddress { - Reg dst; - int32_t mapfd{}; - int32_t offset{}; + Reg dst; // Destination register to store the address of the map value. + int32_t mapfd{}; // File descriptor of the map to load the address from. + int32_t offset{}; // Offset within the map, must be within bounds. constexpr bool operator==(const LoadMapAddress&) const = default; }; From 320dee72e773a48bb74d66419552fdfb48b7cfae Mon Sep 17 00:00:00 2001 From: Alan Jowett Date: Thu, 23 Jan 2025 09:06:50 -0800 Subject: [PATCH 4/5] PR feedback Signed-off-by: Alan Jowett --- src/asm_files.cpp | 26 ++++++++------------------ src/asm_marshal.cpp | 2 +- src/asm_ostream.cpp | 15 --------------- src/asm_syntax.hpp | 2 ++ src/asm_unmarshal.cpp | 23 ++++++++++++----------- src/crab/ebpf_transformer.cpp | 6 +++++- 6 files changed, 28 insertions(+), 46 deletions(-) diff --git a/src/asm_files.cpp b/src/asm_files.cpp index c17920df..d9b8b680 100644 --- a/src/asm_files.cpp +++ b/src/asm_files.cpp @@ -372,14 +372,8 @@ vector read_elf(std::istream& input_stream, const std::string& path std::variant> map_record_size_or_map_offsets = size_t{0}; ELFIO::const_symbol_section_accessor symbols{reader, symbol_section}; - bool contains_old_style_map_sections = false; - for (const auto& section : reader.sections) { - if (is_map_section(section->get_name())) { - contains_old_style_map_sections = true; - break; - } - } - if (contains_old_style_map_sections) { + + if (std::ranges::any_of(reader.sections, [](const auto& section) { return is_map_section(section->get_name()); })) { map_record_size_or_map_offsets = parse_map_sections(options, platform, reader, info.map_descriptors, map_section_indices, symbols); } else if (btf_data.has_value()) { @@ -408,16 +402,12 @@ vector read_elf(std::istream& input_stream, const std::string& path map_section_indices.insert(reader.sections[".maps"]->get_index()); } - if (reader.sections[".data"]) { - global_variable_section_indices.insert(reader.sections[".data"]->get_index()); - } - - if (reader.sections[".bss"]) { - global_variable_section_indices.insert(reader.sections[".bss"]->get_index()); - } - - if (reader.sections[".rodata"]) { - global_variable_section_indices.insert(reader.sections[".rodata"]->get_index()); + for (auto section_name : {".rodata", ".data", ".bss"}) { + if (const auto section = reader.sections[section_name]) { + if (section->get_size() != 0) { + global_variable_section_indices.insert(section->get_index()); + } + } } } diff --git a/src/asm_marshal.cpp b/src/asm_marshal.cpp index 8ed5d3ef..2dccd044 100644 --- a/src/asm_marshal.cpp +++ b/src/asm_marshal.cpp @@ -299,7 +299,7 @@ vector marshal(const Instruction& ins, const pc_t pc) { return std::visit(MarshalVisitor{crab::label_to_offset16(pc), crab::label_to_offset32(pc)}, ins); } -static int size(const Instruction& inst) { +int asm_syntax::size(const Instruction& inst) { if (const auto pins = std::get_if(&inst)) { if (pins->lddw) { return 2; diff --git a/src/asm_ostream.cpp b/src/asm_ostream.cpp index 941932f4..9ecab6f5 100644 --- a/src/asm_ostream.cpp +++ b/src/asm_ostream.cpp @@ -544,21 +544,6 @@ string to_string(Assertion const& constraint) { return str.str(); } -int size(const Instruction& inst) { - if (const auto bin = std::get_if(&inst)) { - if (bin->lddw) { - return 2; - } - } - if (std::holds_alternative(inst)) { - return 2; - } - if (std::holds_alternative(inst)) { - return 2; - } - return 1; -} - auto get_labels(const InstructionSeq& insts) { pc_t pc = 0; std::map pc_of_label; diff --git a/src/asm_syntax.hpp b/src/asm_syntax.hpp index 53b2ba49..80595a2f 100644 --- a/src/asm_syntax.hpp +++ b/src/asm_syntax.hpp @@ -383,6 +383,8 @@ std::string to_string(const Assertion& constraint); void print(const InstructionSeq& insts, std::ostream& out, const std::optional& label_to_print, bool print_line_info = false); +int size(const Instruction& inst); + } // namespace asm_syntax using namespace asm_syntax; diff --git a/src/asm_unmarshal.cpp b/src/asm_unmarshal.cpp index e7e999dc..0587049f 100644 --- a/src/asm_unmarshal.cpp +++ b/src/asm_unmarshal.cpp @@ -439,7 +439,16 @@ struct Unmarshaller { throw InvalidInstruction(pc, "bad register"); } - if (inst.src == INST_LD_MODE_MAP_FD) { + switch (inst.src) { + case INST_LD_MODE_IMM: + return Bin{ + .op = Bin::Op::MOV, + .dst = Reg{inst.dst}, + .v = Imm{merge(inst.imm, next_imm)}, + .is64 = true, + .lddw = true, + }; + case INST_LD_MODE_MAP_FD: { // magic number, meaning we're a per-process file descriptor defining the map. // (for details, look for BPF_PSEUDO_MAP_FD in the kernel) if (next.imm != 0) { @@ -447,17 +456,9 @@ struct Unmarshaller { } return LoadMapFd{.dst = Reg{inst.dst}, .mapfd = inst.imm}; } - if (inst.src == INST_LD_MODE_MAP_VALUE) { - return LoadMapAddress{.dst = Reg{inst.dst}, .mapfd = inst.imm, .offset = next_imm}; + case INST_LD_MODE_MAP_VALUE: return LoadMapAddress{.dst = Reg{inst.dst}, .mapfd = inst.imm, .offset = next_imm}; + default: throw InvalidInstruction(pc, make_opcode_message("bad instruction", inst.opcode)); } - - return Bin{ - .op = Bin::Op::MOV, - .dst = Reg{inst.dst}, - .v = Imm{merge(inst.imm, next_imm)}, - .is64 = true, - .lddw = true, - }; } static ArgSingle::Kind toArgSingleKind(const ebpf_argument_type_t t) { diff --git a/src/crab/ebpf_transformer.cpp b/src/crab/ebpf_transformer.cpp index 424fabf7..76eea5e5 100644 --- a/src/crab/ebpf_transformer.cpp +++ b/src/crab/ebpf_transformer.cpp @@ -1841,10 +1841,14 @@ void ebpf_transformer::do_load_mapfd(const Reg& dst_reg, const int mapfd, const void ebpf_transformer::operator()(const LoadMapFd& ins) { do_load_mapfd(ins.dst, ins.mapfd, false); } -void ebpf_transformer::do_load_map_address(const Reg& dst_reg, const int mapfd, int32_t offset) { +void ebpf_transformer::do_load_map_address(const Reg& dst_reg, const int mapfd, const int32_t offset) { const EbpfMapDescriptor& desc = thread_local_program_info->platform->get_map_descriptor(mapfd); const EbpfMapType& type = thread_local_program_info->platform->get_map_type(desc.type); + if (type.value_type == EbpfMapValueType::PROGRAM) { + throw std::runtime_error("Program map types are not supported"); + } + // Set the shared region size and offset for the map. type_inv.assign_type(m_inv, dst_reg, T_SHARED); const reg_pack_t& dst = reg_pack(dst_reg); From 10b97379eaa3d09b879838b3b3bb2dc620c6205b Mon Sep 17 00:00:00 2001 From: Alan Jowett Date: Thu, 23 Jan 2025 09:18:25 -0800 Subject: [PATCH 5/5] PR feedback Signed-off-by: Alan Jowett --- src/crab/ebpf_transformer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crab/ebpf_transformer.cpp b/src/crab/ebpf_transformer.cpp index 76eea5e5..4ab5b016 100644 --- a/src/crab/ebpf_transformer.cpp +++ b/src/crab/ebpf_transformer.cpp @@ -1846,7 +1846,7 @@ void ebpf_transformer::do_load_map_address(const Reg& dst_reg, const int mapfd, const EbpfMapType& type = thread_local_program_info->platform->get_map_type(desc.type); if (type.value_type == EbpfMapValueType::PROGRAM) { - throw std::runtime_error("Program map types are not supported"); + throw std::invalid_argument("Cannot load address of program map type - only data maps are supported"); } // Set the shared region size and offset for the map.