diff --git a/external/libbtf b/external/libbtf index 3c0cbd6ca..760a80490 160000 --- a/external/libbtf +++ b/external/libbtf @@ -1 +1 @@ -Subproject commit 3c0cbd6ca0920e4d5df6fb88a892f0b5ac5425d7 +Subproject commit 760a80490a4c67ca03046797868691cb81454d81 diff --git a/src/asm_cfg.cpp b/src/asm_cfg.cpp index acaf689f9..acbbc169f 100644 --- a/src/asm_cfg.cpp +++ b/src/asm_cfg.cpp @@ -399,6 +399,8 @@ static std::string instype(Instruction ins) { return "arith"; } else if (std::holds_alternative(ins)) { return "assign"; + } else if (std::holds_alternative(ins)) { + return "assign"; } else if (std::holds_alternative(ins)) { return "assume"; } else { diff --git a/src/asm_files.cpp b/src/asm_files.cpp index 551a88817..c17920df7 100644 --- a/src/asm_files.cpp +++ b/src/asm_files.cpp @@ -162,16 +162,20 @@ get_program_name_and_size(const ELFIO::section& sec, const ELFIO::Elf_Xword star return {program_name, size}; } -void relocate_map(ebpf_inst& inst, const std::string& symbol_name, +void verify_load_instruction(const ebpf_inst& instruction, const std::string& symbol_name, ELFIO::Elf64_Addr offset) { + if ((instruction.opcode & INST_CLS_MASK) != INST_CLS_LD) { + throw UnmarshalError("Illegal operation on symbol " + symbol_name + " at location " + + std::to_string(offset / sizeof(ebpf_inst))); + } +} + +void relocate_map(ebpf_inst& reloc_inst, const std::string& symbol_name, const std::variant>& map_record_size_or_map_offsets, const program_info& info, const ELFIO::Elf64_Addr offset, const ELFIO::Elf_Word index, const ELFIO::const_symbol_section_accessor& symbols) { // Only permit loading the address of the map. - if ((inst.opcode & INST_CLS_MASK) != INST_CLS_LD) { - throw UnmarshalError("Illegal operation on symbol " + symbol_name + " at location " + - std::to_string(offset / sizeof(ebpf_inst))); - } - inst.src = 1; // magic number for LoadFd + verify_load_instruction(reloc_inst, symbol_name, offset); + reloc_inst.src = INST_LD_MODE_MAP_FD; // Relocation value is an offset into the "maps" or ".maps" section. size_t reloc_value = std::numeric_limits::max(); @@ -187,13 +191,42 @@ void relocate_map(ebpf_inst& inst, const std::string& symbol_name, const auto it = map_descriptors_offsets.find(symbol_name); if (it != map_descriptors_offsets.end()) { reloc_value = it->second; + } else { + throw UnmarshalError("Map descriptor not found for symbol " + symbol_name); } } if (reloc_value >= info.map_descriptors.size()) { throw UnmarshalError("Bad reloc value (" + std::to_string(reloc_value) + "). " + "Make sure to compile with -O2."); } - inst.imm = info.map_descriptors.at(reloc_value).original_fd; + reloc_inst.imm = info.map_descriptors.at(reloc_value).original_fd; +} + +void relocate_global_variable(ebpf_inst& reloc_inst, ebpf_inst& next_reloc_inst, const std::string& symbol_name, + const program_info& info, + const std::variant>& map_record_size_or_map_offsets, + const ELFIO::Elf64_Addr offset) { + // Only permit loading the address of the global variable. + verify_load_instruction(reloc_inst, symbol_name, offset); + + // Copy the immediate value to the next instruction. + next_reloc_inst.imm = reloc_inst.imm; + reloc_inst.src = INST_LD_MODE_MAP_VALUE; + + size_t reloc_value = std::numeric_limits::max(); + auto& map_descriptors_offsets = std::get<1>(map_record_size_or_map_offsets); + const auto it = map_descriptors_offsets.find(symbol_name); + if (it != map_descriptors_offsets.end()) { + reloc_value = it->second; + } else { + throw UnmarshalError("Map descriptor not found for symbol " + symbol_name); + } + + if (reloc_value >= info.map_descriptors.size()) { + throw UnmarshalError("Bad reloc value (" + std::to_string(reloc_value) + "). " + + "Make sure to compile with -O2."); + } + reloc_inst.imm = info.map_descriptors.at(reloc_value).original_fd; } // Structure used to keep track of subprogram relocation data until any subprograms @@ -319,6 +352,7 @@ vector read_elf(std::istream& input_stream, const std::string& path program_info info{platform}; std::set map_section_indices; + std::set global_variable_section_indices; auto btf = reader.sections[".BTF"]; std::optional btf_data; @@ -338,13 +372,17 @@ vector read_elf(std::istream& input_stream, const std::string& path std::variant> map_record_size_or_map_offsets = size_t{0}; ELFIO::const_symbol_section_accessor symbols{reader, symbol_section}; - if (!reader.sections[".maps"]) { + bool contains_old_style_map_sections = false; + for (const auto& section : reader.sections) { + if (is_map_section(section->get_name())) { + contains_old_style_map_sections = true; + break; + } + } + if (contains_old_style_map_sections) { map_record_size_or_map_offsets = parse_map_sections(options, platform, reader, info.map_descriptors, map_section_indices, symbols); - } else { - if (!btf_data.has_value()) { - throw UnmarshalError("No BTF section found in ELF file " + path); - } + } else if (btf_data.has_value()) { map_record_size_or_map_offsets = parse_map_section(*btf_data, info.map_descriptors); // Prevail requires: // Map fds are sequential starting from 1. @@ -366,7 +404,21 @@ vector read_elf(std::istream& input_stream, const std::string& path map_descriptor.inner_map_fd = type_id_to_fd_map[map_descriptor.inner_map_fd]; } } - map_section_indices.insert(reader.sections[".maps"]->get_index()); + if (reader.sections[".maps"]) { + map_section_indices.insert(reader.sections[".maps"]->get_index()); + } + + if (reader.sections[".data"]) { + global_variable_section_indices.insert(reader.sections[".data"]->get_index()); + } + + if (reader.sections[".bss"]) { + global_variable_section_indices.insert(reader.sections[".bss"]->get_index()); + } + + if (reader.sections[".rodata"]) { + global_variable_section_indices.insert(reader.sections[".rodata"]->get_index()); + } } vector res; @@ -426,12 +478,13 @@ vector read_elf(std::istream& input_stream, const std::string& path if (offset / sizeof(ebpf_inst) >= prog.prog.size()) { throw UnmarshalError("Invalid relocation data"); } - ebpf_inst& inst = prog.prog[offset / sizeof(ebpf_inst)]; + + ebpf_inst& reloc_inst = prog.prog[offset / sizeof(ebpf_inst)]; auto [symbol_name, symbol_section_index] = get_symbol_name_and_section_index(symbols, index); // Queue up relocation for function symbols. - if (inst.opcode == INST_OP_CALL && inst.src == INST_CALL_LOCAL) { + if (reloc_inst.opcode == INST_OP_CALL && reloc_inst.src == INST_CALL_LOCAL) { function_relocation fr{.prog_index = res.size(), .source_offset = offset / sizeof(ebpf_inst), .relocation_entry_index = index, @@ -440,9 +493,26 @@ vector read_elf(std::istream& input_stream, const std::string& path continue; } + // Verify that this is a map or global variable relocation. + verify_load_instruction(reloc_inst, symbol_name, offset); + + // Load instructions are two instructions long, so we need to check the next instruction. + if (prog.prog.size() <= offset / sizeof(ebpf_inst) + 1) { + throw UnmarshalError("Invalid relocation data"); + } + ebpf_inst& next_reloc_inst = prog.prog[offset / sizeof(ebpf_inst) + 1]; + // Perform relocation for symbols located in the maps section. if (map_section_indices.contains(symbol_section_index)) { - relocate_map(inst, symbol_name, map_record_size_or_map_offsets, info, offset, index, symbols); + relocate_map(reloc_inst, symbol_name, map_record_size_or_map_offsets, info, offset, index, + symbols); + continue; + } + + if (global_variable_section_indices.contains(symbol_section_index)) { + relocate_global_variable(reloc_inst, next_reloc_inst, + reader.sections[symbol_section_index]->get_name(), info, + map_record_size_or_map_offsets, offset); continue; } diff --git a/src/asm_marshal.cpp b/src/asm_marshal.cpp index 953435b70..8ed5d3ef8 100644 --- a/src/asm_marshal.cpp +++ b/src/asm_marshal.cpp @@ -87,10 +87,10 @@ static uint8_t imm_endian(const Un::Op op) { struct MarshalVisitor { private: - static vector makeLddw(const Reg dst, const bool isFd, const int32_t imm, const int32_t next_imm) { + static vector makeLddw(const Reg dst, const uint8_t type, const int32_t imm, const int32_t next_imm) { return {ebpf_inst{.opcode = gsl::narrow(INST_CLS_LD | width_to_opcode(8)), .dst = dst.v, - .src = gsl::narrow(isFd ? 1 : 0), + .src = type, .offset = 0, .imm = imm}, ebpf_inst{.opcode = 0, .dst = 0, .src = 0, .offset = 0, .imm = next_imm}}; @@ -105,14 +105,18 @@ struct MarshalVisitor { return {}; } - vector operator()(LoadMapFd const& b) const { return makeLddw(b.dst, true, b.mapfd, 0); } + vector operator()(LoadMapFd const& b) const { return makeLddw(b.dst, INST_LD_MODE_MAP_FD, b.mapfd, 0); } + + vector operator()(LoadMapAddress const& b) const { + return makeLddw(b.dst, INST_LD_MODE_MAP_VALUE, b.mapfd, b.offset); + } vector operator()(Bin const& b) const { if (b.lddw) { const auto pimm = std::get_if(&b.v); assert(pimm != nullptr); auto [imm, next_imm] = split(pimm->v); - return makeLddw(b.dst, false, imm, next_imm); + return makeLddw(b.dst, INST_LD_MODE_IMM, imm, next_imm); } ebpf_inst res{.opcode = gsl::narrow((b.is64 ? INST_CLS_ALU64 : INST_CLS_ALU) | (op(b.op) << 4)), @@ -304,6 +308,9 @@ static int size(const Instruction& inst) { if (std::holds_alternative(inst)) { return 2; } + if (std::holds_alternative(inst)) { + return 2; + } return 1; } diff --git a/src/asm_ostream.cpp b/src/asm_ostream.cpp index 1fb9fab45..941932f4e 100644 --- a/src/asm_ostream.cpp +++ b/src/asm_ostream.cpp @@ -359,6 +359,8 @@ struct CommandPrinterVisitor { void operator()(LoadMapFd const& b) { os_ << b.dst << " = map_fd " << b.mapfd; } + void operator()(LoadMapAddress const& b) { os_ << b.dst << " = map_val(" << b.mapfd << ") + " << b.offset; } + // llvm-objdump uses "w" for 32-bit operations and "r" for 64-bit operations. // We use the same convention here for consistency. static std::string reg_name(Reg const& a, const bool is64) { return ((is64) ? "r" : "w") + std::to_string(a.v); } @@ -551,6 +553,9 @@ int size(const Instruction& inst) { if (std::holds_alternative(inst)) { return 2; } + if (std::holds_alternative(inst)) { + return 2; + } return 1; } diff --git a/src/asm_parse.cpp b/src/asm_parse.cpp index 31ec6b9a9..3d3e03765 100644 --- a/src/asm_parse.cpp +++ b/src/asm_parse.cpp @@ -54,6 +54,12 @@ using crab::number_t; #define DOT "[.]" #define TYPE R"_(\s*(shared|number|packet|stack|ctx|map_fd|map_fd_programs)\s*)_" +// Match map_val(fd) + offset +#define MAP_VAL R"_(\s*map_val\((\d+)\)\s*\+\s*(\d+)\s*)_" + +// Match map_fd fd +#define MAP_FD R"_(\s*map_fd\s+(\d+)\s*)_" + static const std::map str_to_binop = { {"", Bin::Op::MOV}, {"+", Bin::Op::ADD}, {"-", Bin::Op::SUB}, {"*", Bin::Op::MUL}, {"/", Bin::Op::UDIV}, {"%", Bin::Op::UMOD}, {"|", Bin::Op::OR}, {"&", Bin::Op::AND}, @@ -164,6 +170,13 @@ Instruction parse_instruction(const std::string& line, const std::map(m[2]), .offset = boost::lexical_cast(m[3])}; + } + if (regex_match(text, m, regex(WREG ASSIGN MAP_FD))) { + return LoadMapFd{.dst = reg(m[1]), .mapfd = boost::lexical_cast(m[2])}; + } if (regex_match(text, m, regex(WREG OPASSIGN IMM LONGLONG))) { const std::string r = m[1]; const bool lddw = !m[4].str().empty(); diff --git a/src/asm_syntax.hpp b/src/asm_syntax.hpp index 3c2579199..645cec661 100644 --- a/src/asm_syntax.hpp +++ b/src/asm_syntax.hpp @@ -93,6 +93,15 @@ struct LoadMapFd { constexpr bool operator==(const LoadMapFd&) const = default; }; +// Load the address of a map value into a register. +struct LoadMapAddress { + Reg dst; + int32_t mapfd{}; + int32_t offset{}; + + constexpr bool operator==(const LoadMapAddress&) const = default; +}; + struct Condition { enum class Op { EQ, @@ -249,7 +258,7 @@ struct IncrementLoopCounter { }; using Instruction = std::variant; + Assume, IncrementLoopCounter, LoadMapAddress>; using LabeledInstruction = std::tuple>; using InstructionSeq = std::vector; diff --git a/src/asm_unmarshal.cpp b/src/asm_unmarshal.cpp index e35c851cd..e7e999dcf 100644 --- a/src/asm_unmarshal.cpp +++ b/src/asm_unmarshal.cpp @@ -429,7 +429,7 @@ struct Unmarshaller { if (next.opcode != 0 || next.dst != 0 || next.src != 0 || next.offset != 0) { throw InvalidInstruction(pc, "invalid lddw"); } - if (inst.src > 1) { + if (inst.src > INST_LD_MODE_MAP_VALUE) { throw InvalidInstruction(pc, make_opcode_message("bad instruction", inst.opcode)); } if (inst.offset != 0) { @@ -439,7 +439,7 @@ struct Unmarshaller { throw InvalidInstruction(pc, "bad register"); } - if (inst.src == 1) { + if (inst.src == INST_LD_MODE_MAP_FD) { // magic number, meaning we're a per-process file descriptor defining the map. // (for details, look for BPF_PSEUDO_MAP_FD in the kernel) if (next.imm != 0) { @@ -447,6 +447,9 @@ struct Unmarshaller { } return LoadMapFd{.dst = Reg{inst.dst}, .mapfd = inst.imm}; } + if (inst.src == INST_LD_MODE_MAP_VALUE) { + return LoadMapAddress{.dst = Reg{inst.dst}, .mapfd = inst.imm, .offset = next_imm}; + } return Bin{ .op = Bin::Op::MOV, diff --git a/src/assertions.cpp b/src/assertions.cpp index 3b82b2929..0d8f7f7c9 100644 --- a/src/assertions.cpp +++ b/src/assertions.cpp @@ -45,6 +45,7 @@ class AssertExtractor { vector operator()(const IncrementLoopCounter& ipc) const { return {{BoundedLoopCount{ipc.name}}}; } vector operator()(const LoadMapFd&) const { return {}; } + vector operator()(const LoadMapAddress&) const { return {}; } /// Packet access implicitly uses R6, so verify that R6 still has a pointer to the context. vector operator()(const Packet&) const { return zero_offset_ctx({6}); } diff --git a/src/crab/ebpf_transformer.cpp b/src/crab/ebpf_transformer.cpp index c3eec5d45..424fabf71 100644 --- a/src/crab/ebpf_transformer.cpp +++ b/src/crab/ebpf_transformer.cpp @@ -44,6 +44,7 @@ class ebpf_transformer final { void operator()(const IncrementLoopCounter&); void operator()(const Jmp&) const; void operator()(const LoadMapFd&); + void operator()(const LoadMapAddress&); void operator()(const Mem&); void operator()(const Packet&); void operator()(const Un&); @@ -113,6 +114,7 @@ class ebpf_transformer final { void havoc_subprogram_stack(const std::string& prefix); void forget_packet_pointers(); void do_load_mapfd(const Reg& dst_reg, int mapfd, bool maybe_null); + void do_load_map_address(const Reg& dst_reg, const int mapfd, int32_t offset); void assign_valid_ptr(const Reg& dst_reg, bool maybe_null); @@ -1839,6 +1841,20 @@ void ebpf_transformer::do_load_mapfd(const Reg& dst_reg, const int mapfd, const void ebpf_transformer::operator()(const LoadMapFd& ins) { do_load_mapfd(ins.dst, ins.mapfd, false); } +void ebpf_transformer::do_load_map_address(const Reg& dst_reg, const int mapfd, int32_t offset) { + const EbpfMapDescriptor& desc = thread_local_program_info->platform->get_map_descriptor(mapfd); + const EbpfMapType& type = thread_local_program_info->platform->get_map_type(desc.type); + + // Set the shared region size and offset for the map. + type_inv.assign_type(m_inv, dst_reg, T_SHARED); + const reg_pack_t& dst = reg_pack(dst_reg); + m_inv.assign(dst.shared_offset, offset); + m_inv.assign(dst.shared_region_size, desc.value_size); + assign_valid_ptr(dst_reg, false); +} + +void ebpf_transformer::operator()(const LoadMapAddress& ins) { do_load_map_address(ins.dst, ins.mapfd, ins.offset); } + void ebpf_transformer::assign_valid_ptr(const Reg& dst_reg, const bool maybe_null) { using namespace crab::dsl_syntax; const reg_pack_t& reg = reg_pack(dst_reg); diff --git a/src/ebpf_vm_isa.hpp b/src/ebpf_vm_isa.hpp index 09f204e5a..1248564f2 100644 --- a/src/ebpf_vm_isa.hpp +++ b/src/ebpf_vm_isa.hpp @@ -87,6 +87,10 @@ enum { INST_ALU_OP_ARSH = 0xc0, INST_ALU_OP_END = 0xd0, INST_ALU_OP_MASK = 0xf0, + + INST_LD_MODE_IMM = 0x0, // 64-bit immediate value + INST_LD_MODE_MAP_FD = 0x1, // Load map fd + INST_LD_MODE_MAP_VALUE = 0x2, // Load map value }; enum { diff --git a/src/test/test_marshal.cpp b/src/test/test_marshal.cpp index 7dcae9bda..8cba191da 100644 --- a/src/test/test_marshal.cpp +++ b/src/test/test_marshal.cpp @@ -41,8 +41,8 @@ static const ebpf_instruction_template_t instruction_template[] = { {{0x17, DST, 0, 0, IMM}, bpf_conformance_groups_t::base64}, {{0x18, DST, 0, 0, IMM}, bpf_conformance_groups_t::base64}, {{0x18, DST, 1, 0, IMM}, bpf_conformance_groups_t::base64}, - // TODO(issue #533): add support for LDDW with src_reg > 1. - // {{0x18, DST, 2, 0, IMM}, bpf_conformance_groups_t::base64}, + {{0x18, DST, 2, 0, IMM}, bpf_conformance_groups_t::base64}, + // TODO(issue #533): add support for LDDW with src_reg > 2. // {{0x18, DST, 3, 0, IMM}, bpf_conformance_groups_t::base64}, // {{0x18, DST, 4, 0, IMM}, bpf_conformance_groups_t::base64}, // {{0x18, DST, 5, 0, IMM}, bpf_conformance_groups_t::base64}, @@ -392,6 +392,9 @@ TEST_CASE("disasm_marshal", "[disasm][marshal]") { } SECTION("LoadMapFd") { compare_marshal_unmarshal(LoadMapFd{.dst = Reg{1}, .mapfd = 1}, true); } + SECTION("LoadMapAddress") { + compare_marshal_unmarshal(LoadMapAddress{.dst = Reg{1}, .mapfd = 1, .offset = 4}, true); + } SECTION("Jmp") { auto ops = {Condition::Op::EQ, Condition::Op::GT, Condition::Op::GE, Condition::Op::SET, diff --git a/src/test/test_verify.cpp b/src/test/test_verify.cpp index 50d428b96..d7bfd0580 100644 --- a/src/test/test_verify.cpp +++ b/src/test/test_verify.cpp @@ -424,9 +424,6 @@ TEST_SECTION("falco", "probe.o", "raw_tracepoint/signal_deliver") TEST_SECTION_REJECT_IF_STRICT("build", "mapoverflow.o", ".text") TEST_SECTION_REJECT_IF_STRICT("build", "mapunderflow.o", ".text") -// Test uses global variables, which are not supported yet by the verifier. -TEST_SECTION_FAIL("build", "global_variable.o", ".text") - /* * These programs contain "call -1" instruction and cannot be verified: TEST_SECTION("raw_tracepoint/filler/sys_access_e") @@ -528,6 +525,7 @@ TEST_SECTION("build", "store_map_value_in_map.o", ".text") TEST_SECTION("build", "twomaps.o", ".text"); TEST_SECTION("build", "twostackvars.o", ".text"); TEST_SECTION("build", "twotypes.o", ".text"); +TEST_SECTION("build", "global_variable.o", ".text") TEST_PROGRAM("build", "prog_array.o", ".text", "func", 5); TEST_PROGRAM("build", "prog_array.o", ".text", "func0", 5); TEST_PROGRAM("build", "prog_array.o", ".text", "func1", 5); diff --git a/src/test/test_yaml.cpp b/src/test/test_yaml.cpp index a29ab173c..3b786b72c 100644 --- a/src/test/test_yaml.cpp +++ b/src/test/test_yaml.cpp @@ -31,6 +31,7 @@ YAML_CASE("test-data/sdivmod.yaml") YAML_CASE("test-data/full64.yaml") YAML_CASE("test-data/jump.yaml") YAML_CASE("test-data/loop.yaml") +YAML_CASE("test-data/map.yaml") YAML_CASE("test-data/movsx.yaml") YAML_CASE("test-data/muldiv.yaml") YAML_CASE("test-data/packet.yaml") diff --git a/test-data/map.yaml b/test-data/map.yaml new file mode 100644 index 000000000..750cff092 --- /dev/null +++ b/test-data/map.yaml @@ -0,0 +1,26 @@ +# Copyright (c) Prevail Verifier contributors. +# SPDX-License-Identifier: MIT +--- +test-case: load_map_address +pre: [] +code: + : | + r2 = map_val(1) + 8 +post: + - r2.shared_offset=8 + - r2.shared_region_size=4 + - r2.svalue=[1, 2147418112] + - r2.type=shared + - r2.uvalue=r2.svalue +--- +test-case: load_map_fd +pre: [] +code: + : | + r1 = map_fd 1 + +post: + - r1.map_fd=1 + - r1.type=map_fd + - r1.svalue=[1, 2147418112] + - r1.uvalue=r1.svalue