From abd2e4225b300c174e359bab2dae1ee4927925b4 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Mon, 13 Feb 2023 21:53:18 +0000 Subject: [PATCH] App loading with more features 1. Relocations are performed, obviating the need for manual `pic` sprinkling. 2. Globals with initial values (vs all-0 globals in BSS) are now initialized to those values. --- .github/workflows/rust.yml | 5 +- examples/signature.rs | 12 +++ link.ld | 122 ++++++++++++++++++++++---- nanos.json | 4 +- nanosplus.json | 2 +- nanox.json | 3 +- scripts/link_wrap.sh | 45 ++++++++++ src/c/src.c | 171 +++++++++++++++++++++++++++++++++++++ src/lib.rs | 14 +++ 9 files changed, 356 insertions(+), 22 deletions(-) create mode 100755 scripts/link_wrap.sh diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 8ca7f67f..3372acb2 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -73,6 +73,7 @@ jobs: toolchain: nightly override: true components: rust-src + - run: echo "/rsdk/scripts" >> $GITHUB_PATH - uses: actions/checkout@v2 - name: Cargo build uses: actions-rs/cargo@v1 @@ -93,11 +94,13 @@ jobs: release: '9-2019-q4' - name: Install clang run: sudo apt-get update && sudo apt install -y clang - - uses: actions-rs/toolchain@v1 + - name: Install toolchains + uses: actions-rs/toolchain@v1 with: toolchain: nightly override: true components: rust-src + - run: echo "$PWD/scripts" >> $GITHUB_PATH - name: Install dependencies run: | sudo apt-get update && sudo apt-get install -y qemu-user-static diff --git a/examples/signature.rs b/examples/signature.rs index d5c831bb..7b70cd79 100644 --- a/examples/signature.rs +++ b/examples/signature.rs @@ -1,6 +1,9 @@ #![no_std] #![no_main] +#![feature(asm_const)] +#![feature(cfg_version)] + use core::panic::PanicInfo; #[panic_handler] @@ -20,3 +23,12 @@ fn sign_message_const(m: &[u8], path: &[u32]) -> Result<([u8; 72], u32, u32), u3 fn sample_main() { let _signature = sign_message_const(b"Hello world", &PATH).unwrap(); } + 
+#[cfg_attr(not(version("1.64")), allow(unused))] +const RELOC_SIZE: usize = 3500; + +::core::arch::global_asm! { + ".global _reloc_size", + ".set _reloc_size, {reloc_size}", + reloc_size = const RELOC_SIZE, +} diff --git a/link.ld b/link.ld index 4601ca96..d0e7087d 100644 --- a/link.ld +++ b/link.ld @@ -1,43 +1,118 @@ +PHDRS +{ + flash0 PT_LOAD ; + data PT_LOAD ; + flash2 PT_LOAD ; + sram PT_LOAD ; + + headers PT_PHDR PHDRS ; +} + SECTIONS { + /* Code, read only, no relocations needed. */ .text : { - _text = .; + /* Here begins flash. This symbol is used by the idempotent `pic` + function as the lower bound of addresses to relocate. */ _nvram_start = .; + _text = .; + *(.boot*) *(.text*) - *(.rodata*) - + /* .rodata is moved out so we can update it */ . = ALIGN(PAGE_SIZE); + _etext = .; - } > FLASH + } > FLASH :flash0 + _text_len = _etext - _text; - .nvm_data : ALIGN(PAGE_SIZE) + /* Relocations, read only, no relocations against the relocations themselves + needed! */ + .rel_flash : ALIGN(PAGE_SIZE) { + _relocs = .; + + /* define _reloc_size in the build.rs */ + . += _reloc_size; + /*(.rel.rodata .rel.rodata*)*/ + /*(.rel.data .rel.data*)*/ + /*(.rel.nvm_data .rel.nvm_data*)*/ + . = ALIGN(PAGE_SIZE); + + _erelocs = .; + + . = ALIGN(PAGE_SIZE); + + /* After this section we have mutable flash. Must be a multiple of PAGE_SIZE from _nvram_start. */ _nvram_data = .; - *(.nvm_data*) + } > FLASH :flash0 + _relocs_len = _erelocs - _relocs; + + /* Immutable globals, read only during app running proper, but + relocations are needed. (So not read-only completely.) */ + .rodata : ALIGN(PAGE_SIZE) + { + /* Moved here from .text so we can permanently apply relocations to it with + nvm_write() */ . = ALIGN(PAGE_SIZE); - _envram_data = .; - _install_parameters = .; - _nvram_end = .; - } > FLASH + _rodata = .; + _rodata_src = .; + *(.rodata*) + /**(.rodata*)*/ + . 
= ALIGN(PAGE_SIZE); + _erodata = .; + } > FLASH :flash0 + _rodata_len = _erodata - _rodata; - .data : ALIGN(4) + /* Mutable Globals, writable, relocations are needed. */ + .data : ALIGN(PAGE_SIZE) { _data = .; *(vtable) *(.data*) + . = ALIGN(PAGE_SIZE); _edata = .; - } > SRAM + FILL(0xa4a4); + } > SRAM AT> FLASH :data =0xa4a4 + _data_len = SIZEOF(.data); /* _edata - _data; */ + + /* Persistent data, read and written during app running proper, + relocations are also needed. */ + .nvm_data : ALIGN(PAGE_SIZE) + { + _nvm_data = .; + _nvm_data_src = .; + *(.nvm_data*) + . = ALIGN(PAGE_SIZE); + _envm_data = .; + + /* Debugging: Add some extra buffer space just to make sure we don't clobber install_parameters */ + . += PAGE_SIZE*4; + /* and make sure we're on a page boundary for them */ + . = ALIGN(PAGE_SIZE); + _install_parameters = .; + + /* After this section we no longer have Flash memory at all. */ + + /* This symbol is used by the mutable portion of flash calculations. */ + _envram_data = .; + /* This symbol is used by the idempotent `pic` function as the upper + bound of addresses to relocate. */ + _nvram_end = .; + } > FLASH :flash2 + _nvm_data_len = _envm_data - _nvm_data; - ASSERT( (_edata - _data) <= 0, ".data section must be empty" ) + _sidata = LOADADDR(.data); + _sidata_src = LOADADDR(.data); - .bss : + .bss : { _bss = .; *(.bss*) _ebss = .; + _bss_len = ABSOLUTE(_ebss) - ABSOLUTE(_bss); . = ALIGN(4); app_stack_canary = .; @@ -46,12 +121,24 @@ SECTIONS . 
= _stack_validation + STACK_SIZE; _stack = ABSOLUTE(END_STACK) - STACK_SIZE; _estack = ABSOLUTE(END_STACK); - } > SRAM + } > SRAM :sram .stack_sizes (INFO): { KEEP(*(.stack_sizes)); - } + } :NONE + + /* + .rel (INFO): + { + KEEP(*(.rel.*)); + } :NONE + */ + + /*.text.debug_info (INFO): + { + *(.text.debug_info) + }*/ /DISCARD/ : { @@ -59,6 +146,9 @@ SECTIONS libm.a ( * ) libgcc.a ( * ) *(.ARM.exidx* .gnu.linkonce.armexidx.*) + *(.ARM.extab*) + /* *(.rel.debug_info) + *(.rela.debug_info) */ *(.debug_info) } } diff --git a/nanos.json b/nanos.json index 601e8193..f8600a59 100644 --- a/nanos.json +++ b/nanos.json @@ -4,11 +4,11 @@ "atomic-cas": false, "c-enum-min-bits": 8, "data-layout": "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64", - "emit-debug-gdb-scripts": false, + "emit-debug-gdb-scripts": true, "executables": true, "features": "+strict-align", "frame-pointer": "always", - "linker": "rust-lld", + "linker": "link_wrap.sh", "linker-flavor": "ld.lld", "llvm-target": "thumbv6m-none-eabi", "panic-strategy": "abort", diff --git a/nanosplus.json b/nanosplus.json index 9db91b70..69df5636 100644 --- a/nanosplus.json +++ b/nanosplus.json @@ -6,7 +6,7 @@ "emit-debug-gdb-scripts": false, "executables": true, "frame-pointer": "always", - "linker": "rust-lld", + "linker": "link_wrap.sh", "linker-flavor": "ld.lld", "llvm-target": "thumbv8m.main-none-eabi", "max-atomic-width": 32, diff --git a/nanox.json b/nanox.json index fe609604..4037b408 100644 --- a/nanox.json +++ b/nanox.json @@ -7,7 +7,7 @@ "emit-debug-gdb-scripts": false, "executables": true, "frame-pointer": "always", - "linker": "rust-lld", + "linker": "link_wrap.sh", "linker-flavor": "ld.lld", "llvm-target": "thumbv6m-none-eabi", "panic-strategy": "abort", @@ -26,5 +26,4 @@ "target-pointer-width": "32", "os": "nanox", "target-family": [ "bolos" ] - } diff --git a/scripts/link_wrap.sh b/scripts/link_wrap.sh new file mode 100755 index 00000000..46d79da1 --- /dev/null +++ b/scripts/link_wrap.sh @@ -0,0 +1,45 @@ 
+#!/usr/bin/env bash + +set -eu + +set -x + +LD=${LD:-rust-lld} +# Needed because LLD gets behavior from argv[0] +LD=${LD/-ld/-lld} +${LD} "$@" --emit-relocs + +echo RUST_LLD DONE + +while [ $# -gt 0 -a "$1" != "-o" ]; +do + shift; +done +OUT="$2" + +echo OUT IS $OUT + +# the relocations for the constants section are required +${OBJCOPY} --dump-section .rel.rodata=$OUT-rodata-reloc $OUT /dev/null +# there might not _be_ nonempty .data or .nvm_data sections, so there might be no relocations for it; fail gracefully. +${OBJCOPY} --dump-section .rel.data=$OUT-data-reloc $OUT /dev/null || true +${OBJCOPY} --dump-section .rel.nvm_data=$OUT-nvm-reloc $OUT /dev/null || true +# Concatenate the relocation sections; this should still write $OUT-relocs even if $OUT-data-reloc doesn't exist. +cat $OUT-rodata-reloc $OUT-nvm-reloc $OUT-data-reloc > $OUT-relocs || true +# pad the relocs out to size - we should probably make some way to adjust this size from the source. + +reloc_allocated_size="$((0x$(${NM} $OUT | grep _reloc_size | cut -d' ' -f1)))" +reloc_real_size="$(stat --format %s $OUT-relocs)" +# Check that our relocations _actually_ fit. +if [ "$reloc_real_size" -gt "$reloc_allocated_size" ] +then + echo "Insufficient size for relocs; increase it in build.rs." + echo "Available size: " $reloc_allocated_size " Used size: " $reloc_real_size + exit 1 +else + echo "Sufficient size:" $reloc_allocated_size $reloc_real_size +fi + +truncate -s $reloc_allocated_size $OUT-relocs +# and write the relocs to their section in the flash image. 
+${OBJCOPY} --update-section .rel_flash=$OUT-relocs $OUT diff --git a/src/c/src.c b/src/c/src.c index b520a285..02da83ff 100644 --- a/src/c/src.c +++ b/src/c/src.c @@ -1,3 +1,5 @@ +#include +#include #include "exceptions.h" #include "os_apilevel.h" #include "string.h" @@ -15,8 +17,151 @@ void os_longjmp(unsigned int exception) { longjmp(try_context_get()->jmp_buf, exception); } +struct SectionSrc; +struct SectionDst; + io_seph_app_t G_io_app; +extern Elf32_Rel _relocs; +extern Elf32_Rel _erelocs; + +// TODO get from header +void *pic(void *link_address); +void nvm_write (void *dst_adr, void *src_adr, unsigned int src_len); + +#ifdef SPECULOS_DEBUGGING +#define PRINTLNC(str) println_c(str) +void println_c(char* str); +#define PRINTHEXC(str, n) printhex_c(str, n) +void printhex_c(char* str, uint32_t m); +#else +#define PRINTLNC(str) while(0) +#define PRINTHEXC(str, n) while(0) +#endif + +#ifdef TARGET_NANOS2 // ARM v8 +# define SYMBOL_ABSOLUTE_VALUE(DST, SYM) \ + __asm volatile( \ + "movw %[result], #:lower16:" #SYM "\n\t" \ + "movt %[result], #:upper16:" #SYM \ + : [result] "=r" (DST)) +#else // ARM v6 +# define SYMBOL_ABSOLUTE_VALUE(DST, SYM) \ + __asm volatile( \ + "ldr %[result], =" #SYM \ + : [result] "=r" (DST)) +#endif + +#ifdef TARGET_NANOS2 +# define SYMBOL_SBREL_ADDRESS(DST, SYM) \ + __asm volatile( \ + "movw %[result], #:lower16:" #SYM "(sbrel)\n\t" \ + "movt %[result], #:upper16:" #SYM "(sbrel)\n\t" \ + "add %[result], r9, %[result]" \ + : [result] "=r" (DST)) +#elif defined(TARGET_NANOX) +# define SYMBOL_SBREL_ADDRESS(DST, SYM) \ + __asm volatile( \ + "ldr %[result], =" #SYM "(sbrel)\n\t" \ + "add %[result], r9, %[result]" \ + : [result] "=r" (DST)) +#elif defined(TARGET_NANOS) +# define SYMBOL_SBREL_ADDRESS(DST, SYM) \ + SYMBOL_ABSOLUTE_VALUE(DST, SYM) +#else +# error "unknown machine" +#endif + +void link_pass( + size_t sec_len, + struct SectionSrc *sec_src, + struct SectionDst *sec_dst, + int dst_ram) +{ +#ifdef TARGET_NANOS + uint32_t buf[16]; +#else 
+ uint32_t buf[128]; +#endif + + typedef typeof(*buf) link_addr_t; + typedef typeof(*buf) install_addr_t; + + Elf32_Rel* relocs; + SYMBOL_ABSOLUTE_VALUE(relocs, _relocs); + Elf32_Rel* erelocs; + SYMBOL_ABSOLUTE_VALUE(erelocs, _erelocs); + + + Elf32_Rel *reloc_start = pic(relocs); + Elf32_Rel *reloc_end = ((Elf32_Rel*)pic(erelocs-1)) + 1; + + PRINTHEXC("Section base address:", sec_dst); + PRINTHEXC("Section base address runtime:", pic(sec_dst)); + // Loop over pages of the .rodata section, + for (size_t i = 0; i < sec_len; i += sizeof(buf)) { + // We will want to know if we changed each page, to avoid extra write-backs. + bool is_changed = 0; + + size_t buf_size = sec_len - i < sizeof(buf) + ? sec_len - i + : sizeof(buf); + + // Copy over page from *run time* address. + memcpy(buf, pic(sec_src) + i, buf_size); + + // This is the elf load (*not* elf link or bolos run time!) address of the page + // we just copied. + link_addr_t page_link_addr = (link_addr_t)sec_dst + i; + + PRINTHEXC("Chunk base: ", page_link_addr); + PRINTHEXC("First reloc: ", reloc_start->r_offset); + + // Loop over the rodata entries - we could loop over the + // correct section, but this also works. + for (Elf32_Rel* reloc = reloc_start; reloc < reloc_end; reloc++) { + // This is the (absolute) elf *load* address of the relocation. + link_addr_t abs_offset = reloc->r_offset; + + // This is the relative offset on the current page, in + // bytes. + size_t page_offset = abs_offset - page_link_addr; + + // This is the relative offset on the current page, in words. + // + // Pointers in word_offset should be aligned to 4-byte + // boundaries because of alignment, so we can just make it + // uint32_t directly. + size_t word_offset = page_offset / sizeof(*buf); + + // This includes word_offset < 0 because uint32_t. + // Assuming no relocations go behind the end address. 
+ if (word_offset < sizeof(buf) / sizeof(*buf)) { + PRINTLNC("Possible reloc"); + link_addr_t old = buf[word_offset]; + install_addr_t new = pic(old); + is_changed |= (old != new); + buf[word_offset] = new; + } + } + if (dst_ram) { + PRINTLNC("Chunk to ram"); + memcpy((void*)sec_dst + i, buf, buf_size); + } else if (is_changed) { + PRINTLNC("Chunk to flash"); + nvm_write(pic((void *)sec_dst + i), buf, buf_size); + if (memcmp(pic((void *)sec_dst + i), buf, buf_size)) { + try_context_set(NULL); + os_sched_exit(1); + } + } else { + PRINTLNC("Unchanged flash chunk"); + } + } + + /* PRINTLNC("Ending link pass"); */ +} + #ifdef HAVE_CCID #include "usbd_ccid_if.h" uint8_t G_io_apdu_buffer[260]; @@ -25,6 +170,32 @@ uint8_t G_io_apdu_buffer[260]; int c_main(void) { __asm volatile("cpsie i"); + // Update pointers for pic(), only issuing nvm_write() if we actually changed a pointer in the block. + // link_pass(&_rodata_len, &_rodata_src, &_rodata); + size_t rodata_len; + SYMBOL_ABSOLUTE_VALUE(rodata_len, _rodata_len); + struct SectionSrc* rodata_src; + SYMBOL_ABSOLUTE_VALUE(rodata_src, _rodata_src); + struct SectionDst* rodata; + SYMBOL_ABSOLUTE_VALUE(rodata, _rodata); + + link_pass(rodata_len, rodata_src, rodata, 0); + + size_t data_len; + SYMBOL_ABSOLUTE_VALUE(data_len, _data_len); + struct SectionSrc* sidata_src; + SYMBOL_ABSOLUTE_VALUE(sidata_src, _sidata_src); + struct SectionDst* data; + __asm volatile("mov %[result],r9" : [result] "=r" (data)); + + link_pass(data_len, sidata_src, data, 1); + + size_t bss_len; + SYMBOL_ABSOLUTE_VALUE(bss_len, _bss_len); + struct SectionDst* bss; + SYMBOL_SBREL_ADDRESS(bss, _bss); + memset(bss, 0, bss_len); + // formerly known as 'os_boot()' try_context_set(NULL); diff --git a/src/lib.rs b/src/lib.rs index 0edfe278..9541c349 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,6 +5,8 @@ #![test_runner(testing::sdk_test_runner)] #![allow(incomplete_features)] #![feature(generic_const_exprs)] +#![cfg_attr(test, feature(asm_const))] 
+#![cfg_attr(test, feature(cfg_version))] pub mod bindings; @@ -180,3 +182,15 @@ impl NVMData { fn sample_main() { test_main(); } + +#[cfg(all(target_family = "bolos", test))] +mod test { + #![cfg_attr(not(version("1.64")), allow(unused))] + const RELOC_SIZE: usize = 3500; + + ::core::arch::global_asm! { + ".global _reloc_size", + ".set _reloc_size, {reloc_size}", + reloc_size = const RELOC_SIZE, + } +}