From f0f481980768a8f74127948c67cc2fa3c9e088c8 Mon Sep 17 00:00:00 2001 From: GroM Date: Thu, 1 Feb 2024 16:05:54 +0100 Subject: [PATCH] Add updated linker scripts, custom linker script and relocation code at app startup --- cargo-ledger/src/setup.rs | 28 ++++ ledger_device_sdk/link.ld | 87 ++++++++-- ledger_device_sdk/link_wrap.sh | 42 +++++ ledger_device_sdk/nanos.json | 20 ++- ledger_device_sdk/nanosplus.json | 18 +- ledger_device_sdk/nanox.json | 19 +-- ledger_secure_sdk_sys/src/c/src.c | 265 ++++++++++++++++++++++++++++++ 7 files changed, 441 insertions(+), 38 deletions(-) create mode 100755 ledger_device_sdk/link_wrap.sh diff --git a/cargo-ledger/src/setup.rs b/cargo-ledger/src/setup.rs index 1e5d7c2f..3864b301 100644 --- a/cargo-ledger/src/setup.rs +++ b/cargo-ledger/src/setup.rs @@ -39,4 +39,32 @@ pub fn install_targets() { println!("* {target} already installed"); } } + + // Install link_wrap.sh script needed for relocation into proper location + let target_host = std::str::from_utf8( + Command::new("rustup") + .arg("default") + .output() + .expect("failed to call rustup") + .stdout + .as_slice(), + ) + .unwrap(); + + let start = target_host.find('-').unwrap() + 1; // byte after first '-' + let end = target_host.find(' ').unwrap(); // absolute index of ' ' + println!( + " Output folder for custom link script {}", + &target_host[start..end] + ); + + let outfilepath = + sysroot.join(&target_host[start..end]).join("link_wrap.sh"); + let target_url = target_files_url.join("link_wrap.sh"); + let cmd = Command::new("curl") + .arg(target_url) + .arg("-o") + .arg(outfilepath) + .output() + .expect("failed to execute 'curl'"); } diff --git a/ledger_device_sdk/link.ld b/ledger_device_sdk/link.ld index 7bee28e9..9b052153 100644 --- a/ledger_device_sdk/link.ld +++ b/ledger_device_sdk/link.ld @@ -1,43 +1,110 @@ +PHDRS +{ + flash0 PT_LOAD ; + data PT_LOAD ; + flash2 PT_LOAD ; + sram PT_LOAD ; + + headers PT_PHDR PHDRS ; +} + SECTIONS { + /* Code, read only, no relocations needed.
*/ .text : { _text = .; + /* Here begins flash. This symbol is used by the idempotent `pic` + function as the lower bound of addresses to relocate. */ _nvram_start = .; *(.boot*) *(.text*) - *(.rodata*) + /* .rodata is moved out so we can update it */ . = ALIGN(PAGE_SIZE); _etext = .; - } > FLASH + } > FLASH :flash0 - .nvm_data : ALIGN(PAGE_SIZE) + /* Relocations, read only, no relocations against the relocations themselves + needed! */ + _reloc_size = SIZEOF(.rel.rodata) + SIZEOF(.rel.data) + SIZEOF(.rel.nvm_data); + .rel_flash : ALIGN(PAGE_SIZE) { + _relocs = .; + + . += _reloc_size; + . = ALIGN(PAGE_SIZE); + + _erelocs = .; + + . = ALIGN(PAGE_SIZE); + + /* After this section we have mutable flash. Must be a multiple of PAGE_SIZE from _nvram_start. */ _nvram_data = .; - *(.nvm_data*) + } > FLASH :flash0 + + /* Immutable globals, read only during app running proper, but + relocations are needed. (So not read-only completely.) */ + .rodata : ALIGN(PAGE_SIZE) + { + /* Moved here from .text so we can permanently apply relocations to it with + nvm_write() */ . = ALIGN(PAGE_SIZE); - _envram_data = .; - _install_parameters = .; - _nvram_end = .; - } > FLASH + _rodata = .; + _rodata_src = .; + *(.rodata*) + . = ALIGN(PAGE_SIZE); + _erodata = .; + } > FLASH :flash0 + _rodata_len = _erodata - _rodata; + /* Mutable Globals, writable, relocations are needed. */ .data : ALIGN(4) { _data = .; *(vtable) *(.data*) + . = ALIGN(PAGE_SIZE); _edata = .; - } > SRAM + } > SRAM AT> FLASH :data =0xa4a4 + _data_len = SIZEOF(.data); ASSERT( (_edata - _data) <= 0, ".data section must be empty" ) + /* Persistent data, read and written during app running proper, + relocations are also needed. */ + .nvm_data : ALIGN(PAGE_SIZE) + { + *(.nvm_data*) + + /* Store _nvram value during link_pass and use this to detect movement of + _nvram as compared to the previous app execution, and redo the relocations + if necessary */ + . = ALIGN(4); + _nvram_prev_run = .; + LONG(ABSOLUTE(_nvram_start)) + + .
= ALIGN(PAGE_SIZE); + + /* After this section we no longer have Flash memory at all. */ + + /* This symbol is used by the mutable portion of flash calculations. */ + _envram_data = .; + _install_parameters = .; + /* This symbol is used by the idempotent `pic` function as the upper + bound of addresses to relocate. */ + _nvram_end = .; + } > FLASH :flash2 + + _sidata_src = LOADADDR(.data); + .bss : { _bss = .; *(.bss*) _ebss = .; + _bss_len = ABSOLUTE(_ebss) - ABSOLUTE(_bss); . = ALIGN(4); app_stack_canary = .; @@ -46,7 +113,7 @@ SECTIONS . = _stack_validation + STACK_SIZE; _stack = ABSOLUTE(END_STACK) - STACK_SIZE; _estack = ABSOLUTE(END_STACK); - } > SRAM + } > SRAM :sram .stack_sizes (INFO): { diff --git a/ledger_device_sdk/link_wrap.sh b/ledger_device_sdk/link_wrap.sh new file mode 100755 index 00000000..31f6cb7e --- /dev/null +++ b/ledger_device_sdk/link_wrap.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +set -eu + +set -x + +LD=${LD:-rust-lld} +# Needed because LLD gets behavior from argv[0] +LD=${LD/-ld/-lld} +${LD} "$@" --emit-relocs + +echo RUST_LLD DONE + +while [ $# -gt 0 -a "$1" != "-o" ]; +do + shift; +done +OUT="$2" + +echo OUT IS $OUT + +# the relocations for the constants section are required +llvm-objcopy --dump-section .rel.rodata=$OUT-rodata-reloc $OUT /dev/null +# there might not _be_ nonempty .data or .nvm_data sections, so there might be no relocations for it; fail gracefully. +llvm-objcopy --dump-section .rel.data=$OUT-data-reloc $OUT /dev/null || true +llvm-objcopy --dump-section .rel.nvm_data=$OUT-nvm-reloc $OUT /dev/null || true +# Concatenate the relocation sections; this should still write $OUT-relocs even if $OUT-data-reloc doesn't exist. +cat $OUT-rodata-reloc $OUT-nvm-reloc $OUT-data-reloc > $OUT-relocs || true + +reloc_allocated_size="$((0x$(llvm-nm $OUT | grep _reloc_size | cut -d' ' -f1)))" +reloc_real_size="$(stat -c %s $OUT-relocs)" +# Check that our relocations _actually_ fit.
+if [ "$reloc_real_size" -gt "$reloc_allocated_size" ] +then + echo "Insufficient size for relocs; This is likely some bug in nanos_sdk's link.ld." + echo "Available size: " $reloc_allocated_size " Used size: " $reloc_real_size + exit 1 +fi + +truncate -s $reloc_allocated_size $OUT-relocs +# and write the relocs to their section in the flash image. +llvm-objcopy --update-section .rel_flash=$OUT-relocs $OUT diff --git a/ledger_device_sdk/nanos.json b/ledger_device_sdk/nanos.json index 601e8193..4d893faf 100644 --- a/ledger_device_sdk/nanos.json +++ b/ledger_device_sdk/nanos.json @@ -8,19 +8,21 @@ "executables": true, "features": "+strict-align", "frame-pointer": "always", - "linker": "rust-lld", + "linker": "link_wrap.sh", "linker-flavor": "ld.lld", "llvm-target": "thumbv6m-none-eabi", "panic-strategy": "abort", "pre-link-args": { - "ld.lld": [ - "-Tnanos_layout.ld", - "-Tlink.ld" - ], - "ld": [ - "-Tnanos_layout.ld", - "-Tlink.ld" - ] + "ld.lld": [ + "-Tnanos_layout.ld", + "-Tlink.ld", + "--emit-relocs" + ], + "ld": [ + "-Tnanos_layout.ld", + "-Tlink.ld", + "--emit-relocs" + ] }, "relocation-model": "ropi", "singlethread": true, diff --git a/ledger_device_sdk/nanosplus.json b/ledger_device_sdk/nanosplus.json index 9db91b70..5f3d2030 100644 --- a/ledger_device_sdk/nanosplus.json +++ b/ledger_device_sdk/nanosplus.json @@ -6,20 +6,20 @@ "emit-debug-gdb-scripts": false, "executables": true, "frame-pointer": "always", - "linker": "rust-lld", + "linker": "link_wrap.sh", "linker-flavor": "ld.lld", "llvm-target": "thumbv8m.main-none-eabi", "max-atomic-width": 32, "panic-strategy": "abort", "pre-link-args": { - "ld.lld": [ - "-Tnanosplus_layout.ld", - "-Tlink.ld" - ], - "ld": [ - "-Tnanosplus_layout.ld", - "-Tlink.ld" - ] + "ld.lld": [ + "-Tnanosplus_layout.ld", + "-Tlink.ld" + ], + "ld": [ + "-Tnanosplus_layout.ld", + "-Tlink.ld" + ] }, "relocation-model": "ropi-rwpi", "singlethread": true, diff --git a/ledger_device_sdk/nanox.json b/ledger_device_sdk/nanox.json index 
fe609604..245672ec 100644 --- a/ledger_device_sdk/nanox.json +++ b/ledger_device_sdk/nanox.json @@ -7,24 +7,23 @@ "emit-debug-gdb-scripts": false, "executables": true, "frame-pointer": "always", - "linker": "rust-lld", + "linker": "link_wrap.sh", "linker-flavor": "ld.lld", "llvm-target": "thumbv6m-none-eabi", "panic-strategy": "abort", "pre-link-args": { - "ld.lld": [ - "-Tnanox_layout.ld", - "-Tlink.ld" - ], - "ld": [ - "-Tnanox_layout.ld", - "-Tlink.ld" - ] + "ld.lld": [ + "-Tnanox_layout.ld", + "-Tlink.ld" + ], + "ld": [ + "-Tnanox_layout.ld", + "-Tlink.ld" + ] }, "relocation-model": "ropi-rwpi", "singlethread": true, "target-pointer-width": "32", "os": "nanox", "target-family": [ "bolos" ] - } diff --git a/ledger_secure_sdk_sys/src/c/src.c b/ledger_secure_sdk_sys/src/c/src.c index d238102d..339b7dfe 100644 --- a/ledger_secure_sdk_sys/src/c/src.c +++ b/ledger_secure_sdk_sys/src/c/src.c @@ -1,3 +1,5 @@ +#include <elf.h> +#include <stdbool.h> #include "exceptions.h" #include "os_apilevel.h" #include "string.h" @@ -11,8 +13,245 @@ extern void sample_main(); +struct SectionSrc; +struct SectionDst; + io_seph_app_t G_io_app; +extern Elf32_Rel _relocs; +extern Elf32_Rel _erelocs; + +// TODO get from header +void *pic(void *link_address); +void nvm_write (void *dst_adr, void *src_adr, unsigned int src_len); + +#ifdef SPECULOS_DEBUGGING +#define PRINTLNC(str) println_c(str) +void println_c(char* str); +#define PRINTHEXC(str, n) printhex_c(str, n) +void printhex_c(char* str, uint32_t m); +#else +#define PRINTLNC(str) while(0) +#define PRINTHEXC(str, n) while(0) +#endif + +#ifdef TARGET_NANOS2 // ARM v8 +# define SYMBOL_ABSOLUTE_VALUE(DST, SYM) \ + __asm volatile( \ + "movw %[result], #:lower16:" #SYM "\n\t" \ + "movt %[result], #:upper16:" #SYM \ + : [result] "=r" (DST)) +#else // ARM v6 +# define SYMBOL_ABSOLUTE_VALUE(DST, SYM) \ + __asm volatile( \ + "ldr %[result], =" #SYM \ + : [result] "=r" (DST)) +#endif + +#ifdef TARGET_NANOS2 +# define SYMBOL_SBREL_ADDRESS(DST, SYM) \ + __asm
volatile( \ + "movw %[result], #:lower16:" #SYM "(sbrel)\n\t" \ + "movt %[result], #:upper16:" #SYM "(sbrel)\n\t" \ + "add %[result], r9, %[result]" \ + : [result] "=r" (DST)) +#elif defined(TARGET_NANOX) +# define SYMBOL_SBREL_ADDRESS(DST, SYM) \ + __asm volatile( \ + "ldr %[result], =" #SYM "(sbrel)\n\t" \ + "add %[result], r9, %[result]" \ + : [result] "=r" (DST)) +#elif defined(TARGET_NANOS) +# define SYMBOL_SBREL_ADDRESS(DST, SYM) \ + SYMBOL_ABSOLUTE_VALUE(DST, SYM) +#else +# error "unknown machine" +#endif + +void link_pass( + size_t sec_len, + struct SectionSrc *sec_src, + struct SectionDst *sec_dst, + int nvram_move_amt, + void* nvram_prev, + void* envram_prev, + int dst_ram) +{ +#ifdef TARGET_NANOS + uint32_t buf[16]; +#else + uint32_t buf[128]; +#endif + + typedef typeof(*buf) link_addr_t; + + Elf32_Rel* relocs; + SYMBOL_ABSOLUTE_VALUE(relocs, _relocs); + Elf32_Rel* erelocs; + SYMBOL_ABSOLUTE_VALUE(erelocs, _erelocs); + + + Elf32_Rel *reloc_start = pic(relocs); + Elf32_Rel *reloc_end = ((Elf32_Rel*)pic(erelocs-1)) + 1; + + PRINTHEXC("Section base address:", sec_dst); + PRINTHEXC("Section base address runtime:", pic(sec_dst)); + // Loop over pages of the .rodata section, + for (size_t i = 0; i < sec_len; i += sizeof(buf)) { + // We will want to know if we changed each page, to avoid extra write-backs. + bool is_changed = 0; + + size_t buf_size = sec_len - i < sizeof(buf) + ? sec_len - i + : sizeof(buf); + + // Copy over page from *run time* address. + memcpy(buf, pic(sec_src) + i, buf_size); + + // This is the elf load (*not* elf link or bolos run time!) address of the page + // we just copied. + link_addr_t page_link_addr = (link_addr_t)sec_dst + i; + + PRINTHEXC("Chunk base: ", page_link_addr); + PRINTHEXC("First reloc: ", reloc_start->r_offset); + + // Loop over the rodata entries - we could loop over the + // correct section, but this also works.
+ for (Elf32_Rel* reloc = reloc_start; reloc < reloc_end; reloc++) { + // This is the (absolute) elf *load* address of the relocation. + link_addr_t abs_offset = reloc->r_offset; + + // This is the relative offset on the current page, in + // bytes. + size_t page_offset = abs_offset - page_link_addr; + + // This is the relative offset on the current page, in words. + // + // Pointers in word_offset should be aligned to 4-byte + // boundaries because of alignment, so we can just make it + // uint32_t directly. + size_t word_offset = page_offset / sizeof(*buf); + + // This includes word_offset < 0 because uint32_t. + // Assuming no relocations go behind the end address. + if (word_offset < sizeof(buf) / sizeof(*buf)) { + PRINTLNC("Possible reloc"); + void* old = (void*) buf[word_offset]; + // The old ptr should lie within the nvram range of + // * Link time nvram range + // If the link_pass is running for the first time + // Or if the link_pass is running for RAM + // * The previous run's nvram range + // If the app has been moved after running the initial link_pass + if (old >= nvram_prev && old < envram_prev) { + void* new = old + nvram_move_amt; + is_changed |= (old != new); + buf[word_offset] = (uint32_t) new; + } + } + } + if (dst_ram) { + PRINTLNC("Chunk to ram"); + memcpy((void*)sec_dst + i, buf, buf_size); + } else if (is_changed) { + PRINTLNC("Chunk to flash"); + nvm_write(pic((void *)sec_dst + i), buf, buf_size); + if (memcmp(pic((void *)sec_dst + i), buf, buf_size)) { + try_context_set(NULL); + os_sched_exit(1); + } + } else { + PRINTLNC("Unchanged flash chunk"); + } + } + + /* PRINTLNC("Ending link pass"); */ +} + +void get_link_time_nvram_values( + void** nvram_ptr_p, + void** envram_ptr_p) +{ +#if defined(ST31) + SYMBOL_ABSOLUTE_VALUE(*nvram_ptr_p, _nvram); + SYMBOL_ABSOLUTE_VALUE(*envram_ptr_p, _envram); +#elif defined(ST33) || defined(ST33K1M5) + __asm volatile("ldr %0, =_nvram":"=r"(*nvram_ptr_p)); + __asm volatile("ldr %0, 
=_envram":"=r"(*envram_ptr_p)); +#else +#error "invalid architecture" +#endif +} + +void link_pass_ram( + size_t sec_len, + struct SectionSrc *sec_src, + struct SectionDst *sec_dst) +{ + void* nvram_ptr; + void* envram_ptr; + get_link_time_nvram_values(&nvram_ptr, &envram_ptr); + + // Value of _nvram in this run + void* nvram_current = pic(nvram_ptr); + + // Value (in bytes) of change in _nvram + int nvram_move_amt = nvram_current - nvram_ptr; + + // The nvram_prev and envram_prev are the link time values + link_pass(sec_len, sec_src, sec_dst, nvram_move_amt, nvram_ptr, envram_ptr, true); +} + +void link_pass_nvram( + size_t sec_len, + struct SectionSrc *sec_src, + struct SectionDst *sec_dst) +{ + void* nvram_ptr; + void* envram_ptr; + + get_link_time_nvram_values(&nvram_ptr, &envram_ptr); + + // Value of _nvram in this run + void* nvram_current = pic(nvram_ptr); + + void** nvram_prev_link_ptr; + SYMBOL_ABSOLUTE_VALUE(nvram_prev_link_ptr, _nvram_prev_run); + + // Pointer to the location where nvram_prev's value is stored + void** nvram_prev_val_ptr = (void**)pic(nvram_prev_link_ptr); + + // Value of _nvram and _envram in previous run + void* nvram_prev = *nvram_prev_val_ptr; + void* envram_prev = nvram_prev + (envram_ptr - nvram_ptr); + + void* link_pass_in_progress_tag = (void*) 0x1; + if (nvram_prev == link_pass_in_progress_tag) { + // This indicates that the previous link_pass did not complete successfully + // Abort the app to avoid unexpected behaviour + // The "fix" for this would be reinstalling the app + os_sched_exit(1); + } + + // Value (in bytes) of change in _nvram + // If the app was moved after the previous run or link time + int nvram_move_amt = nvram_current - nvram_prev; + + if (nvram_move_amt == 0) { + // No change in _nvram means that we need not do link_pass again + return; + } + + // Add a tag to indicate we are in the middle of executing the link_pass + nvm_write(nvram_prev_val_ptr, &link_pass_in_progress_tag, sizeof(void*)); + + 
link_pass(sec_len, sec_src, sec_dst, nvram_move_amt, nvram_prev, envram_prev, false); + + // After successful completion of link_pass, clear the link_pass_in_progress_tag + // And write the proper value of nvram_current + nvm_write(nvram_prev_val_ptr, &nvram_current, sizeof(void*)); +} + #ifdef HAVE_CCID #include "usbd_ccid_if.h" uint8_t G_io_apdu_buffer[260]; @@ -21,6 +260,32 @@ uint8_t G_io_apdu_buffer[260]; int c_main(void) { __asm volatile("cpsie i"); + // Update pointers for pic(), only issuing nvm_write() if we actually changed a pointer in the block. + // link_pass(&_rodata_len, &_rodata_src, &_rodata); + size_t rodata_len; + SYMBOL_ABSOLUTE_VALUE(rodata_len, _rodata_len); + struct SectionSrc* rodata_src; + SYMBOL_ABSOLUTE_VALUE(rodata_src, _rodata_src); + struct SectionDst* rodata; + SYMBOL_ABSOLUTE_VALUE(rodata, _rodata); + + link_pass_nvram(rodata_len, rodata_src, rodata); + + size_t data_len; + SYMBOL_ABSOLUTE_VALUE(data_len, _data_len); + struct SectionSrc* sidata_src; + SYMBOL_ABSOLUTE_VALUE(sidata_src, _sidata_src); + struct SectionDst* data; + __asm volatile("mov %[result],r9" : [result] "=r" (data)); + + link_pass_ram(data_len, sidata_src, data); + + size_t bss_len; + SYMBOL_ABSOLUTE_VALUE(bss_len, _bss_len); + struct SectionDst* bss; + SYMBOL_SBREL_ADDRESS(bss, _bss); + memset(bss, 0, bss_len); + // formerly known as 'os_boot()' try_context_set(NULL);