diff --git a/evm_arithmetization/src/cpu/kernel/asm/hash/sha2/compression.asm b/evm_arithmetization/src/cpu/kernel/asm/hash/sha2/compression.asm index a9467a00b..6ff84301b 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/hash/sha2/compression.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/hash/sha2/compression.asm @@ -65,10 +65,11 @@ compression_loop: %mload_kernel_code_u32 // stack: K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest DUP10 - DUP10 - DUP10 - DUP10 - // stack: e[i], f[i], g[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest + DUP8 + DUP11 + DUP11 + DUP11 + // stack: e[i], f[i], g[i], e[i], h[i], K[i], W[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest %sha2_temp_word1 // stack: T1[i], a[i], b[i], c[i], d[i], e[i], f[i], g[i], h[i], num_blocks, scratch_space_addr, message_schedule_addr, i, a[0]..h[0], retdest DUP4 diff --git a/evm_arithmetization/src/cpu/kernel/asm/hash/sha2/message_schedule.asm b/evm_arithmetization/src/cpu/kernel/asm/hash/sha2/message_schedule.asm index 66fa67a9b..3bcd7dbfc 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/hash/sha2/message_schedule.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/hash/sha2/message_schedule.asm @@ -13,13 +13,11 @@ gen_message_schedule_from_block: // stack: block_addr, output_addr, retdest DUP1 // stack: block_addr, block_addr, output_addr, retdest - %add_const(32) - // stack: block_addr + 32, block_addr, output_addr, retdest - SWAP1 - // stack: block_addr, block_addr + 32, output_addr, retdest %mload_u256 - // stack: block[0], block_addr + 32, output_addr, retdest + // stack: block[0], block_addr, output_addr, retdest SWAP1 + // stack: block_addr, block[0], output_addr, retdest + %add_const(32) // stack: block_addr + 32, block[0], output_addr, retdest %mload_u256 // stack: block[1], block[0], output_addr, retdest @@ -45,27 +43,22 @@ gen_message_schedule_from_block_0_loop: // stack: output_addr, block[0] % (1 << 32), block[0] >> 32, output_addr, counter, block[1], retdest %mstore_u32 // stack: block[0] >> 32, output_addr, counter, block[1], retdest - SWAP1 - // stack: output_addr, block[0] >> 32, counter, block[1], retdest - %sub_const(4) - // stack: output_addr - 4, block[0] >> 32, counter, block[1], retdest - SWAP1 - // stack: block[0] >> 32, output_addr - 4, counter, block[1], retdest + %stack (block0_shifted, output_addr, counter) -> (output_addr, 4, 1, counter, block0_shifted) + SUB + // stack: output_addr - 4, 1, counter, block[0] >> 32, block[1], retdest SWAP2 - // stack: counter, output_addr - 4, block[0] >> 32, block[1], retdest - %decrement + SUB + // stack: counter - 1, output_addr - 4, block[0] >> 32, block[1], retdest DUP1 %jumpi(gen_message_schedule_from_block_0_loop) gen_message_schedule_from_block_0_end: // stack: old counter=0, output_addr, block[0], block[1], retdest POP // stack: output_addr, block[0], block[1], retdest - %stack (out, b0, b1) -> (out, 8, b1, b0) - // stack: output_addr, counter=8, block[1], block[0], retdest %add_const(64) - // stack: output_addr + 64, counter, block[1], block[0], retdest - SWAP1 - // stack: counter, output_addr + 64, block[1], block[0], retdest + // stack: output_addr + 64, block[0], block[1], retdest + %stack (out, b0, b1) -> (8, out, b1, b0) + // stack: counter=8, output_addr + 64, block[1], block[0], retdest gen_message_schedule_from_block_1_loop: // Split the second half (256 bits) of the block into the next eight (32-bit) chunks of the message sdchedule. // stack: counter, output_addr, block[1], block[0], retdest @@ -83,29 +76,22 @@ gen_message_schedule_from_block_1_loop: // stack: output_addr, block[1] % (1 << 32), block[1] >> 32, output_addr, counter, block[0], retdest %mstore_u32 // stack: block[1] >> 32, output_addr, counter, block[0], retdest - SWAP1 - // stack: output_addr, block[1] >> 32, counter, block[0], retdest - %sub_const(4) - // stack: output_addr - 4, block[1] >> 32, counter, block[0], retdest - SWAP1 - // stack: block[1] >> 32, output_addr - 4, counter, block[0], retdest + %stack (block1_shifted, output_addr, counter) -> (output_addr, 4, 1, counter, block1_shifted) + SUB + // stack: output_addr - 4, 1, counter, block[1] >> 32, block[0], retdest SWAP2 - // stack: counter, output_addr - 4, block[1] >> 32, block[0], retdest - %decrement + SUB + // stack: counter - 1, output_addr - 4, block[1] >> 32, block[0], retdest DUP1 %jumpi(gen_message_schedule_from_block_1_loop) gen_message_schedule_from_block_1_end: // stack: old counter=0, output_addr, block[1], block[0], retdest POP // stack: output_addr, block[0], block[1], retdest - PUSH 48 - // stack: counter=48, output_addr, block[0], block[1], retdest - SWAP1 - // stack: output_addr, counter, block[0], block[1], retdest %add_const(36) - // stack: output_addr + 36, counter, block[0], block[1], retdest - SWAP1 - // stack: counter, output_addr + 36, block[0], block[1], retdest + // stack: output_addr + 36, block[0], block[1], retdest + PUSH 48 + // stack: counter=48, output_addr + 36, block[0], block[1], retdest gen_message_schedule_remaining_loop: // Generate the next 48 chunks of the message schedule, one at a time, from prior chunks. // stack: counter, output_addr, block[0], block[1], retdest @@ -153,9 +139,10 @@ gen_message_schedule_remaining_loop: // stack: output_addr, x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], sigma_1(x[output_addr - 2*4]), counter, block[0], block[1], retdest SWAP4 // stack: sigma_1(x[output_addr - 2*4]), x[output_addr - 16*4], sigma_0(x[output_addr - 15*4]), x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest - %add_u32 - %add_u32 - %add_u32 + ADD + ADD + ADD + %as_u32 // stack: sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest DUP2 // stack: output_addr, sigma_1(x[output_addr - 2*4]) + x[output_addr - 16*4] + sigma_0(x[output_addr - 15*4]) + x[output_addr - 7*4], output_addr, counter, block[0], block[1], retdest @@ -174,7 +161,7 @@ gen_message_schedule_remaining_end: %pop4 JUMP -// Precodition: memory, starting at 0, contains num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] +// Precondition: memory, starting at 0, contains num_blocks, block0[0], ..., block0[63], block1[0], ..., blocklast[63] // stack contains output_addr // Postcondition: starting at output_addr, set of 256 bytes per block // each contains the 64 32-bit chunks of the message schedule for that block (in four-byte increments) @@ -182,12 +169,14 @@ global sha2_gen_all_message_schedules: // stack: output_addr, retdest DUP1 // stack: output_addr, output_addr, retdest - %mload_current_general_no_offset - // stack: num_blocks, output_addr, output_addr, retdest - PUSH 1 - // stack: cur_offset = 1, counter = num_blocks, output_addr, output_addr, retdest - %build_current_general_address - // stack: cur_addr, counter, output_addr, output_addr, retdest + %build_current_general_address_no_offset + DUP1 + // stack: base_addr, base_addr, output_addr, output_addr, retdest + MLOAD_GENERAL + // stack: num_blocks, base_addr, output_addr, output_addr, retdest + SWAP1 + %increment + // stack: cur_addr (offset = 1), counter = num_blocks, output_addr, output_addr, retdest gen_all_message_schedules_loop: // stack: cur_addr, counter, cur_output_addr, output_addr, retdest PUSH gen_all_message_schedules_loop_end diff --git a/evm_arithmetization/src/cpu/kernel/asm/hash/sha2/ops.asm b/evm_arithmetization/src/cpu/kernel/asm/hash/sha2/ops.asm index d50e5c9a8..f0da871a5 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/hash/sha2/ops.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/hash/sha2/ops.asm @@ -1,14 +1,14 @@ // 32-bit right rotation %macro rotr(rot) // stack: value + DUP1 + // stack: value, value PUSH $rot - // stack: rot, value - DUP2 - DUP2 - // stack: rot, value, rot, value + // stack: rot, value, value SHR - // stack: value >> rot, rot, value - %stack (shifted, rot, value) -> (rot, value, shifted) + // stack: value >> rot, value + SWAP1 + PUSH $rot // stack: rot, value, value >> rot PUSH 32 SUB @@ -26,16 +26,14 @@ // stack: x, x %rotr(7) // stack: rotr(x, 7), x - SWAP1 - // stack: x, rotr(x, 7) DUP1 - // stack: x, x, rotr(x, 7) - %rotr(18) - // stack: rotr(x, 18), x, rotr(x, 7) - SWAP1 - // stack: x, rotr(x, 18), rotr(x, 7) + // stack: rotr(x, 7), rotr(x, 7), x + %rotr(11) + // stack: rotr(x, 18), rotr(x, 7), x + SWAP2 + // stack: x, rotr(x, 7), rotr(x, 18) %shr_const(3) - // stack: shr(x, 3), rotr(x, 18), rotr(x, 7) + // stack: shr(x, 3), rotr(x, 7), rotr(x, 18) XOR XOR %endmacro @@ -46,36 +44,30 @@ // stack: x, x %rotr(17) // stack: rotr(x, 17), x - SWAP1 - // stack: x, rotr(x, 17) DUP1 - // stack: x, x, rotr(x, 17) - %rotr(19) - // stack: rotr(x, 19), x, rotr(x, 17) - SWAP1 - // stack: x, rotr(x, 19), rotr(x, 17) + // stack: rotr(x, 17), rotr(x, 17), x + %rotr(2) + // stack: rotr(x, 19), rotr(x, 17), x + SWAP2 + // stack: x, rotr(x, 17), rotr(x, 19) PUSH 10 SHR - // stack: shr(x, 10), rotr(x, 19), rotr(x, 17) + // stack: shr(x, 10), rotr(x, 17), rotr(x, 19) XOR XOR %endmacro %macro sha2_bigsigma_0 // stack: x - DUP1 - // stack: x, x %rotr(2) - // stack: rotr(x, 2), x - SWAP1 - // stack: x, rotr(x, 2) + // stack: rotr(x, 2) DUP1 - // stack: x, x, rotr(x, 2) - %rotr(13) - // stack: rotr(x, 13), x, rotr(x, 2) - SWAP1 - // stack: x, rotr(x, 13), rotr(x, 2) - %rotr(22) + // stack: rotr(x, 2), rotr(x, 2) + %rotr(11) + // stack: rotr(x, 13), rotr(x, 2) + DUP1 + // stack: rotr(x, 13), rotr(x, 13), rotr(x, 2) + %rotr(9) // stack: rotr(x, 22), rotr(x, 13), rotr(x, 2) XOR XOR @@ -83,19 +75,15 @@ %macro sha2_bigsigma_1 // stack: x - DUP1 - // stack: x, x %rotr(6) - // stack: rotr(x, 6), x - SWAP1 - // stack: x, rotr(x, 6) + // stack: rotr(x, 6) DUP1 - // stack: x, x, rotr(x, 6) - %rotr(11) - // stack: rotr(x, 11), x, rotr(x, 6) - SWAP1 - // stack: x, rotr(x, 11), rotr(x, 6) - %rotr(25) + // stack: rotr(x, 6), rotr(x, 6) + %rotr(5) + // stack: rotr(x, 11), rotr(x, 6) + DUP1 + // stack: rotr(x, 11), rotr(x, 11), rotr(x, 6) + %rotr(14) // stack: rotr(x, 25), rotr(x, 11), rotr(x, 6) XOR XOR @@ -103,41 +91,31 @@ %macro sha2_choice // stack: x, y, z - DUP1 - // stack: x, x, y, z - NOT - // stack: not x, x, y, z SWAP1 - // stack: x, not x, y, z - SWAP3 - // stack: z, not x, y, x - AND - // stack: (not x) and z, y, x - SWAP2 - // stack: x, y, (not x) and z + // stack: y, x, z + DUP3 + // stack: z, y, x, z + XOR + // stack: z xor y, x, z AND - // stack: x and y, (not x) and z - OR + // stack: (z xor y) and x, z + XOR + // stack: ((z xor y) and x) xor z == (x and y) xor (not x and z) %endmacro %macro sha2_majority // stack: x, y, z - DUP1 - // stack: x, x, y, z - DUP3 - // stack: y, x, x, y, z - DUP5 - // stack: z, y, x, x, y, z - AND - // stack: z and y, x, x, y, z - SWAP4 - // stack: z, x, x, y, z and y + DUP2 + DUP2 AND - // stack: z and x, x, y, z and y + // stack: x and y, x, y, z SWAP2 - // stack: y, x, z and x, z and y - AND - // stack: y and x, z and x, z and y + // stack: y, x, x and y, z OR + // stack: y or x, x and y, z + %stack(y_or_x, x_and_y, z) -> (z, y_or_x, x_and_y) + AND + // stack: z and (y or x), x and y OR + // stack: (z and (y or x) or (x and y) == (x and y) or (x and z) or (y and z) %endmacro diff --git a/evm_arithmetization/src/cpu/kernel/asm/hash/sha2/temp_words.asm b/evm_arithmetization/src/cpu/kernel/asm/hash/sha2/temp_words.asm index ed610947f..0f3fb4b7a 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/hash/sha2/temp_words.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/hash/sha2/temp_words.asm @@ -1,18 +1,16 @@ // "T_1" in the SHA-256 spec %macro sha2_temp_word1 - // stack: e, f, g, h, K[i], W[i] - DUP1 - // stack: e, e, f, g, h, K[i], W[i] - %sha2_bigsigma_1 - // stack: Sigma_1(e), e, f, g, h, K[i], W[i] - %stack (sig, e, f, g) -> (e, f, g, sig) - // stack: e, f, g, Sigma_1(e), h, K[i], W[i] + // stack: e, f, g, e, h, K[i], W[i] %sha2_choice - // stack: Ch(e, f, g), Sigma_1(e), h, K[i], W[i] - %add_u32 - %add_u32 - %add_u32 - %add_u32 + // stack: Ch(e, f, g), e, h, K[i], W[i] + SWAP1 + // stack: e, Ch(e, f, g), h, K[i], W[i] + %sha2_bigsigma_1 + // stack: Sigma_1(e), Ch(e, f, g), h, K[i], W[i] + ADD + ADD + ADD + ADD // stack: Ch(e, f, g) + Sigma_1(e) + h + K[i] + W[i] %endmacro @@ -27,6 +25,6 @@ // stack: c, a, b, Sigma_0(a) %sha2_majority // stack: Maj(c, a, b), Sigma_0(a) - %add_u32 + ADD // stack: Maj(c, a, b) + Sigma_0(a) %endmacro diff --git a/evm_arithmetization/src/cpu/kernel/asm/hash/sha2/write_length.asm b/evm_arithmetization/src/cpu/kernel/asm/hash/sha2/write_length.asm index 9c2707b8d..a2a0216a6 100644 --- a/evm_arithmetization/src/cpu/kernel/asm/hash/sha2/write_length.asm +++ b/evm_arithmetization/src/cpu/kernel/asm/hash/sha2/write_length.asm @@ -3,14 +3,13 @@ %build_current_general_address SWAP1 // stack: length, last_addr - DUP1 - // stack: length, length, last_addr + DUP2 + DUP2 + // stack: length, last_addr, length, last_addr %and_const(0xff) - // stack: length % (1 << 8), length, last_addr - DUP3 - // stack: last_addr, length % (1 << 8), length, last_addr - %swap_mstore - + // stack: length % (1 << 8), last_addr, length, last_addr + MSTORE_GENERAL + %rep 7 // For i = 0 to 6 // stack: length >> (8 * i), last_addr - i - 1 @@ -20,14 +19,13 @@ // stack: length >> (8 * i), last_addr - i - 2 %shr_const(8) // stack: length >> (8 * (i + 1)), last_addr - i - 2 - PUSH 256 DUP2 - // stack: length >> (8 * (i + 1)), 256, length >> (8 * (i + 1)), last_addr - i - 2 - MOD - // stack: (length >> (8 * (i + 1))) % (1 << 8), length >> (8 * (i + 1)), last_addr - i - 2 + PUSH 256 DUP3 - // stack: last_addr - i - 2, (length >> (8 * (i + 1))) % (1 << 8), length >> (8 * (i + 1)), last_addr - i - 2 - %swap_mstore + // stack: length >> (8 * (i + 1)), 256, last_addr - i - 2, length >> (8 * (i + 1)), last_addr - i - 2 + MOD + // stack: (length >> (8 * (i + 1))) % (1 << 8), last_addr - i - 2, length >> (8 * (i + 1)), last_addr - i - 2 + MSTORE_GENERAL %endrep %pop2