From 84008fbf418b9b6307172c2b3a9d0fbd8e7f3d80 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 22 Apr 2021 15:37:09 +1000 Subject: [PATCH 1/5] arty: Change shield I/O pin bus into individual signals Make the shield I/O pins be individual signals rather than a bus in order to avoid warnings on pins which don't have both a driver and a receiver. Signed-off-by: Paul Mackerras --- fpga/arty_a7.xdc | 66 +++++++++---------- fpga/top-arty.vhdl | 161 +++++++++++++++++++++++++++------------------ 2 files changed, 129 insertions(+), 98 deletions(-) diff --git a/fpga/arty_a7.xdc b/fpga/arty_a7.xdc index 2a011617..622b24db 100644 --- a/fpga/arty_a7.xdc +++ b/fpga/arty_a7.xdc @@ -147,39 +147,39 @@ set_property IOB true [get_cells -hierarchical -filter {NAME =~*.litesdcard/sdca # Arduino/chipKIT shield connector ################################################################################ -set_property -dict { PACKAGE_PIN V15 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[0] }]; -set_property -dict { PACKAGE_PIN U16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[1] }]; -set_property -dict { PACKAGE_PIN P14 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[2] }]; -set_property -dict { PACKAGE_PIN T11 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[3] }]; -set_property -dict { PACKAGE_PIN R12 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[4] }]; -set_property -dict { PACKAGE_PIN T14 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[5] }]; -set_property -dict { PACKAGE_PIN T15 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[6] }]; -set_property -dict { PACKAGE_PIN T16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[7] }]; -set_property -dict { PACKAGE_PIN N15 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[8] }]; -set_property -dict { PACKAGE_PIN M16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[9] }]; -set_property -dict { PACKAGE_PIN V17 IOSTANDARD LVCMOS33 
PULLDOWN TRUE } [get_ports { shield_io[10] }]; -set_property -dict { PACKAGE_PIN U18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[11] }]; -set_property -dict { PACKAGE_PIN R17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[12] }]; -set_property -dict { PACKAGE_PIN P17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[13] }]; -set_property -dict { PACKAGE_PIN U11 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[26] }]; -set_property -dict { PACKAGE_PIN V16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[27] }]; -set_property -dict { PACKAGE_PIN M13 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[28] }]; -set_property -dict { PACKAGE_PIN R10 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[29] }]; -set_property -dict { PACKAGE_PIN R11 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[30] }]; -set_property -dict { PACKAGE_PIN R13 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[31] }]; -set_property -dict { PACKAGE_PIN R15 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[32] }]; -set_property -dict { PACKAGE_PIN P15 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[33] }]; -set_property -dict { PACKAGE_PIN R16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[34] }]; -set_property -dict { PACKAGE_PIN N16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[35] }]; -set_property -dict { PACKAGE_PIN N14 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[36] }]; -set_property -dict { PACKAGE_PIN U17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[37] }]; -set_property -dict { PACKAGE_PIN T18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[38] }]; -set_property -dict { PACKAGE_PIN R18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[39] }]; -set_property -dict { PACKAGE_PIN P18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[40] }]; -set_property -dict { PACKAGE_PIN N17 IOSTANDARD LVCMOS33 
PULLDOWN TRUE } [get_ports { shield_io[41] }]; -set_property -dict { PACKAGE_PIN M17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[42] }]; # A -set_property -dict { PACKAGE_PIN L18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[43] }]; # SCL -set_property -dict { PACKAGE_PIN M18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io[44] }]; # SDA +set_property -dict { PACKAGE_PIN V15 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io0 }]; +set_property -dict { PACKAGE_PIN U16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io1 }]; +set_property -dict { PACKAGE_PIN P14 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io2 }]; +set_property -dict { PACKAGE_PIN T11 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io3 }]; +set_property -dict { PACKAGE_PIN R12 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io4 }]; +set_property -dict { PACKAGE_PIN T14 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io5 }]; +set_property -dict { PACKAGE_PIN T15 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io6 }]; +set_property -dict { PACKAGE_PIN T16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io7 }]; +set_property -dict { PACKAGE_PIN N15 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io8 }]; +set_property -dict { PACKAGE_PIN M16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io9 }]; +set_property -dict { PACKAGE_PIN V17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io10 }]; +set_property -dict { PACKAGE_PIN U18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io11 }]; +set_property -dict { PACKAGE_PIN R17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io12 }]; +set_property -dict { PACKAGE_PIN P17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io13 }]; +set_property -dict { PACKAGE_PIN U11 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io26 }]; +set_property -dict { PACKAGE_PIN V16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { 
shield_io27 }];
+set_property -dict { PACKAGE_PIN M13 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io28 }];
+set_property -dict { PACKAGE_PIN R10 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io29 }];
+set_property -dict { PACKAGE_PIN R11 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io30 }];
+set_property -dict { PACKAGE_PIN R13 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io31 }];
+set_property -dict { PACKAGE_PIN R15 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io32 }];
+set_property -dict { PACKAGE_PIN P15 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io33 }];
+set_property -dict { PACKAGE_PIN R16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io34 }];
+set_property -dict { PACKAGE_PIN N16 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io35 }];
+set_property -dict { PACKAGE_PIN N14 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io36 }];
+set_property -dict { PACKAGE_PIN U17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io37 }];
+set_property -dict { PACKAGE_PIN T18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io38 }];
+set_property -dict { PACKAGE_PIN R18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io39 }];
+set_property -dict { PACKAGE_PIN P18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io40 }];
+set_property -dict { PACKAGE_PIN N17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io41 }];
+#set_property -dict { PACKAGE_PIN M17 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io42 }]; # A (disabled: toplevel declares no shield_io42 port)
+set_property -dict { PACKAGE_PIN L18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io43 }]; # SCL
+set_property -dict { PACKAGE_PIN M18 IOSTANDARD LVCMOS33 PULLDOWN TRUE } [get_ports { shield_io44 }]; # SDA
 
 #set_property -dict { PACKAGE_PIN C2 IOSTANDARD LVCMOS33 } [get_ports { shield_rst }];
 #set_property -dict { PACKAGE_PIN C1 IOSTANDARD LVCMOS33 } [get_ports { spi_hdr_ss }];
diff --git a/fpga/top-arty.vhdl
b/fpga/top-arty.vhdl index e64eba1a..dc5a0fe5 100644 --- a/fpga/top-arty.vhdl +++ b/fpga/top-arty.vhdl @@ -67,7 +67,38 @@ entity toplevel is sw3 : in std_ulogic; -- GPIO - shield_io : inout std_ulogic_vector(44 downto 0); + shield_io0 : inout std_ulogic; + shield_io1 : inout std_ulogic; + shield_io2 : inout std_ulogic; + shield_io3 : inout std_ulogic; + shield_io4 : inout std_ulogic; + shield_io5 : inout std_ulogic; + shield_io6 : inout std_ulogic; + shield_io7 : inout std_ulogic; + shield_io8 : inout std_ulogic; + shield_io9 : inout std_ulogic; + shield_io10 : inout std_ulogic; + shield_io11 : inout std_ulogic; + shield_io12 : inout std_ulogic; + shield_io13 : inout std_ulogic; + shield_io26 : inout std_ulogic; + shield_io27 : inout std_ulogic; + shield_io28 : inout std_ulogic; + shield_io29 : inout std_ulogic; + shield_io30 : inout std_ulogic; + shield_io31 : inout std_ulogic; + shield_io32 : inout std_ulogic; + shield_io33 : inout std_ulogic; + shield_io34 : inout std_ulogic; + shield_io35 : inout std_ulogic; + shield_io36 : inout std_ulogic; + shield_io37 : inout std_ulogic; + shield_io38 : inout std_ulogic; + shield_io39 : inout std_ulogic; + shield_io40 : inout std_ulogic; + shield_io41 : inout std_ulogic; + shield_io43 : inout std_ulogic; + shield_io44 : inout std_ulogic; -- Ethernet eth_ref_clk : out std_ulogic; @@ -718,38 +749,38 @@ begin gpio_in(16) <= sw2; gpio_in(17) <= sw3; - gpio_in(0) <= shield_io(10); - gpio_in(1) <= shield_io(11); - gpio_in(2) <= shield_io(12); - gpio_in(3) <= shield_io(13); - gpio_in(4) <= shield_io(26); - gpio_in(5) <= shield_io(27); - gpio_in(6) <= shield_io(28); - gpio_in(7) <= shield_io(29); - gpio_in(8) <= shield_io(8); - gpio_in(9) <= shield_io(9); - --gpio_in(10) <= shield_io(10); - --gpio_in(11) <= shield_io(11); - --gpio_in(12) <= shield_io(12); - --gpio_in(13) <= shield_io(13); - --gpio_in(14) <= shield_io(26); - --gpio_in(15) <= shield_io(27); - --gpio_in(16) <= shield_io(28); - --gpio_in(17) <= shield_io(29); - 
gpio_in(18) <= shield_io(30); - gpio_in(19) <= shield_io(31); - gpio_in(20) <= shield_io(32); - gpio_in(21) <= shield_io(33); - gpio_in(22) <= shield_io(34); - gpio_in(23) <= shield_io(35); - gpio_in(24) <= shield_io(36); - gpio_in(25) <= shield_io(37); - gpio_in(26) <= shield_io(38); - gpio_in(27) <= shield_io(39); - gpio_in(28) <= shield_io(40); - gpio_in(29) <= shield_io(41); - gpio_in(30) <= shield_io(43); - gpio_in(31) <= shield_io(44); + gpio_in(0) <= shield_io10; + gpio_in(1) <= shield_io11; + gpio_in(2) <= shield_io12; + gpio_in(3) <= shield_io13; + gpio_in(4) <= shield_io26; + gpio_in(5) <= shield_io27; + gpio_in(6) <= shield_io28; + gpio_in(7) <= shield_io29; + gpio_in(8) <= shield_io8; + gpio_in(9) <= shield_io9; + --gpio_in(10) <= shield_io10; + --gpio_in(11) <= shield_io11; + --gpio_in(12) <= shield_io12; + --gpio_in(13) <= shield_io13; + --gpio_in(14) <= shield_io26; + --gpio_in(15) <= shield_io27; + --gpio_in(16) <= shield_io28; + --gpio_in(17) <= shield_io29; + gpio_in(18) <= shield_io30; + gpio_in(19) <= shield_io31; + gpio_in(20) <= shield_io32; + gpio_in(21) <= shield_io33; + gpio_in(22) <= shield_io34; + gpio_in(23) <= shield_io35; + gpio_in(24) <= shield_io36; + gpio_in(25) <= shield_io37; + gpio_in(26) <= shield_io38; + gpio_in(27) <= shield_io39; + gpio_in(28) <= shield_io40; + gpio_in(29) <= shield_io41; + gpio_in(30) <= shield_io43; + gpio_in(31) <= shield_io44; led_b_pwm(1) <= gpio_out(0) when gpio_dir(0) = '1' else 'Z'; led_g_pwm(1) <= gpio_out(1) when gpio_dir(1) = '1' else 'Z'; @@ -763,37 +794,37 @@ begin led_g_pwm(3) <= gpio_out(7) when gpio_dir(7) = '1' else 'Z'; led_r_pwm(3) <= gpio_out(8) when gpio_dir(8) = '1' else 'Z'; - --shield_io(0) <= gpio_out(0) when gpio_dir(0) = '1' else 'Z'; - --shield_io(1) <= gpio_out(1) when gpio_dir(1) = '1' else 'Z'; - --shield_io(2) <= gpio_out(2) when gpio_dir(2) = '1' else 'Z'; - --shield_io(3) <= gpio_out(3) when gpio_dir(3) = '1' else 'Z'; - --shield_io(4) <= gpio_out(4) when gpio_dir(4) = '1' 
else 'Z'; - --shield_io(5) <= gpio_out(5) when gpio_dir(5) = '1' else 'Z'; - --shield_io(6) <= gpio_out(6) when gpio_dir(6) = '1' else 'Z'; - --shield_io(7) <= gpio_out(7) when gpio_dir(7) = '1' else 'Z'; - --shield_io(8) <= gpio_out(8) when gpio_dir(8) = '1' else 'Z'; - shield_io(9) <= gpio_out(9) when gpio_dir(9) = '1' else 'Z'; - shield_io(10) <= gpio_out(10) when gpio_dir(10) = '1' else 'Z'; - shield_io(11) <= gpio_out(11) when gpio_dir(11) = '1' else 'Z'; - shield_io(12) <= gpio_out(12) when gpio_dir(12) = '1' else 'Z'; - shield_io(13) <= gpio_out(13) when gpio_dir(13) = '1' else 'Z'; - shield_io(26) <= gpio_out(14) when gpio_dir(14) = '1' else 'Z'; - shield_io(27) <= gpio_out(15) when gpio_dir(15) = '1' else 'Z'; - shield_io(28) <= gpio_out(16) when gpio_dir(16) = '1' else 'Z'; - shield_io(29) <= gpio_out(17) when gpio_dir(17) = '1' else 'Z'; - shield_io(30) <= gpio_out(18) when gpio_dir(18) = '1' else 'Z'; - shield_io(31) <= gpio_out(19) when gpio_dir(19) = '1' else 'Z'; - shield_io(32) <= gpio_out(20) when gpio_dir(20) = '1' else 'Z'; - shield_io(33) <= gpio_out(21) when gpio_dir(21) = '1' else 'Z'; - shield_io(34) <= gpio_out(22) when gpio_dir(22) = '1' else 'Z'; - shield_io(35) <= gpio_out(23) when gpio_dir(23) = '1' else 'Z'; - shield_io(36) <= gpio_out(24) when gpio_dir(24) = '1' else 'Z'; - shield_io(37) <= gpio_out(25) when gpio_dir(25) = '1' else 'Z'; - shield_io(38) <= gpio_out(26) when gpio_dir(26) = '1' else 'Z'; - shield_io(39) <= gpio_out(27) when gpio_dir(27) = '1' else 'Z'; - shield_io(40) <= gpio_out(28) when gpio_dir(28) = '1' else 'Z'; - shield_io(41) <= gpio_out(29) when gpio_dir(29) = '1' else 'Z'; - shield_io(43) <= gpio_out(30) when gpio_dir(30) = '1' else 'Z'; - shield_io(44) <= gpio_out(31) when gpio_dir(31) = '1' else 'Z'; + --shield_io0 <= gpio_out(0) when gpio_dir(0) = '1' else 'Z'; + --shield_io1 <= gpio_out(1) when gpio_dir(1) = '1' else 'Z'; + --shield_io2 <= gpio_out(2) when gpio_dir(2) = '1' else 'Z'; + --shield_io3 <= 
gpio_out(3) when gpio_dir(3) = '1' else 'Z'; + --shield_io4 <= gpio_out(4) when gpio_dir(4) = '1' else 'Z'; + --shield_io5 <= gpio_out(5) when gpio_dir(5) = '1' else 'Z'; + --shield_io6 <= gpio_out(6) when gpio_dir(6) = '1' else 'Z'; + --shield_io7 <= gpio_out(7) when gpio_dir(7) = '1' else 'Z'; + --shield_io8 <= gpio_out(8) when gpio_dir(8) = '1' else 'Z'; + shield_io9 <= gpio_out(9) when gpio_dir(9) = '1' else 'Z'; + shield_io10 <= gpio_out(10) when gpio_dir(10) = '1' else 'Z'; + shield_io11 <= gpio_out(11) when gpio_dir(11) = '1' else 'Z'; + shield_io12 <= gpio_out(12) when gpio_dir(12) = '1' else 'Z'; + shield_io13 <= gpio_out(13) when gpio_dir(13) = '1' else 'Z'; + shield_io26 <= gpio_out(14) when gpio_dir(14) = '1' else 'Z'; + shield_io27 <= gpio_out(15) when gpio_dir(15) = '1' else 'Z'; + shield_io28 <= gpio_out(16) when gpio_dir(16) = '1' else 'Z'; + shield_io29 <= gpio_out(17) when gpio_dir(17) = '1' else 'Z'; + shield_io30 <= gpio_out(18) when gpio_dir(18) = '1' else 'Z'; + shield_io31 <= gpio_out(19) when gpio_dir(19) = '1' else 'Z'; + shield_io32 <= gpio_out(20) when gpio_dir(20) = '1' else 'Z'; + shield_io33 <= gpio_out(21) when gpio_dir(21) = '1' else 'Z'; + shield_io34 <= gpio_out(22) when gpio_dir(22) = '1' else 'Z'; + shield_io35 <= gpio_out(23) when gpio_dir(23) = '1' else 'Z'; + shield_io36 <= gpio_out(24) when gpio_dir(24) = '1' else 'Z'; + shield_io37 <= gpio_out(25) when gpio_dir(25) = '1' else 'Z'; + shield_io38 <= gpio_out(26) when gpio_dir(26) = '1' else 'Z'; + shield_io39 <= gpio_out(27) when gpio_dir(27) = '1' else 'Z'; + shield_io40 <= gpio_out(28) when gpio_dir(28) = '1' else 'Z'; + shield_io41 <= gpio_out(29) when gpio_dir(29) = '1' else 'Z'; + shield_io43 <= gpio_out(30) when gpio_dir(30) = '1' else 'Z'; + shield_io44 <= gpio_out(31) when gpio_dir(31) = '1' else 'Z'; end architecture behaviour; From 8c5dabd67f7cb1d8cd3a95a8c643c1eda48d8035 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 20 Jan 2022 08:36:10 +1100 Subject: 
[PATCH 2/5] dcache: Make r1.acks_pending independent of r1.state With this, the logic that maintains r1.acks_pending operates in every state based on r1.wb and wishbone_in, rather than only operating in STORE_WAIT_ACK state. This makes things a bit clearer and improves timing slightly. Signed-off-by: Paul Mackerras --- dcache.vhdl | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/dcache.vhdl b/dcache.vhdl index 08b17f81..11f563f7 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -341,7 +341,7 @@ architecture rtl of dcache is end_row_ix : row_in_line_t; rows_valid : row_per_line_valid_t; acks_pending : unsigned(2 downto 0); - inc_acks : std_ulogic; + stalled : std_ulogic; dec_acks : std_ulogic; choose_victim : std_ulogic; victim_way : way_t; @@ -1414,6 +1414,9 @@ begin r1.wb.stb <= '0'; r1.ls_valid <= '0'; r1.mmu_done <= '0'; + r1.acks_pending <= to_unsigned(0, 3); + r1.stalled <= '0'; + r1.dec_acks <= '0'; -- Not useful normally but helps avoiding tons of sim warnings r1.wb.adr <= (others => '0'); @@ -1421,8 +1424,6 @@ begin -- One cycle pulses reset r1.slow_valid <= '0'; r1.write_bram <= '0'; - r1.inc_acks <= '0'; - r1.dec_acks <= '0'; r1.ls_valid <= '0'; -- complete tlbies and TLB loads in the third cycle @@ -1509,6 +1510,19 @@ begin r1.choose_victim <= '1'; end if; + -- Update count of pending acks + acks := r1.acks_pending; + if r1.wb.cyc = '0' then + acks := to_unsigned(0, 3); + elsif r1.wb.stb = '1' and r1.stalled = '0' and r1.dec_acks = '0' then + acks := acks + 1; + elsif (r1.wb.stb = '0' or r1.stalled = '1') and r1.dec_acks = '1' then + acks := acks - 1; + end if; + r1.acks_pending <= acks; + r1.stalled <= wishbone_in.stall and r1.wb.cyc; + r1.dec_acks <= wishbone_in.ack and r1.wb.cyc; + -- Main state machine case r1.state is when IDLE => @@ -1563,7 +1577,6 @@ begin when OP_STORE_HIT | OP_STORE_MISS => if req.dcbz = '0' then r1.state <= STORE_WAIT_ACK; - r1.acks_pending <= to_unsigned(1, 3); r1.full <= '0'; 
r1.slow_valid <= '1'; if req.mmu_req = '0' then @@ -1657,15 +1670,6 @@ begin when STORE_WAIT_ACK => stbs_done := r1.wb.stb = '0'; - acks := r1.acks_pending; - if r1.inc_acks /= r1.dec_acks then - if r1.inc_acks = '1' then - acks := acks + 1; - else - acks := acks - 1; - end if; - end if; - r1.acks_pending <= acks; -- Clear stb when slave accepted request if wishbone_in.stall = '0' then -- See if there is another store waiting to be done @@ -1691,7 +1695,6 @@ begin -- Store requests never come from the MMU r1.ls_valid <= '1'; stbs_done := false; - r1.inc_acks <= '1'; else r1.wb.stb <= '0'; stbs_done := true; @@ -1706,7 +1709,6 @@ begin r1.wb.cyc <= '0'; r1.wb.stb <= '0'; end if; - r1.dec_acks <= '1'; end if; when NC_LOAD_WAIT_ACK => From 9c3d14dd5aa4ef82604759909fa6fab25550227d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 26 Jul 2023 15:49:12 +1000 Subject: [PATCH 3/5] dcache: Make reading of DTLB independent of d_in.valid This improves timing. Signed-off-by: Paul Mackerras --- dcache.vhdl | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/dcache.vhdl b/dcache.vhdl index 11f563f7..c9541e53 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -633,14 +633,20 @@ begin addrbits := d_in.addr(TLB_LG_PGSZ + TLB_SET_BITS - 1 downto TLB_LG_PGSZ); valid := d_in.valid; end if; - -- If we have any op and the previous op isn't finished, + -- If the previous op isn't finished, -- then keep the same output for next cycle. 
- if r0_stall = '0' and valid = '1' then - assert not is_X(addrbits); - index := to_integer(unsigned(addrbits)); - tlb_valid_way <= dtlb_valids(index); - tlb_tag_way <= dtlb_tags(index); - tlb_pte_way <= dtlb_ptes(index); + if r0_stall = '0' then + assert not (valid = '1' and is_X(addrbits)); + if is_X(addrbits) then + tlb_valid_way <= (others => 'X'); + tlb_tag_way <= (others => 'X'); + tlb_pte_way <= (others => 'X'); + else + index := to_integer(unsigned(addrbits)); + tlb_valid_way <= dtlb_valids(index); + tlb_tag_way <= dtlb_tags(index); + tlb_pte_way <= dtlb_ptes(index); + end if; end if; if rst = '1' then tlb_read_valid <= '0'; From a2890745d5378029b687161d92bc5b0078dcc305 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 26 Jul 2023 16:31:05 +1000 Subject: [PATCH 4/5] Makefile: Remove long micropython test from check_light It takes a very long time, so remove it from the "light" check. Signed-off-by: Paul Mackerras --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 49f7cc17..bf928da3 100644 --- a/Makefile +++ b/Makefile @@ -291,7 +291,7 @@ check_vunit: check: $(tests) tests_console test_micropython test_micropython_long tests_unit -check_light: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 test_micropython test_micropython_long tests_console tests_unit +check_light: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 test_micropython tests_console tests_unit $(tests): core_tb @./scripts/run_test.sh $@ From 4bef477e29e9bcbb8558d3d5f4247dfb5cf8cc8c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 28 Aug 2023 12:22:31 +1000 Subject: [PATCH 5/5] core_debug: Add support for detecting writes to a memory address This adds a new type of stop trigger for the log buffer which triggers when any byte(s) of a specified doubleword of memory are written. 
The trigger logic snoops the wishbone for writes to the address specified and stops the log 256 cycles later (same as for the instruction fetch address trigger). The trigger address is a real address and sees DMA writes from devices as well as stores done by the CPU. The mw_debug command has a new 'mtrig' subcommand to set the trigger and query its state. Signed-off-by: Paul Mackerras --- core.vhdl | 1 + core_debug.vhdl | 34 +++++++++++++++++++++++++++++++--- scripts/mw_debug/mw_debug.c | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 3 deletions(-) diff --git a/core.vhdl b/core.vhdl index aef2d7f2..a5560690 100644 --- a/core.vhdl +++ b/core.vhdl @@ -521,6 +521,7 @@ begin core_stopped => dbg_core_is_stopped, nia => fetch1_to_icache.nia, msr => ctrl_debug.msr, + wb_snoop_in => wb_snoop_in, dbg_gpr_req => dbg_gpr_req, dbg_gpr_ack => dbg_gpr_ack, dbg_gpr_addr => dbg_gpr_addr, diff --git a/core_debug.vhdl b/core_debug.vhdl index afebc7c4..c7215ff4 100644 --- a/core_debug.vhdl +++ b/core_debug.vhdl @@ -5,6 +5,7 @@ use ieee.numeric_std.all; library work; use work.utils.all; use work.common.all; +use work.wishbone_types.all; entity core_debug is generic ( @@ -32,6 +33,7 @@ entity core_debug is core_stopped : in std_ulogic; nia : in std_ulogic_vector(63 downto 0); msr : in std_ulogic_vector(63 downto 0); + wb_snoop_in : in wishbone_master_out := wishbone_master_out_init; -- GPR/FPR register read port dbg_gpr_req : out std_ulogic; @@ -104,6 +106,7 @@ architecture behave of core_debug is constant DBG_CORE_LOG_ADDR : std_ulogic_vector(3 downto 0) := "0110"; constant DBG_CORE_LOG_DATA : std_ulogic_vector(3 downto 0) := "0111"; constant DBG_CORE_LOG_TRIGGER : std_ulogic_vector(3 downto 0) := "1000"; + constant DBG_CORE_LOG_MTRIGGER : std_ulogic_vector(3 downto 0) := "1001"; constant LOG_INDEX_BITS : natural := log2(LOG_LENGTH); @@ -125,7 +128,11 @@ architecture behave of core_debug is signal log_dmi_addr : std_ulogic_vector(31 downto 0) := (others 
=> '0'); signal log_dmi_data : std_ulogic_vector(63 downto 0) := (others => '0'); signal log_dmi_trigger : std_ulogic_vector(63 downto 0) := (others => '0'); + signal log_mem_trigger : std_ulogic_vector(63 downto 0) := (others => '0'); signal do_log_trigger : std_ulogic := '0'; + signal do_log_mtrigger : std_ulogic := '0'; + signal trigger_was_log : std_ulogic := '0'; + signal trigger_was_mem : std_ulogic := '0'; signal do_dmi_log_rd : std_ulogic; signal dmi_read_log_data : std_ulogic; signal dmi_read_log_data_1 : std_ulogic; @@ -156,6 +163,7 @@ begin log_write_addr & log_dmi_addr when DBG_CORE_LOG_ADDR, log_dmi_data when DBG_CORE_LOG_DATA, log_dmi_trigger when DBG_CORE_LOG_TRIGGER, + log_mem_trigger when DBG_CORE_LOG_MTRIGGER, (others => '0') when others; -- DMI writes @@ -174,16 +182,27 @@ begin log_trigger_delay <= 0; gspr_index <= (others => '0'); log_dmi_addr <= (others => '0'); + trigger_was_log <= '0'; + trigger_was_mem <= '0'; else - if do_log_trigger = '1' or log_trigger_delay /= 0 then + if do_log_trigger = '1' or do_log_mtrigger = '1' or log_trigger_delay /= 0 then if log_trigger_delay = 255 or (LOG_LENGTH < 1024 and log_trigger_delay = LOG_LENGTH / 4) then - log_dmi_trigger(1) <= '1'; + log_dmi_trigger(1) <= trigger_was_log; + log_mem_trigger(1) <= trigger_was_mem; log_trigger_delay <= 0; + trigger_was_log <= '0'; + trigger_was_mem <= '0'; else log_trigger_delay <= log_trigger_delay + 1; end if; end if; + if do_log_trigger = '1' then + trigger_was_log <= '1'; + end if; + if do_log_mtrigger = '1' then + trigger_was_mem <= '1'; + end if; -- Edge detect on dmi_req for 1-shot pulses dmi_req_1 <= dmi_req; if dmi_req = '1' and dmi_req_1 = '0' then @@ -217,6 +236,8 @@ begin do_dmi_log_rd <= '1'; elsif dmi_addr = DBG_CORE_LOG_TRIGGER then log_dmi_trigger <= dmi_din; + elsif dmi_addr = DBG_CORE_LOG_MTRIGGER then + log_mem_trigger <= dmi_din; end if; else report("DMI read from " & to_string(dmi_addr)); @@ -347,7 +368,7 @@ begin begin -- Use MSB of read addresses 
to stop the logging
-        log_wr_enable <= not (log_read_addr(31) or log_dmi_addr(31) or log_dmi_trigger(1));
+        log_wr_enable <= not (log_read_addr(31) or log_dmi_addr(31) or log_dmi_trigger(1) or log_mem_trigger(1));
 
         log_ram: process(clk)
         begin
@@ -398,6 +419,13 @@ begin
                     log_dmi_trigger(0) = '1' then
                     do_log_trigger <= '1';
                 end if;
+                do_log_mtrigger <= '0';
+                if (wb_snoop_in.cyc and wb_snoop_in.stb and wb_snoop_in.we) = '1' and
+                    wb_snoop_in.adr = log_mem_trigger(wishbone_addr_bits + wishbone_log2_width - 1
+                                                      downto wishbone_log2_width) and
+                    log_mem_trigger(0) = '1' then
+                    do_log_mtrigger <= '1';
+                end if;
             end if;
         end process;
         log_write_addr(LOG_INDEX_BITS - 1 downto 0) <= std_ulogic_vector(log_wr_ptr);
diff --git a/scripts/mw_debug/mw_debug.c b/scripts/mw_debug/mw_debug.c
index 81e80941..07c10566 100644
--- a/scripts/mw_debug/mw_debug.c
+++ b/scripts/mw_debug/mw_debug.c
@@ -45,6 +45,7 @@
 #define DBG_LOG_ADDR		0x16
 #define DBG_LOG_DATA		0x17
 #define DBG_LOG_TRIGGER		0x18
+#define DBG_LOG_MTRIGGER	0x19
 
 static bool debug;
 
@@ -766,6 +767,28 @@ static void ltrig_set(uint64_t addr)
 	check(dmi_write(DBG_LOG_TRIGGER, (addr & ~(uint64_t)2) | 1), "writing LOG_TRIGGER");
 }
 
+static void mtrig_show(void)	/* print the memory-write stop trigger's address and state */
+{
+	uint64_t trig;	/* LOG_MTRIGGER value: bit 0 = enabled, bit 1 = triggered */
+
+	check(dmi_read(DBG_LOG_MTRIGGER, &trig), "reading LOG_MTRIGGER");
+	if (trig & 1)
+		printf("log memory stop trigger at %" PRIx64, trig & ~(uint64_t)7);	/* doubleword address */
+	else
+		printf("log memory stop trigger disabled");
+	printf(", %striggered\n", (trig & 2? "": "not "));
+}
+
+static void mtrig_off(void)	/* disable the memory-write stop trigger */
+{
+	check(dmi_write(DBG_LOG_MTRIGGER, 0), "writing LOG_MTRIGGER");	/* zeroes enable, triggered and address */
+}
+
+static void mtrig_set(uint64_t addr)	/* arm the trigger on the doubleword containing addr */
+{
+	check(dmi_write(DBG_LOG_MTRIGGER, (addr & ~(uint64_t)7) | 1), "writing LOG_MTRIGGER");	/* bit 0 enables; bit 1 (triggered) is cleared */
+}
+
 static void usage(const char *cmd)
 {
 	fprintf(stderr, "Usage: %s -b \n", cmd);
@@ -798,6 +821,9 @@ static void usage(const char *cmd)
 	fprintf(stderr, " ltrig show logging stop trigger status\n");
 	fprintf(stderr, " ltrig off clear logging stop trigger address\n");
 	fprintf(stderr, " ltrig set logging stop trigger address\n");
+	fprintf(stderr, " mtrig              show memory write stop trigger status\n");
+	fprintf(stderr, " mtrig off          clear memory write stop trigger address\n");
+	fprintf(stderr, " mtrig <addr>       set memory write stop trigger address (hex)\n");
 	fprintf(stderr, "\n");
 
 	fprintf(stderr, " JTAG:\n");
@@ -967,6 +993,17 @@ int main(int argc, char *argv[])
 				addr = strtoul(argv[i], NULL, 16);
 				ltrig_set(addr);
 			}
+		} else if (strcmp(argv[i], "mtrig") == 0) {
+			uint64_t addr;
+
+			if ((i+1) >= argc)
+				mtrig_show();
+			else if (strcmp(argv[++i], "off") == 0)
+				mtrig_off();
+			else {
+				addr = strtoull(argv[i], NULL, 16);	/* 64-bit parse: strtoul truncates where long is 32 bits */
+				mtrig_set(addr);
+			}
 		} else {
 			fprintf(stderr, "Unknown command %s\n", argv[i]);
 			usage(argv[0]);