diff --git a/hw/core-v-mini-mcu/core_v_mini_mcu.sv b/hw/core-v-mini-mcu/core_v_mini_mcu.sv index 0dc0a480c..9248d2306 100644 --- a/hw/core-v-mini-mcu/core_v_mini_mcu.sv +++ b/hw/core-v-mini-mcu/core_v_mini_mcu.sv @@ -459,6 +459,66 @@ module core_v_mini_mcu assign memory_subsystem_banks_powergate_iso_n[1] = memory_subsystem_pwr_ctrl_out[1].isogate_en_n; assign memory_subsystem_banks_set_retentive_n[1] = memory_subsystem_pwr_ctrl_out[1].retentive_en_n; assign memory_subsystem_clkgate_en_n[1] = memory_subsystem_pwr_ctrl_out[1].clkgate_en_n; + assign memory_subsystem_banks_powergate_switch_n[2] = memory_subsystem_pwr_ctrl_out[2].pwrgate_en_n; + assign memory_subsystem_pwr_ctrl_in[2].pwrgate_ack_n = memory_subsystem_banks_powergate_switch_ack_n[2]; + //isogate exposed outside for UPF sim flow and switch cells + assign memory_subsystem_banks_powergate_iso_n[2] = memory_subsystem_pwr_ctrl_out[2].isogate_en_n; + assign memory_subsystem_banks_set_retentive_n[2] = memory_subsystem_pwr_ctrl_out[2].retentive_en_n; + assign memory_subsystem_clkgate_en_n[2] = memory_subsystem_pwr_ctrl_out[2].clkgate_en_n; + assign memory_subsystem_banks_powergate_switch_n[3] = memory_subsystem_pwr_ctrl_out[3].pwrgate_en_n; + assign memory_subsystem_pwr_ctrl_in[3].pwrgate_ack_n = memory_subsystem_banks_powergate_switch_ack_n[3]; + //isogate exposed outside for UPF sim flow and switch cells + assign memory_subsystem_banks_powergate_iso_n[3] = memory_subsystem_pwr_ctrl_out[3].isogate_en_n; + assign memory_subsystem_banks_set_retentive_n[3] = memory_subsystem_pwr_ctrl_out[3].retentive_en_n; + assign memory_subsystem_clkgate_en_n[3] = memory_subsystem_pwr_ctrl_out[3].clkgate_en_n; + assign memory_subsystem_banks_powergate_switch_n[4] = memory_subsystem_pwr_ctrl_out[4].pwrgate_en_n; + assign memory_subsystem_pwr_ctrl_in[4].pwrgate_ack_n = memory_subsystem_banks_powergate_switch_ack_n[4]; + //isogate exposed outside for UPF sim flow and switch cells + assign memory_subsystem_banks_powergate_iso_n[4] = memory_subsystem_pwr_ctrl_out[4].isogate_en_n; + assign memory_subsystem_banks_set_retentive_n[4] = memory_subsystem_pwr_ctrl_out[4].retentive_en_n; + assign memory_subsystem_clkgate_en_n[4] = memory_subsystem_pwr_ctrl_out[4].clkgate_en_n; + assign memory_subsystem_banks_powergate_switch_n[5] = memory_subsystem_pwr_ctrl_out[5].pwrgate_en_n; + assign memory_subsystem_pwr_ctrl_in[5].pwrgate_ack_n = memory_subsystem_banks_powergate_switch_ack_n[5]; + //isogate exposed outside for UPF sim flow and switch cells + assign memory_subsystem_banks_powergate_iso_n[5] = memory_subsystem_pwr_ctrl_out[5].isogate_en_n; + assign memory_subsystem_banks_set_retentive_n[5] = memory_subsystem_pwr_ctrl_out[5].retentive_en_n; + assign memory_subsystem_clkgate_en_n[5] = memory_subsystem_pwr_ctrl_out[5].clkgate_en_n; + assign memory_subsystem_banks_powergate_switch_n[6] = memory_subsystem_pwr_ctrl_out[6].pwrgate_en_n; + assign memory_subsystem_pwr_ctrl_in[6].pwrgate_ack_n = memory_subsystem_banks_powergate_switch_ack_n[6]; + //isogate exposed outside for UPF sim flow and switch cells + assign memory_subsystem_banks_powergate_iso_n[6] = memory_subsystem_pwr_ctrl_out[6].isogate_en_n; + assign memory_subsystem_banks_set_retentive_n[6] = memory_subsystem_pwr_ctrl_out[6].retentive_en_n; + assign memory_subsystem_clkgate_en_n[6] = memory_subsystem_pwr_ctrl_out[6].clkgate_en_n; + assign memory_subsystem_banks_powergate_switch_n[7] = memory_subsystem_pwr_ctrl_out[7].pwrgate_en_n; + assign memory_subsystem_pwr_ctrl_in[7].pwrgate_ack_n = memory_subsystem_banks_powergate_switch_ack_n[7]; + //isogate exposed outside for UPF sim flow and switch cells + assign memory_subsystem_banks_powergate_iso_n[7] = memory_subsystem_pwr_ctrl_out[7].isogate_en_n; + assign memory_subsystem_banks_set_retentive_n[7] = memory_subsystem_pwr_ctrl_out[7].retentive_en_n; + assign memory_subsystem_clkgate_en_n[7] = memory_subsystem_pwr_ctrl_out[7].clkgate_en_n; + assign memory_subsystem_banks_powergate_switch_n[8] = memory_subsystem_pwr_ctrl_out[8].pwrgate_en_n; + assign memory_subsystem_pwr_ctrl_in[8].pwrgate_ack_n = memory_subsystem_banks_powergate_switch_ack_n[8]; + //isogate exposed outside for UPF sim flow and switch cells + assign memory_subsystem_banks_powergate_iso_n[8] = memory_subsystem_pwr_ctrl_out[8].isogate_en_n; + assign memory_subsystem_banks_set_retentive_n[8] = memory_subsystem_pwr_ctrl_out[8].retentive_en_n; + assign memory_subsystem_clkgate_en_n[8] = memory_subsystem_pwr_ctrl_out[8].clkgate_en_n; + assign memory_subsystem_banks_powergate_switch_n[9] = memory_subsystem_pwr_ctrl_out[9].pwrgate_en_n; + assign memory_subsystem_pwr_ctrl_in[9].pwrgate_ack_n = memory_subsystem_banks_powergate_switch_ack_n[9]; + //isogate exposed outside for UPF sim flow and switch cells + assign memory_subsystem_banks_powergate_iso_n[9] = memory_subsystem_pwr_ctrl_out[9].isogate_en_n; + assign memory_subsystem_banks_set_retentive_n[9] = memory_subsystem_pwr_ctrl_out[9].retentive_en_n; + assign memory_subsystem_clkgate_en_n[9] = memory_subsystem_pwr_ctrl_out[9].clkgate_en_n; + assign memory_subsystem_banks_powergate_switch_n[10] = memory_subsystem_pwr_ctrl_out[10].pwrgate_en_n; + assign memory_subsystem_pwr_ctrl_in[10].pwrgate_ack_n = memory_subsystem_banks_powergate_switch_ack_n[10]; + //isogate exposed outside for UPF sim flow and switch cells + assign memory_subsystem_banks_powergate_iso_n[10] = memory_subsystem_pwr_ctrl_out[10].isogate_en_n; + assign memory_subsystem_banks_set_retentive_n[10] = memory_subsystem_pwr_ctrl_out[10].retentive_en_n; + assign memory_subsystem_clkgate_en_n[10] = memory_subsystem_pwr_ctrl_out[10].clkgate_en_n; + assign memory_subsystem_banks_powergate_switch_n[11] = memory_subsystem_pwr_ctrl_out[11].pwrgate_en_n; + assign memory_subsystem_pwr_ctrl_in[11].pwrgate_ack_n = memory_subsystem_banks_powergate_switch_ack_n[11]; + //isogate exposed outside for UPF sim flow and switch cells + assign memory_subsystem_banks_powergate_iso_n[11] = memory_subsystem_pwr_ctrl_out[11].isogate_en_n; + assign memory_subsystem_banks_set_retentive_n[11] = memory_subsystem_pwr_ctrl_out[11].retentive_en_n; + assign memory_subsystem_clkgate_en_n[11] = memory_subsystem_pwr_ctrl_out[11].clkgate_en_n; for (genvar i = 0; i < EXT_DOMAINS_RND; i = i + 1) begin assign external_subsystem_powergate_switch_no[i] = external_subsystem_pwr_ctrl_out[i].pwrgate_en_n; diff --git a/hw/ip_examples/im2col_spc/im2col_spc.core b/hw/ip_examples/im2col_spc/im2col_spc.core index db8a99d11..e196ccac5 100644 --- a/hw/ip_examples/im2col_spc/im2col_spc.core +++ b/hw/ip_examples/im2col_spc/im2col_spc.core @@ -11,6 +11,7 @@ filesets: files_rtl: depend: - pulp-platform.org::common_cells + - x-heep:ip:dma files: - rtl/im2col_spc_reg_pkg.sv - rtl/im2col_spc_reg_top.sv diff --git a/hw/ip_examples/im2col_spc/rtl/im2col_spc.sv b/hw/ip_examples/im2col_spc/rtl/im2col_spc.sv index 2e3d724ac..adb936e3f 100644 --- a/hw/ip_examples/im2col_spc/rtl/im2col_spc.sv +++ b/hw/ip_examples/im2col_spc/rtl/im2col_spc.sv @@ -33,29 +33,12 @@ module im2col_spc import core_v_mini_mcu_pkg::*; import dma_if_pkg::*; import im2col_spc_reg_pkg::*; + import dma_reg_pkg::*; /*_________________________________________________________________________________________________________________________________ */ /* Parameter definition */ - /* DMA register offsets */ - localparam DMA_DIMENSIONALITY_OFFSET = 32'h3C; - localparam DMA_SRC_PTR_OFFSET = 32'h0; - localparam DMA_DST_PTR_OFFSET = 32'h4; - localparam DMA_INC_SRC_D1_OFFSET = 32'h18; - localparam DMA_INC_SRC_D2_OFFSET = 32'h1C; - localparam DMA_INC_DST_D1_OFFSET = 32'h20; - localparam DMA_INC_DST_D2_OFFSET = 32'h24; - localparam DMA_SIZE_D2_OFFSET = 32'h10; - localparam DMA_SIZE_D1_OFFSET = 32'hC; - localparam DMA_SRC_DATATYPE_OFFSET = 32'h2C; - localparam DMA_DST_DATATYPE_OFFSET = 32'h30; - localparam DMA_TOP_PAD_OFFSET = 32'h44; - localparam DMA_BOTTOM_PAD_OFFSET = 32'h48; - localparam DMA_RIGHT_PAD_OFFSET = 32'h4C; - localparam DMA_LEFT_PAD_OFFSET = 32'h50; - localparam DMA_SLOTS_OFFSET = 32'h28; - /* FIFO dimension */ localparam FIFO_DEPTH = 8; @@ -435,7 +418,7 @@ module im2col_spc dma_wdata = 32'h1; dma_addr = core_v_mini_mcu_pkg::DMA_START_ADDRESS + dma_trans_free_channel * core_v_mini_mcu_pkg::DMA_CH_SIZE + - DMA_DIMENSIONALITY_OFFSET; + {25'h0, dma_reg_pkg::DMA_DIM_CONFIG_OFFSET}; dma_regintfc_start = 1'b1; end @@ -443,7 +426,7 @@ module im2col_spc dma_wdata = {reg2hw.slot.tx_trigger_slot.q, reg2hw.slot.rx_trigger_slot.q}; dma_addr = core_v_mini_mcu_pkg::DMA_START_ADDRESS + dma_trans_free_channel * core_v_mini_mcu_pkg::DMA_CH_SIZE + - DMA_SLOTS_OFFSET; + {25'h0, dma_reg_pkg::DMA_SLOT_OFFSET}; dma_regintfc_start = 1'b1; end @@ -451,7 +434,7 @@ module im2col_spc dma_wdata = {30'h0, reg2hw.data_type.q} & 32'h3; dma_addr = core_v_mini_mcu_pkg::DMA_START_ADDRESS + dma_trans_free_channel * core_v_mini_mcu_pkg::DMA_CH_SIZE + - DMA_SRC_DATATYPE_OFFSET; + {25'h0, dma_reg_pkg::DMA_SRC_DATA_TYPE_OFFSET}; dma_regintfc_start = 1'b1; end @@ -459,7 +442,7 @@ module im2col_spc dma_wdata = {30'h0, reg2hw.data_type.q} & 32'h3; dma_addr = core_v_mini_mcu_pkg::DMA_START_ADDRESS + dma_trans_free_channel * core_v_mini_mcu_pkg::DMA_CH_SIZE + - DMA_DST_DATATYPE_OFFSET; + {25'h0, dma_reg_pkg::DMA_DST_DATA_TYPE_OFFSET}; dma_regintfc_start = 1'b1; end @@ -467,7 +450,7 @@ module im2col_spc dma_wdata = {24'h0, fifo_output.n_zeros_top}; dma_addr = core_v_mini_mcu_pkg::DMA_START_ADDRESS + dma_trans_free_channel * core_v_mini_mcu_pkg::DMA_CH_SIZE + - DMA_TOP_PAD_OFFSET; + {25'h0, dma_reg_pkg::DMA_PAD_TOP_OFFSET}; dma_regintfc_start = 1'b1; end @@ -475,7 +458,7 @@ module im2col_spc dma_wdata = {24'h0, fifo_output.n_zeros_bottom}; dma_addr = core_v_mini_mcu_pkg::DMA_START_ADDRESS + dma_trans_free_channel * core_v_mini_mcu_pkg::DMA_CH_SIZE + - DMA_BOTTOM_PAD_OFFSET; + {25'h0, dma_reg_pkg::DMA_PAD_BOTTOM_OFFSET}; dma_regintfc_start = 1'b1; end @@ -483,7 +466,7 @@ module im2col_spc dma_wdata = {24'h0, fifo_output.n_zeros_left}; dma_addr = core_v_mini_mcu_pkg::DMA_START_ADDRESS + dma_trans_free_channel * core_v_mini_mcu_pkg::DMA_CH_SIZE + - DMA_LEFT_PAD_OFFSET; + {25'h0, dma_reg_pkg::DMA_PAD_LEFT_OFFSET}; dma_regintfc_start = 1'b1; end @@ -491,7 +474,7 @@ module im2col_spc dma_wdata = {24'h0, fifo_output.n_zeros_right}; dma_addr = core_v_mini_mcu_pkg::DMA_START_ADDRESS + dma_trans_free_channel * core_v_mini_mcu_pkg::DMA_CH_SIZE + - DMA_RIGHT_PAD_OFFSET; + {25'h0, dma_reg_pkg::DMA_PAD_RIGHT_OFFSET}; dma_regintfc_start = 1'b1; end @@ -499,7 +482,7 @@ module im2col_spc dma_wdata = fifo_output.input_ptr; dma_addr = core_v_mini_mcu_pkg::DMA_START_ADDRESS + dma_trans_free_channel * core_v_mini_mcu_pkg::DMA_CH_SIZE + - DMA_SRC_PTR_OFFSET; + {25'h0, dma_reg_pkg::DMA_SRC_PTR_OFFSET}; dma_regintfc_start = 1'b1; end @@ -507,7 +490,7 @@ module im2col_spc dma_wdata = fifo_output.output_ptr; dma_addr = core_v_mini_mcu_pkg::DMA_START_ADDRESS + dma_trans_free_channel * core_v_mini_mcu_pkg::DMA_CH_SIZE + - DMA_DST_PTR_OFFSET; + {25'h0, dma_reg_pkg::DMA_DST_PTR_OFFSET}; dma_regintfc_start = 1'b1; end @@ -515,7 +498,7 @@ module im2col_spc dma_wdata = (1 << {28'h0, reg2hw.log_strides_d1.q}) << (2 - reg2hw.data_type.q) & 32'h3f; dma_addr = core_v_mini_mcu_pkg::DMA_START_ADDRESS + dma_trans_free_channel * core_v_mini_mcu_pkg::DMA_CH_SIZE + - DMA_INC_SRC_D1_OFFSET; + {25'h0, dma_reg_pkg::DMA_SRC_PTR_INC_D1_OFFSET}; dma_regintfc_start = 1'b1; end @@ -523,7 +506,7 @@ module im2col_spc dma_wdata = {9'h0, fifo_output.in_inc_d2} << (2 - reg2hw.data_type.q) & 32'h7fffff; dma_addr = core_v_mini_mcu_pkg::DMA_START_ADDRESS + dma_trans_free_channel * core_v_mini_mcu_pkg::DMA_CH_SIZE + - DMA_INC_SRC_D2_OFFSET; + {25'h0, dma_reg_pkg::DMA_SRC_PTR_INC_D2_OFFSET}; dma_regintfc_start = 1'b1; end @@ -531,7 +514,7 @@ module im2col_spc dma_wdata = (4 >> reg2hw.data_type.q) & 32'h3f; dma_addr = core_v_mini_mcu_pkg::DMA_START_ADDRESS + dma_trans_free_channel * core_v_mini_mcu_pkg::DMA_CH_SIZE + - DMA_INC_DST_D1_OFFSET; + {25'h0, dma_reg_pkg::DMA_DST_PTR_INC_D1_OFFSET}; dma_regintfc_start = 1'b1; end @@ -539,7 +522,7 @@ module im2col_spc dma_wdata = (4 >> reg2hw.data_type.q) & 32'h7fffff; dma_addr = core_v_mini_mcu_pkg::DMA_START_ADDRESS + dma_trans_free_channel * core_v_mini_mcu_pkg::DMA_CH_SIZE + - DMA_INC_DST_D2_OFFSET; + {25'h0, dma_reg_pkg::DMA_DST_PTR_INC_D2_OFFSET}; dma_regintfc_start = 1'b1; end @@ -547,7 +530,7 @@ module im2col_spc dma_wdata = {16'h0, fifo_output.size_du_d2}; dma_addr = core_v_mini_mcu_pkg::DMA_START_ADDRESS + dma_trans_free_channel * core_v_mini_mcu_pkg::DMA_CH_SIZE + - DMA_SIZE_D2_OFFSET; + {25'h0, dma_reg_pkg::DMA_SIZE_D2_OFFSET}; dma_regintfc_start = 1'b1; end @@ -555,7 +538,7 @@ module im2col_spc dma_wdata = {16'h0, fifo_output.size_du_d1}; dma_addr = core_v_mini_mcu_pkg::DMA_START_ADDRESS + dma_trans_free_channel * core_v_mini_mcu_pkg::DMA_CH_SIZE + - DMA_SIZE_D1_OFFSET; + {25'h0, dma_reg_pkg::DMA_SIZE_D1_OFFSET}; dma_regintfc_start = 1'b1; end diff --git a/sw/applications/example_im2col/im2col_lib.c b/sw/applications/example_im2col/im2col_lib.c index 5a16cb866..f089a88c5 100644 --- a/sw/applications/example_im2col/im2col_lib.c +++ b/sw/applications/example_im2col/im2col_lib.c @@ -391,31 +391,6 @@ int im2col_nchw_int32(uint8_t test_id, unsigned int *cycles) uint32_t* input_image_ptr = &input_image_nchw[0]; uint32_t* output_data_ptr = &output_data[0]; - /* Initializing PLIC */ - if(plic_Init()) - { - return EXIT_FAILURE; - }; - - if(plic_irq_set_priority(EXT_INTR_2, 1)) - { - return EXIT_FAILURE; - }; - - if(plic_irq_set_enabled(EXT_INTR_2, kPlicToggleEnabled)) - { - return EXIT_FAILURE; - }; - - plic_assign_external_irq_handler(EXT_INTR_2, &handler_irq_im2col_spc); - - /* Enable global interrupt for machine-level interrupts */ - CSR_SET_BITS(CSR_REG_MSTATUS, 0x8); - - /* Set mie.MEIE bit to one to enable machine-level external interrupts */ - const uint32_t mask = 1 << 11; - CSR_SET_BITS(CSR_REG_MIE, mask); - #if TIMING CSR_SET_BITS(CSR_REG_MCOUNTINHIBIT, 0x1); CSR_WRITE(CSR_REG_MCYCLE, 0); @@ -424,157 +399,32 @@ int im2col_nchw_int32(uint8_t test_id, unsigned int *cycles) #if TIMING CSR_READ(CSR_REG_MCYCLE, &cycles_A); #endif - - dma_init(NULL); - - /* Write the number of DMA channels the SPC has access to */ - write_register( SPC_CH_MASK, - IM2COL_SPC_SPC_CH_MASK_REG_OFFSET, - 0xffffffff, - 0, - IM2COL_SPC_BASE_ADDR ); - - /* Write the source */ - write_register( input_image_ptr, - IM2COL_SPC_SRC_PTR_REG_OFFSET, - 0xffffffff, - 0, - IM2COL_SPC_BASE_ADDR ); - - /* Write the destination */ - write_register( output_data_ptr, - IM2COL_SPC_DST_PTR_REG_OFFSET, - 0xffffffff, - 0, - IM2COL_SPC_BASE_ADDR ); - - /* Write the datatype */ - write_register( DMA_DATA_TYPE_WORD, - IM2COL_SPC_DATA_TYPE_REG_OFFSET, - IM2COL_SPC_DATA_TYPE_DATA_TYPE_MASK, - IM2COL_SPC_DATA_TYPE_DATA_TYPE_OFFSET, - IM2COL_SPC_BASE_ADDR ); - - /* Write the filter dimensions */ - write_register( FW, - IM2COL_SPC_FW_REG_OFFSET, - IM2COL_SPC_FW_SIZE_MASK, - IM2COL_SPC_FW_SIZE_OFFSET, - IM2COL_SPC_BASE_ADDR ); - - write_register( FH, - IM2COL_SPC_FH_REG_OFFSET, - IM2COL_SPC_FH_SIZE_MASK, - IM2COL_SPC_FH_SIZE_OFFSET, - IM2COL_SPC_BASE_ADDR ); - - /* Write the image dimensions */ - write_register( IW, - IM2COL_SPC_IW_REG_OFFSET, - 0xffffffff, - 0, - IM2COL_SPC_BASE_ADDR ); - - write_register( IH, - IM2COL_SPC_IH_REG_OFFSET, - 0xffffffff, - 0, - IM2COL_SPC_BASE_ADDR ); - - /* Write the CH_COL */ - write_register( CH_COL, - IM2COL_SPC_CH_COL_REG_OFFSET, - IM2COL_SPC_CH_COL_NUM_MASK, - IM2COL_SPC_CH_COL_NUM_OFFSET, - IM2COL_SPC_BASE_ADDR ); - - /* Write n_patches */ - write_register( N_PATCHES_W, - IM2COL_SPC_N_PATCHES_W_REG_OFFSET, - IM2COL_SPC_N_PATCHES_W_NUM_MASK, - IM2COL_SPC_N_PATCHES_W_NUM_OFFSET, - IM2COL_SPC_BASE_ADDR ); - - write_register( N_PATCHES_H, - IM2COL_SPC_N_PATCHES_H_REG_OFFSET, - IM2COL_SPC_N_PATCHES_H_NUM_MASK, - IM2COL_SPC_N_PATCHES_H_NUM_OFFSET, - IM2COL_SPC_BASE_ADDR ); - - /* Write the padding */ - write_register( LEFT_PAD, - IM2COL_SPC_PAD_LEFT_REG_OFFSET, - IM2COL_SPC_PAD_LEFT_PAD_MASK, - IM2COL_SPC_PAD_LEFT_PAD_OFFSET, - IM2COL_SPC_BASE_ADDR ); - - write_register( RIGHT_PAD, - IM2COL_SPC_PAD_RIGHT_REG_OFFSET, - IM2COL_SPC_PAD_RIGHT_PAD_MASK, - IM2COL_SPC_PAD_RIGHT_PAD_OFFSET, - IM2COL_SPC_BASE_ADDR ); - - write_register( TOP_PAD, - IM2COL_SPC_PAD_TOP_REG_OFFSET, - IM2COL_SPC_PAD_TOP_PAD_MASK, - IM2COL_SPC_PAD_TOP_PAD_OFFSET, - IM2COL_SPC_BASE_ADDR ); - - write_register( BOTTOM_PAD, - IM2COL_SPC_PAD_BOTTOM_REG_OFFSET, - IM2COL_SPC_PAD_BOTTOM_PAD_MASK, - IM2COL_SPC_PAD_BOTTOM_PAD_OFFSET, - IM2COL_SPC_BASE_ADDR ); - - /* - * Write the strides. With respect to test_2 these are the application-point-of-view - * strides, so they are the same as STRIDE_D1 and STRIDE_D2. - */ - write_register( (int) log2(STRIDE_D1), - IM2COL_SPC_LOG_STRIDES_D1_REG_OFFSET, - IM2COL_SPC_LOG_STRIDES_D1_SIZE_MASK, - IM2COL_SPC_LOG_STRIDES_D1_SIZE_OFFSET, - IM2COL_SPC_BASE_ADDR ); - - write_register( (int) log2(STRIDE_D2), - IM2COL_SPC_LOG_STRIDES_D2_REG_OFFSET, - IM2COL_SPC_LOG_STRIDES_D2_SIZE_MASK, - IM2COL_SPC_LOG_STRIDES_D2_SIZE_OFFSET, - IM2COL_SPC_BASE_ADDR ); - - /* Write the batch size */ - write_register( BATCH, - IM2COL_SPC_BATCH_REG_OFFSET, - IM2COL_SPC_BATCH_SIZE_MASK, - IM2COL_SPC_BATCH_SIZE_OFFSET, - IM2COL_SPC_BASE_ADDR ); - - /* Write the adapted pad regions */ - write_register( ADPT_PAD_RIGHT, - IM2COL_SPC_ADPT_PAD_RIGHT_REG_OFFSET, - 0xffffffff, - 0, - IM2COL_SPC_BASE_ADDR ); - - write_register( ADPT_PAD_BOTTOM, - IM2COL_SPC_ADPT_PAD_BOTTOM_REG_OFFSET, - 0xffffffff, - 0, - IM2COL_SPC_BASE_ADDR ); - - /* Enable the interrupt logic */ - write_register( 0x1, - IM2COL_SPC_INTERRUPT_EN_REG_OFFSET, - 0x1, - IM2COL_SPC_INTERRUPT_EN_EN_BIT, - IM2COL_SPC_BASE_ADDR ); - - /* Write the number of channels to start the process */ - write_register( CH, - IM2COL_SPC_NUM_CH_REG_OFFSET, - IM2COL_SPC_NUM_CH_NUM_MASK, - IM2COL_SPC_NUM_CH_NUM_OFFSET, - IM2COL_SPC_BASE_ADDR ); + + static im2col_trans_t im2col_spc_trans = { + .ch_mask = SPC_CH_MASK, + .im_width = IW, + .im_height = IH, + .filter_width = FW, + .filter_height = FH, + .num_channels = CH, + .num_channels_col = CH_COL, + .stride_d1 = STRIDE_D1, + .stride_d2 = STRIDE_D2, + .batch = BATCH, + .n_patches_w = N_PATCHES_W, + .n_patches_h = N_PATCHES_H, + .left_pad = LEFT_PAD, + .right_pad = RIGHT_PAD, + .top_pad = TOP_PAD, + .bottom_pad = BOTTOM_PAD, + .adpt_pad_right = ADPT_PAD_RIGHT, + .adpt_pad_bottom = ADPT_PAD_BOTTOM + }; + + im2col_spc_trans.src = input_image_ptr; + im2col_spc_trans.dst = output_data_ptr; + + run_im2col(im2col_spc_trans); waiting_for_spc_irq(); diff --git a/sw/applications/example_im2col/im2col_lib.h b/sw/applications/example_im2col/im2col_lib.h index ad36efd93..a05904ee6 100644 --- a/sw/applications/example_im2col/im2col_lib.h +++ b/sw/applications/example_im2col/im2col_lib.h @@ -17,6 +17,7 @@ #include #include "im2col_golden.h" #include "im2col_input.h" +#include "im2col.h" #include "dma.h" #include "im2col_spc_regs.h" #include "core_v_mini_mcu.h" diff --git a/sw/applications/example_tensor_format_conv/main.c b/sw/applications/example_tensor_format_conv/main.c index 0a774e251..575b8dee0 100644 --- a/sw/applications/example_tensor_format_conv/main.c +++ b/sw/applications/example_tensor_format_conv/main.c @@ -11,7 +11,7 @@ * This is a simple example of a HWC tensor with 3 channels, 2 rows and 2 columns: * (1, 2, 3) (4, 5, 6) * (7, 8, 9) (10, 11, 12) - * (1, 2, 3) are the values of the first "pixel" of the tensor across differenta channels, 1 for CH0, 2 for CH1 and 3 for CH2. + * (1, 2, 3) are the values of the first "pixel" of the tensor across different channels, 1 for CH0, 2 for CH1 and 3 for CH2. * * On the other hand, this is the same tensor represented with the CHW format: * diff --git a/sw/device/lib/drivers/dma/dma_golden b/sw/device/lib/drivers/dma/dma_golden deleted file mode 100644 index 7576332d6..000000000 --- a/sw/device/lib/drivers/dma/dma_golden +++ /dev/null @@ -1,159 +0,0 @@ -run 1: - -PAD_TOP: 8 - -PAD_BOTTOM: 0 - -PAD_LEFT: 4 - -PAD_RIGHT: 4 - -SRC_PTR: 52180 - -DST_PTR: 53076 - -DIM_INV: 0 - -SRC_PTR_INC_D1: 8 - -SRC_PTR_INC_D2: 56 - -DST_PTR_INC_D1: 4 - -DST_PTR_INC_D2: 4 - -MODE: 0 - -DIM_CONFIG: 1 - -DIM_CONFIG: 1 - -RX_TRIGGER_SLOT: 0 - -TX_TRIGGER_SLOT: 0 - -DATA_TYPE: 0 - -SIZE_D2: 8 - -SIZE_D1: 12 - -run 2: - -PAD_TOP: 8 - -PAD_BOTTOM: 0 - -PAD_LEFT: 4 - -PAD_RIGHT: 4 - -SRC_PTR: 52184 - -DST_PTR: 53156 - -DIM_INV: 0 - -SRC_PTR_INC_D1: 8 - -SRC_PTR_INC_D2: 56 - -DST_PTR_INC_D1: 4 - -DST_PTR_INC_D2: 4 - -MODE: 0 - -DIM_CONFIG: 1 - -DIM_CONFIG: 1 - -RX_TRIGGER_SLOT: 0 - -TX_TRIGGER_SLOT: 0 - -DATA_TYPE: 0 - -SIZE_D2: 8 - -SIZE_D1: 12 - -run 3: - -PAD_TOP: 4 - -PAD_BOTTOM: 4 - -PAD_LEFT: 4 - -PAD_RIGHT: 4 - -SRC_PTR: 52132 - -DST_PTR: 53236 - -DIM_INV: 0 - -SRC_PTR_INC_D1: 8 - -SRC_PTR_INC_D2: 56 - -DST_PTR_INC_D1: 4 - -DST_PTR_INC_D2: 4 - -MODE: 0 - -DIM_CONFIG: 1 - -DIM_CONFIG: 1 - -RX_TRIGGER_SLOT: 0 - -TX_TRIGGER_SLOT: 0 - -DATA_TYPE: 0 - -SIZE_D2: 8 - -SIZE_D1: 12 - -run 4: - -PAD_TOP: 4 - -PAD_BOTTOM: 4 - -PAD_LEFT: 4 - -PAD_RIGHT: 4 - -SRC_PTR: 52136 - -DST_PTR: 53316 - -DIM_INV: 0 - -SRC_PTR_INC_D1: 8 - -SRC_PTR_INC_D2: 56 - -DST_PTR_INC_D1: 4 - -DST_PTR_INC_D2: 4 - -MODE: 0 - -DIM_CONFIG: 1 - -DIM_CONFIG: 1 - -RX_TRIGGER_SLOT: 0 - -TX_TRIGGER_SLOT: 0 - -DATA_TYPE: 0 - -SIZE_D2: 8 - -SIZE_D1: 12 \ No newline at end of file diff --git a/sw/device/lib/drivers/dma/dma_im2col b/sw/device/lib/drivers/dma/dma_im2col deleted file mode 100644 index b092b5073..000000000 --- a/sw/device/lib/drivers/dma/dma_im2col +++ /dev/null @@ -1,121 +0,0 @@ -run 1: - -DIM_CONFIG: 1 - -DATA_TYPE: 0 - -PAD_TOP: 8 - -PAD_BOTTOM: 0 - -PAD_LEFT: 4 - -PAD_RIGHT: 4 - -SRC_PTR: 52180 - -DST_PTR: 53076 - -SRC_PTR_INC_D1: 8 - -SRC_PTR_INC_D2: 56 - -DST_PTR_INC_D1: 4 - -DST_PTR_INC_D2: 4 - -SIZE_D2: 8 - -SIZE_D1: 12 - -run 2: - -DIM_CONFIG: 1 - -DIM_CONFIG: 1 - -DATA_TYPE: 0 - -PAD_TOP: 8 - -PAD_BOTTOM: 0 - -PAD_LEFT: 4 - -PAD_RIGHT: 4 - -SRC_PTR: 52184 - -DST_PTR: 53156 - -SRC_PTR_INC_D1: 8 - -SRC_PTR_INC_D2: 56 - -DST_PTR_INC_D1: 4 - -DST_PTR_INC_D2: 4 - -SIZE_D2: 8 - -SIZE_D1: 12 - -run 3: - -DIM_CONFIG: 1 - -DATA_TYPE: 0 - -PAD_TOP: 4 - -PAD_BOTTOM: 4 - -PAD_LEFT: 4 - -PAD_RIGHT: 4 - -SRC_PTR: 52132 - -DST_PTR: 53236 - -SRC_PTR_INC_D1: 8 - -SRC_PTR_INC_D2: 56 - -DST_PTR_INC_D1: 4 - -DST_PTR_INC_D2: 4 - -SIZE_D2: 8 - -SIZE_D1: 12 - -run 4: - -DIM_CONFIG: 1 - -DATA_TYPE: 0 - -PAD_TOP: 4 - -PAD_BOTTOM: 4 - -PAD_LEFT: 4 - -PAD_RIGHT: 4 - -SRC_PTR: 52136 - -DST_PTR: 53316 - -SRC_PTR_INC_D1: 8 - -SRC_PTR_INC_D2: 56 - -DST_PTR_INC_D1: 4 - -DST_PTR_INC_D2: 4 - -SIZE_D2: 8 - -SIZE_D1: 12 \ No newline at end of file diff --git a/sw/device/lib/drivers/dma/dma_output.txt b/sw/device/lib/drivers/dma/dma_output.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/sw/device/lib/drivers/im2col_spc/im2col.c b/sw/device/lib/drivers/im2col_spc/im2col.c new file mode 100644 index 000000000..3b98983ab --- /dev/null +++ b/sw/device/lib/drivers/im2col_spc/im2col.c @@ -0,0 +1,198 @@ +/* + Copyright EPFL contributors. + Licensed under the Apache License, Version 2.0, see LICENSE for details. + SPDX-License-Identifier: Apache-2.0 + + Author: Tommaso Terzano + + + Info: This simple HAL is used to load the im2col SPC and to run it. +*/ + +#include "im2col.h" + +void handler_irq_im2col_spc( void ) +{ + /* Read the IFR to lower the interrupt flag */ + * (volatile uint32_t * )(IM2COL_SPC_BASE_ADDR + IM2COL_SPC_SPC_IFR_REG_OFFSET); + return; +} + +int run_im2col(im2col_trans_t trans){ + + /* Initializing PLIC */ + if(plic_Init()) + { + return EXIT_FAILURE; + }; + + if(plic_irq_set_priority(EXT_INTR_2, 1)) + { + return EXIT_FAILURE; + }; + + if(plic_irq_set_enabled(EXT_INTR_2, kPlicToggleEnabled)) + { + return EXIT_FAILURE; + }; + + plic_assign_external_irq_handler(EXT_INTR_2, &handler_irq_im2col_spc); + + /* Enable global interrupt for machine-level interrupts */ + CSR_SET_BITS(CSR_REG_MSTATUS, 0x8); + + /* Set mie.MEIE bit to one to enable machine-level external interrupts */ + const uint32_t mask = 1 << 11; + CSR_SET_BITS(CSR_REG_MIE, mask); + + dma_init(NULL); + + /* Write the number of DMA channels the SPC has access to */ + write_register( trans.ch_mask, + IM2COL_SPC_SPC_CH_MASK_REG_OFFSET, + 0xffffffff, + 0, + IM2COL_SPC_BASE_ADDR ); + + /* Write the source */ + write_register( trans.src, + IM2COL_SPC_SRC_PTR_REG_OFFSET, + 0xffffffff, + 0, + IM2COL_SPC_BASE_ADDR ); + + /* Write the destination */ + write_register( trans.dst, + IM2COL_SPC_DST_PTR_REG_OFFSET, + 0xffffffff, + 0, + IM2COL_SPC_BASE_ADDR ); + + /* Write the datatype */ + write_register( DMA_DATA_TYPE_WORD, + IM2COL_SPC_DATA_TYPE_REG_OFFSET, + IM2COL_SPC_DATA_TYPE_DATA_TYPE_MASK, + IM2COL_SPC_DATA_TYPE_DATA_TYPE_OFFSET, + IM2COL_SPC_BASE_ADDR ); + + /* Write the filter dimensions */ + write_register( trans.filter_width, + IM2COL_SPC_FW_REG_OFFSET, + IM2COL_SPC_FW_SIZE_MASK, + IM2COL_SPC_FW_SIZE_OFFSET, + IM2COL_SPC_BASE_ADDR ); + + write_register( trans.filter_height, + IM2COL_SPC_FH_REG_OFFSET, + IM2COL_SPC_FH_SIZE_MASK, + IM2COL_SPC_FH_SIZE_OFFSET, + IM2COL_SPC_BASE_ADDR ); + + /* Write the image dimensions */ + write_register( trans.im_width, + IM2COL_SPC_IW_REG_OFFSET, + 0xffffffff, + 0, + IM2COL_SPC_BASE_ADDR ); + + write_register( trans.im_height, + IM2COL_SPC_IH_REG_OFFSET, + 0xffffffff, + 0, + IM2COL_SPC_BASE_ADDR ); + + /* Write the CH_COL */ + write_register( trans.num_channels_col, + IM2COL_SPC_CH_COL_REG_OFFSET, + IM2COL_SPC_CH_COL_NUM_MASK, + IM2COL_SPC_CH_COL_NUM_OFFSET, + IM2COL_SPC_BASE_ADDR ); + + /* Write n_patches */ + write_register( trans.n_patches_w, + IM2COL_SPC_N_PATCHES_W_REG_OFFSET, + IM2COL_SPC_N_PATCHES_W_NUM_MASK, + IM2COL_SPC_N_PATCHES_W_NUM_OFFSET, + IM2COL_SPC_BASE_ADDR ); + + write_register( trans.n_patches_h, + IM2COL_SPC_N_PATCHES_H_REG_OFFSET, + IM2COL_SPC_N_PATCHES_H_NUM_MASK, + IM2COL_SPC_N_PATCHES_H_NUM_OFFSET, + IM2COL_SPC_BASE_ADDR ); + + /* Write the padding */ + write_register( trans.left_pad, + IM2COL_SPC_PAD_LEFT_REG_OFFSET, + IM2COL_SPC_PAD_LEFT_PAD_MASK, + IM2COL_SPC_PAD_LEFT_PAD_OFFSET, + IM2COL_SPC_BASE_ADDR ); + + write_register( trans.right_pad, + IM2COL_SPC_PAD_RIGHT_REG_OFFSET, + IM2COL_SPC_PAD_RIGHT_PAD_MASK, + IM2COL_SPC_PAD_RIGHT_PAD_OFFSET, + IM2COL_SPC_BASE_ADDR ); + + write_register( trans.top_pad, + IM2COL_SPC_PAD_TOP_REG_OFFSET, + IM2COL_SPC_PAD_TOP_PAD_MASK, + IM2COL_SPC_PAD_TOP_PAD_OFFSET, + IM2COL_SPC_BASE_ADDR ); + + write_register( trans.bottom_pad, + IM2COL_SPC_PAD_BOTTOM_REG_OFFSET, + IM2COL_SPC_PAD_BOTTOM_PAD_MASK, + IM2COL_SPC_PAD_BOTTOM_PAD_OFFSET, + IM2COL_SPC_BASE_ADDR ); + + /* + * Write the strides. With respect to test_2 these are the application-point-of-view + * strides, so they are the same as STRIDE_D1 and STRIDE_D2. + */ + write_register( (int) log2(trans.stride_d1), + IM2COL_SPC_LOG_STRIDES_D1_REG_OFFSET, + IM2COL_SPC_LOG_STRIDES_D1_SIZE_MASK, + IM2COL_SPC_LOG_STRIDES_D1_SIZE_OFFSET, + IM2COL_SPC_BASE_ADDR ); + + write_register( (int) log2(trans.stride_d2), + IM2COL_SPC_LOG_STRIDES_D2_REG_OFFSET, + IM2COL_SPC_LOG_STRIDES_D2_SIZE_MASK, + IM2COL_SPC_LOG_STRIDES_D2_SIZE_OFFSET, + IM2COL_SPC_BASE_ADDR ); + + /* Write the batch size */ + write_register( trans.batch, + IM2COL_SPC_BATCH_REG_OFFSET, + IM2COL_SPC_BATCH_SIZE_MASK, + IM2COL_SPC_BATCH_SIZE_OFFSET, + IM2COL_SPC_BASE_ADDR ); + + /* Write the adapted pad regions */ + write_register( trans.adpt_pad_right, + IM2COL_SPC_ADPT_PAD_RIGHT_REG_OFFSET, + 0xffffffff, + 0, + IM2COL_SPC_BASE_ADDR ); + + write_register( trans.adpt_pad_bottom, + IM2COL_SPC_ADPT_PAD_BOTTOM_REG_OFFSET, + 0xffffffff, + 0, + IM2COL_SPC_BASE_ADDR ); + + /* Enable the interrupt logic */ + write_register( 0x1, + IM2COL_SPC_INTERRUPT_EN_REG_OFFSET, + 0x1, + IM2COL_SPC_INTERRUPT_EN_EN_BIT, + IM2COL_SPC_BASE_ADDR ); + + /* Write the number of channels to start the process */ + write_register( trans.num_channels, + IM2COL_SPC_NUM_CH_REG_OFFSET, + IM2COL_SPC_NUM_CH_NUM_MASK, + IM2COL_SPC_NUM_CH_NUM_OFFSET, + IM2COL_SPC_BASE_ADDR ); +} diff --git a/sw/device/lib/drivers/im2col_spc/im2col.h b/sw/device/lib/drivers/im2col_spc/im2col.h new file mode 100644 index 000000000..0bcd055e7 --- /dev/null +++ b/sw/device/lib/drivers/im2col_spc/im2col.h @@ -0,0 +1,66 @@ +/* + Copyright EPFL contributors. + Licensed under the Apache License, Version 2.0, see LICENSE for details. + SPDX-License-Identifier: Apache-2.0 + + Author: Tommaso Terzano + + + Info: This simple HAL is used to load the im2col SPC and to run it. +*/ + +#ifndef _IM2COL_SPC_ +#define _IM2COL_SPC_ + +#include +#include +#include +#include "dma.h" +#include "im2col_spc_regs.h" +#include "core_v_mini_mcu.h" +#include "x-heep.h" +#include "rv_plic.h" +#include "csr.h" +#include + +#include "mmio.h" +#include "handler.h" +#include "hart.h" +#include "fast_intr_ctrl.h" + +// uint32_t IW, uint32_t IH, uint32_t FW, uint32_t FH, uint32_t CH, uint32_t CH_COL, +// uint32_t STRIDE_D1, uint32_t STRIDE_D2, uint32_t BATCH, uint32_t N_PATCHES_W, +// uint32_t N_PATCHES_H, uint32_t LEFT_PAD, uint32_t RIGHT_PAD, uint32_t TOP_PAD, +// uint32_t BOTTOM_PAD, uint32_t ADPT_PAD_RIGHT, uint32_t ADPT_PAD_BOTTOM, + +typedef struct +{ + uint32_t* src; /*!< Target from where the data will be copied. */ + uint32_t* dst; /*!< Target to where the data will be copied. */ + uint32_t ch_mask; /*!< Mask of the channels to be used. */ + uint32_t im_width; /*!< Width of the input image. */ + uint32_t im_height; /*!< Height of the input image. */ + uint32_t filter_width; /*!< Width of the filter. */ + uint32_t filter_height; /*!< Height of the filter. */ + uint32_t num_channels; /*!< Number of channels. */ + uint32_t num_channels_col; /*!< Number of channels to be processed. */ + uint32_t stride_d1; /*!< Stride in the first dimension. */ + uint32_t stride_d2; /*!< Stride in the second dimension. */ + uint32_t batch; /*!< Number of batches. */ + uint32_t n_patches_w; /*!< Number of patches in the width. */ + uint32_t n_patches_h; /*!< Number of patches in the height. */ + uint32_t left_pad; /*!< Padding on the left. */ + uint32_t right_pad; /*!< Padding on the right. */ + uint32_t top_pad; /*!< Padding on the top. */ + uint32_t bottom_pad; /*!< Padding on the bottom. */ + uint32_t adpt_pad_right; /*!< Adaptive padding on the right. */ + uint32_t adpt_pad_bottom; /*!< Adaptive padding on the bottom. */ +} im2col_trans_t; + +/* Base address of the im2col SPC */ +#define IM2COL_SPC_BASE_ADDR EXT_PERIPHERAL_START_ADDRESS + 0x4000 + +int run_im2col(im2col_trans_t trans); +__attribute__((weak, optimize("00"))) void handler_irq_im2col_spc(void); + +#endif \ No newline at end of file