实战教学:通用卷积神经网络加速器的Verilog实现(第一部分) - Axi-Lite实操探索
最编程
2024-02-05 13:10:56
...
最终目标是在zynq7000系列的fpga上实现yolo系列以及其他轻量型网络。第一阶段主要是学习AXI总线相关的知识,并在fpga上实现;第二阶段主要是研究卷积神经网络的相关函数如何映射到fpga上去;第三阶段主要是写驱动应用程序,并上板调试。
axi读写架构
一.AXI总线协议规范:
1.无论写操作和读操作,地址信息和控制信息将在数据传输之前进行信息传输
2.支持多种数据的传输
3.支持乱序传输
信息源通过VALID信号显示数据信息或者控制信息的有效性;
而接收方则利用READY信号表示可以接收相应的数据;
还有一个LAST信号表示在读数据通道或者写数据通道中最后一个有效数据的到达
二.AXI总线的握手机制
1.五个单向通道体系结构均采用同样的VALID/READY两个握手信号来进行地址信息,控制信息,写/读数据信息,以及应答信息的传输。
2.信息源产生的高电平有效的VALID信号标志出地址信息,控制信息以及数据信息,应答信息等多种信息的有效传输或接收;
3.目标源产生的同样为高电平有效的READY信号则表明了可以接收相对应的信息。
VALID和READY信号均为高电平有效,也只有两个信号都为高电平时,传输才能完成。
4.AXI总线协议同时也规定:AXI master与AXI slave主从设备的输入信号与输出信号之间不允许存在任何组合逻辑路径
5.握手信号之间的相关性:握手过程中一旦发生死锁,就会导致信号传输崩溃;为避免死锁,不同模块之间的握手信号不应存在相互依赖,例如发送方的VALID信号不能等待接收方的READY信号置位后才置位
三.工程搭建
`timescale 1 ns / 1 ps
// AXI4-Lite slave: parameter list, port list, internal handshake registers
// and the wiring of those registers onto the output ports.
module axi_lite_slave #
(
// Width in bits of the write/read data buses (S_AXI_WDATA / S_AXI_RDATA)
parameter integer C_S_AXI_DATA_WIDTH = 32,
parameter integer C_S_AXI_ADDR_WIDTH = 6 // address bit width
)
(
// Clock and active-low reset
input wire S_AXI_ACLK,
input wire S_AXI_ARESETN,
// Write address channel
input wire [C_S_AXI_ADDR_WIDTH-1 : 0] S_AXI_AWADDR,
input wire [2 : 0] S_AXI_AWPROT,
input wire S_AXI_AWVALID,
output wire S_AXI_AWREADY,
// Write data channel (one strobe bit per data byte)
input wire [C_S_AXI_DATA_WIDTH-1 : 0] S_AXI_WDATA,
input wire [(C_S_AXI_DATA_WIDTH/8)-1 : 0] S_AXI_WSTRB,
input wire S_AXI_WVALID,
output wire S_AXI_WREADY,
// Write response channel
output wire [1 : 0] S_AXI_BRESP,
output wire S_AXI_BVALID,
input wire S_AXI_BREADY,
// Read address channel
input wire [C_S_AXI_ADDR_WIDTH-1 : 0] S_AXI_ARADDR,
input wire [2 : 0] S_AXI_ARPROT,
input wire S_AXI_ARVALID,
output wire S_AXI_ARREADY,
// Read data channel
output wire [C_S_AXI_DATA_WIDTH-1 : 0] S_AXI_RDATA,
output wire [1 : 0] S_AXI_RRESP,
output wire S_AXI_RVALID,
input wire S_AXI_RREADY
);
// Registered copies of the handshake/response outputs and latched addresses
reg [C_S_AXI_ADDR_WIDTH-1 : 0] axi_awaddr;
reg axi_awready;
reg axi_wready;
reg [1 : 0] axi_bresp;
reg axi_bvalid;
reg [C_S_AXI_ADDR_WIDTH-1 : 0] axi_araddr;
reg axi_arready;
reg [C_S_AXI_DATA_WIDTH-1 : 0] axi_rdata;
reg [1 : 0] axi_rresp;
reg axi_rvalid;
// Lowest address bit of the register index; evaluates to 2 for the default
// 32-bit data width.
localparam integer ADDR_LSB = (C_S_AXI_DATA_WIDTH/32) + 1;
// Register index occupies address bits [ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB],
// i.e. 4 bits with this value of 3.
localparam integer OPT_MEM_ADDR_BITS = 4-1;
// Read / write strobes for the register file.
// NOTE(review): the driver of slv_reg_rden is not in this section - confirm.
wire slv_reg_rden;
wire slv_reg_wren;
// Value loaded into axi_rdata when slv_reg_rden is set.
// NOTE(review): its driver is not in this section - confirm.
reg [C_S_AXI_DATA_WIDTH-1:0] reg_data_out;
// Loop index used by the byte-strobe write loops
integer byte_index;
// Drive the output ports from the internal registers
assign S_AXI_AWREADY = axi_awready;
assign S_AXI_WREADY = axi_wready;
assign S_AXI_BRESP = axi_bresp;
assign S_AXI_BVALID = axi_bvalid;
assign S_AXI_ARREADY = axi_arready;
assign S_AXI_RDATA = axi_rdata;
assign S_AXI_RRESP = axi_rresp;
assign S_AXI_RVALID = axi_rvalid;
// Write address / write data acceptance.
//
// AWREADY and WREADY are each pulsed for one clock once the master presents
// AWVALID and WVALID together, so address and data are accepted in the same
// cycle. A new write is only accepted when the write response channel is
// free (BVALID low) or is being drained in this very cycle (BREADY high).
// Without that gate, a write accepted while an earlier response is still
// waiting for BREADY would update its register but never receive a response
// of its own, because BVALID is only raised while it is low.
always @( posedge S_AXI_ACLK )
begin
  if ( S_AXI_ARESETN == 1'b0 )
    axi_awready <= 1'b0;
  else if ( ~axi_awready && S_AXI_AWVALID && S_AXI_WVALID
            && (~axi_bvalid || S_AXI_BREADY) )
    axi_awready <= 1'b1;   // one-cycle accept pulse
  else
    axi_awready <= 1'b0;
end

// Latch the write address in the cycle before AWREADY is pulsed, so that it
// is stable while the register write strobe is active.
always @( posedge S_AXI_ACLK )
begin
  if ( S_AXI_ARESETN == 1'b0 )
    axi_awaddr <= 0;
  else if ( ~axi_awready && S_AXI_AWVALID && S_AXI_WVALID
            && (~axi_bvalid || S_AXI_BREADY) )
    axi_awaddr <= S_AXI_AWADDR;
end

// WREADY uses the same accept condition as AWREADY so that both pulses
// always occur in the same clock cycle.
always @( posedge S_AXI_ACLK )
begin
  if ( S_AXI_ARESETN == 1'b0 )
    axi_wready <= 1'b0;
  else if ( ~axi_wready && S_AXI_WVALID && S_AXI_AWVALID
            && (~axi_bvalid || S_AXI_BREADY) )
    axi_wready <= 1'b1;    // one-cycle accept pulse
  else
    axi_wready <= 1'b0;
end
// Register write strobe: high in the cycle where both the write address and
// the write data handshakes complete.
assign slv_reg_wren = S_AXI_AWVALID && axi_awready && S_AXI_WVALID && axi_wready;

// Write response channel: raise BVALID (with response code 2'b00) after a
// write has been accepted, and drop it once the master acknowledges with
// BREADY.
always @( posedge S_AXI_ACLK )
begin
  if ( !S_AXI_ARESETN )
    begin
      axi_bvalid <= 1'b0;
      axi_bresp  <= 2'b00;
    end
  else if ( S_AXI_AWVALID && axi_awready && S_AXI_WVALID && axi_wready && !axi_bvalid )
    begin
      axi_bvalid <= 1'b1;
      axi_bresp  <= 2'b00;
    end
  else if ( axi_bvalid && S_AXI_BREADY )
    axi_bvalid <= 1'b0;
end
// Read address acceptance.
//
// ARREADY is pulsed for one clock when ARVALID is seen, and the read address
// is latched in the cycle before the pulse. A new address is only accepted
// when the read data channel is free (RVALID low) or is being drained in
// this cycle (RREADY high). Without that gate, an address accepted while
// earlier read data is still waiting for RREADY would never get a response,
// because RVALID is only raised while it is low.
always @( posedge S_AXI_ACLK )
begin
  if ( S_AXI_ARESETN == 1'b0 )
    begin
      axi_arready <= 1'b0;
      // Sized to the address register instead of a fixed 32-bit literal
      axi_araddr  <= {C_S_AXI_ADDR_WIDTH{1'b0}};
    end
  else if ( ~axi_arready && S_AXI_ARVALID && (~axi_rvalid || S_AXI_RREADY) )
    begin
      axi_arready <= 1'b1;          // one-cycle accept pulse
      axi_araddr  <= S_AXI_ARADDR;
    end
  else
    axi_arready <= 1'b0;
end
// Read data channel valid/response: raise RVALID (with response code 2'b00)
// in the cycle after a read address is accepted, and drop it once the master
// takes the data with RREADY.
always @( posedge S_AXI_ACLK )
begin
  if ( !S_AXI_ARESETN )
    begin
      axi_rvalid <= 1'b0;
      axi_rresp  <= 2'b00;
    end
  else if ( S_AXI_ARVALID && axi_arready && !axi_rvalid )
    begin
      axi_rvalid <= 1'b1;
      axi_rresp  <= 2'b00;
    end
  else if ( S_AXI_RREADY && axi_rvalid )
    axi_rvalid <= 1'b0;
end
// Read data register: cleared on reset, otherwise loaded from reg_data_out
// whenever the read strobe slv_reg_rden is high; holds its value otherwise.
always @( posedge S_AXI_ACLK )
begin
  if ( !S_AXI_ARESETN )
    axi_rdata <= {C_S_AXI_DATA_WIDTH{1'b0}};
  else if ( slv_reg_rden )
    axi_rdata <= reg_data_out;
end
// Slave register 0 (register index 0). Cleared by the synchronous active-low
// reset; on a register write strobe with a matching index, each byte whose
// WSTRB bit is set is replaced by the corresponding WDATA byte.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg0;
always @( posedge S_AXI_ACLK )
begin : slv_reg0_write
  // Loop index is local to this block so it is not shared with any other
  // always block.
  integer lane;
  if ( S_AXI_ARESETN == 1'b0 )
    slv_reg0 <= 0;
  else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd0) )
    for ( lane = 0; lane < (C_S_AXI_DATA_WIDTH/8); lane = lane + 1 )
      if ( S_AXI_WSTRB[lane] == 1'b1 )
        slv_reg0[(lane*8) +: 8] <= S_AXI_WDATA[(lane*8) +: 8];
end
// Slave register 1 (register index 1). Cleared by the synchronous active-low
// reset; on a register write strobe with a matching index, each byte whose
// WSTRB bit is set is replaced by the corresponding WDATA byte.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg1;
always @( posedge S_AXI_ACLK )
begin : slv_reg1_write
  // Loop index is local to this block so it is not shared with any other
  // always block.
  integer lane;
  if ( S_AXI_ARESETN == 1'b0 )
    slv_reg1 <= 0;
  else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd1) )
    for ( lane = 0; lane < (C_S_AXI_DATA_WIDTH/8); lane = lane + 1 )
      if ( S_AXI_WSTRB[lane] == 1'b1 )
        slv_reg1[(lane*8) +: 8] <= S_AXI_WDATA[(lane*8) +: 8];
end
// Slave register 2 (register index 2). Cleared by the synchronous active-low
// reset; on a register write strobe with a matching index, each byte whose
// WSTRB bit is set is replaced by the corresponding WDATA byte.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg2;
always @( posedge S_AXI_ACLK )
begin : slv_reg2_write
  // Loop index is local to this block so it is not shared with any other
  // always block.
  integer lane;
  if ( S_AXI_ARESETN == 1'b0 )
    slv_reg2 <= 0;
  else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd2) )
    for ( lane = 0; lane < (C_S_AXI_DATA_WIDTH/8); lane = lane + 1 )
      if ( S_AXI_WSTRB[lane] == 1'b1 )
        slv_reg2[(lane*8) +: 8] <= S_AXI_WDATA[(lane*8) +: 8];
end
// Slave register 3 (register index 3). Cleared by the synchronous active-low
// reset; on a register write strobe with a matching index, each byte whose
// WSTRB bit is set is replaced by the corresponding WDATA byte.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg3;
always @( posedge S_AXI_ACLK )
begin : slv_reg3_write
  // Loop index is local to this block so it is not shared with any other
  // always block.
  integer lane;
  if ( S_AXI_ARESETN == 1'b0 )
    slv_reg3 <= 0;
  else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd3) )
    for ( lane = 0; lane < (C_S_AXI_DATA_WIDTH/8); lane = lane + 1 )
      if ( S_AXI_WSTRB[lane] == 1'b1 )
        slv_reg3[(lane*8) +: 8] <= S_AXI_WDATA[(lane*8) +: 8];
end
// Slave register 4 (register index 4). Cleared by the synchronous active-low
// reset; on a register write strobe with a matching index, each byte whose
// WSTRB bit is set is replaced by the corresponding WDATA byte.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg4;
always @( posedge S_AXI_ACLK )
begin : slv_reg4_write
  // Loop index is local to this block so it is not shared with any other
  // always block.
  integer lane;
  if ( S_AXI_ARESETN == 1'b0 )
    slv_reg4 <= 0;
  else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd4) )
    for ( lane = 0; lane < (C_S_AXI_DATA_WIDTH/8); lane = lane + 1 )
      if ( S_AXI_WSTRB[lane] == 1'b1 )
        slv_reg4[(lane*8) +: 8] <= S_AXI_WDATA[(lane*8) +: 8];
end
// Slave register 5 (register index 5). Cleared by the synchronous active-low
// reset; on a register write strobe with a matching index, each byte whose
// WSTRB bit is set is replaced by the corresponding WDATA byte.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg5;
always @( posedge S_AXI_ACLK )
begin : slv_reg5_write
  // Loop index is local to this block so it is not shared with any other
  // always block.
  integer lane;
  if ( S_AXI_ARESETN == 1'b0 )
    slv_reg5 <= 0;
  else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd5) )
    for ( lane = 0; lane < (C_S_AXI_DATA_WIDTH/8); lane = lane + 1 )
      if ( S_AXI_WSTRB[lane] == 1'b1 )
        slv_reg5[(lane*8) +: 8] <= S_AXI_WDATA[(lane*8) +: 8];
end
// Slave register 6 (register index 6). Cleared by the synchronous active-low
// reset; on a register write strobe with a matching index, each byte whose
// WSTRB bit is set is replaced by the corresponding WDATA byte.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg6;
always @( posedge S_AXI_ACLK )
begin : slv_reg6_write
  // Loop index is local to this block so it is not shared with any other
  // always block.
  integer lane;
  if ( S_AXI_ARESETN == 1'b0 )
    slv_reg6 <= 0;
  else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd6) )
    for ( lane = 0; lane < (C_S_AXI_DATA_WIDTH/8); lane = lane + 1 )
      if ( S_AXI_WSTRB[lane] == 1'b1 )
        slv_reg6[(lane*8) +: 8] <= S_AXI_WDATA[(lane*8) +: 8];
end
// Slave register 7 (register index 7). Cleared by the synchronous active-low
// reset; on a register write strobe with a matching index, each byte whose
// WSTRB bit is set is replaced by the corresponding WDATA byte.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg7;
always @( posedge S_AXI_ACLK )
begin : slv_reg7_write
  // Loop index is local to this block so it is not shared with any other
  // always block.
  integer lane;
  if ( S_AXI_ARESETN == 1'b0 )
    slv_reg7 <= 0;
  else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd7) )
    for ( lane = 0; lane < (C_S_AXI_DATA_WIDTH/8); lane = lane + 1 )
      if ( S_AXI_WSTRB[lane] == 1'b1 )
        slv_reg7[(lane*8) +: 8] <= S_AXI_WDATA[(lane*8) +: 8];
end
// Slave register 8 (register index 8). Cleared by the synchronous active-low
// reset; on a register write strobe with a matching index, each byte whose
// WSTRB bit is set is replaced by the corresponding WDATA byte.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg8;
always @( posedge S_AXI_ACLK )
begin : slv_reg8_write
  // Loop index is local to this block so it is not shared with any other
  // always block.
  integer lane;
  if ( S_AXI_ARESETN == 1'b0 )
    slv_reg8 <= 0;
  else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd8) )
    for ( lane = 0; lane < (C_S_AXI_DATA_WIDTH/8); lane = lane + 1 )
      if ( S_AXI_WSTRB[lane] == 1'b1 )
        slv_reg8[(lane*8) +: 8] <= S_AXI_WDATA[(lane*8) +: 8];
end
// Slave register 9 (register index 9). Cleared by the synchronous active-low
// reset; on a register write strobe with a matching index, each byte whose
// WSTRB bit is set is replaced by the corresponding WDATA byte.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg9;
always @( posedge S_AXI_ACLK )
begin : slv_reg9_write
  // Loop index is local to this block so it is not shared with any other
  // always block.
  integer lane;
  if ( S_AXI_ARESETN == 1'b0 )
    slv_reg9 <= 0;
  else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd9) )
    for ( lane = 0; lane < (C_S_AXI_DATA_WIDTH/8); lane = lane + 1 )
      if ( S_AXI_WSTRB[lane] == 1'b1 )
        slv_reg9[(lane*8) +: 8] <= S_AXI_WDATA[(lane*8) +: 8];
end
// Slave register 10 (register index 10). Cleared by the synchronous
// active-low reset; on a register write strobe with a matching index, each
// byte whose WSTRB bit is set is replaced by the corresponding WDATA byte.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg10;
always @( posedge S_AXI_ACLK )
begin : slv_reg10_write
  // Loop index is local to this block so it is not shared with any other
  // always block.
  integer lane;
  if ( S_AXI_ARESETN == 1'b0 )
    slv_reg10 <= 0;
  else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd10) )
    for ( lane = 0; lane < (C_S_AXI_DATA_WIDTH/8); lane = lane + 1 )
      if ( S_AXI_WSTRB[lane] == 1'b1 )
        slv_reg10[(lane*8) +: 8] <= S_AXI_WDATA[(lane*8) +: 8];
end
// Slave register 11 (register index 11). Cleared by the synchronous
// active-low reset; on a register write strobe with a matching index, each
// byte whose WSTRB bit is set is replaced by the corresponding WDATA byte.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg11;
always @( posedge S_AXI_ACLK )
begin : slv_reg11_write
  // Loop index is local to this block so it is not shared with any other
  // always block.
  integer lane;
  if ( S_AXI_ARESETN == 1'b0 )
    slv_reg11 <= 0;
  else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd11) )
    for ( lane = 0; lane < (C_S_AXI_DATA_WIDTH/8); lane = lane + 1 )
      if ( S_AXI_WSTRB[lane] == 1'b1 )
        slv_reg11[(lane*8) +: 8] <= S_AXI_WDATA[(lane*8) +: 8];
end
// Slave register 12 (register index 12). Cleared by the synchronous
// active-low reset; on a register write strobe with a matching index, each
// byte whose WSTRB bit is set is replaced by the corresponding WDATA byte.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg12;
always @( posedge S_AXI_ACLK )
begin : slv_reg12_write
  // Loop index is local to this block so it is not shared with any other
  // always block.
  integer lane;
  if ( S_AXI_ARESETN == 1'b0 )
    slv_reg12 <= 0;
  else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd12) )
    for ( lane = 0; lane < (C_S_AXI_DATA_WIDTH/8); lane = lane + 1 )
      if ( S_AXI_WSTRB[lane] == 1'b1 )
        slv_reg12[(lane*8) +: 8] <= S_AXI_WDATA[(lane*8) +: 8];
end
// Slave register 13 (register index 13). Cleared by the synchronous
// active-low reset; on a register write strobe with a matching index, each
// byte whose WSTRB bit is set is replaced by the corresponding WDATA byte.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg13;
always @( posedge S_AXI_ACLK )
begin : slv_reg13_write
  // Loop index is local to this block so it is not shared with any other
  // always block.
  integer lane;
  if ( S_AXI_ARESETN == 1'b0 )
    slv_reg13 <= 0;
  else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd13) )
    for ( lane = 0; lane < (C_S_AXI_DATA_WIDTH/8); lane = lane + 1 )
      if ( S_AXI_WSTRB[lane] == 1'b1 )
        slv_reg13[(lane*8) +: 8] <= S_AXI_WDATA[(lane*8) +: 8];
end
reg [C_S_AXI_DATA_WIDTH-1:0]slv_reg14;
always @( posedge S_AXI_ACLK )
if ( S_AXI_ARESETN == 1'b0 )
begin
slv_reg14 <= 0;
end
else
if(slv_reg_wren & (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB]=='d14) )