欢迎您访问 最编程 本站为您分享编程语言代码,编程技术文章!
您现在的位置是: 首页

实战教学:通用卷积神经网络加速器的Verilog实现(第一部分) - Axi-Lite实操探索

最编程 2024-02-05 13:10:56
...

最终目标是在Zynq-7000系列FPGA上实现YOLO系列以及其他轻量型网络。第一阶段主要学习AXI总线相关的知识,并在FPGA上实现;第二阶段主要研究卷积神经网络的相关函数如何映射到FPGA上;第三阶段主要编写驱动和应用程序,并上板调试。

axi读写架构

一.AXI总线协议规范:

1.无论写操作和读操作,地址信息和控制信息将在数据传输之前进行信息传输
2.支持多种数据的传输
3.支持乱序传输
信息源通过VALID信号显示数据信息或者控制信息的有效性;
而接收方则利用READY信号表示可以接收相应的数据;
还有一个LAST信号表示在读数据通道或者写数据通道中最后一个有效数据的到达
在这里插入图片描述
在这里插入图片描述

二.AXI总线的握手机制

1.五个单向通道体系结构均采用同样的VALID/READY两个握手信号来进行地址信息,控制信息,写/读数据信息,以及应答信息的传输。

2.信息源(发送方)产生高电平有效的VALID信号,用来表明其发出的地址信息、控制信息、数据信息或应答信息当前有效;

3.目的方(接收方)产生同样为高电平有效的READY信号,表明可以接收相应的信息。
VALID和READY信号均为高电平有效,也只有两个信号都为高电平时,传输才能完成。
4.AXI总线协议同时规定:AXI master与AXI slave主从设备的输入信号与输出信号之间不允许存在任何组合逻辑路径。

5.握手信号之间的相关性:握手过程中一旦发生死锁,信号传输就会陷入停滞;为避免死锁,不同模块之间的握手信号不应存在任何相互依赖。
在这里插入图片描述

三.工程搭建

`timescale 1 ns / 1 ps

// AXI-Lite slave interface. The visible part of the module implements the
// write-side handshake, a bank of byte-writable registers (slv_reg0, ...)
// selected by the latched write address, and the read-side handshake.
// All logic is clocked on the rising edge of S_AXI_ACLK and reset
// synchronously while S_AXI_ARESETN is low.
module axi_lite_slave #
(
	// Width of the write/read data buses in bits.
	parameter integer C_S_AXI_DATA_WIDTH	= 32,
	parameter integer C_S_AXI_ADDR_WIDTH	= 6   // address bus width in bits
)
(
	// Clock and active-low reset.
	input wire  S_AXI_ACLK,
	input wire  S_AXI_ARESETN,
	// Write address channel (AW).
	input wire [C_S_AXI_ADDR_WIDTH-1 : 0] S_AXI_AWADDR,
	// NOTE(review): S_AXI_AWPROT is not referenced in the visible part of the module.
	input wire [2 : 0] S_AXI_AWPROT,
	input wire  S_AXI_AWVALID,
	output wire  S_AXI_AWREADY,
	// Write data channel (W); one strobe bit per data byte.
	input wire [C_S_AXI_DATA_WIDTH-1 : 0] S_AXI_WDATA,
	input wire [(C_S_AXI_DATA_WIDTH/8)-1 : 0] S_AXI_WSTRB,
	input wire  S_AXI_WVALID,
	output wire  S_AXI_WREADY,
	// Write response channel (B).
	output wire [1 : 0] S_AXI_BRESP,
	output wire  S_AXI_BVALID,
	input wire  S_AXI_BREADY,
	// Read address channel (AR).
	input wire [C_S_AXI_ADDR_WIDTH-1 : 0] S_AXI_ARADDR,
	// NOTE(review): S_AXI_ARPROT is not referenced in the visible part of the module.
	input wire [2 : 0] S_AXI_ARPROT,
	input wire  S_AXI_ARVALID,
	output wire  S_AXI_ARREADY,
	// Read data / response channel (R).
	output wire [C_S_AXI_DATA_WIDTH-1 : 0] S_AXI_RDATA,
	output wire [1 : 0] S_AXI_RRESP,
	output wire  S_AXI_RVALID,
	input wire  S_AXI_RREADY
);

// ---------------------------------------------------------------------------
// Address decode constants.
//   ADDR_LSB          : lowest address bit that selects a register word
//                       (2 for a 32-bit data bus, 3 for a 64-bit data bus).
//   OPT_MEM_ADDR_BITS : number of register-select address bits, minus one.
// ---------------------------------------------------------------------------
localparam integer ADDR_LSB          = (C_S_AXI_DATA_WIDTH/32) + 1;
localparam integer OPT_MEM_ADDR_BITS = 4-1;

// Write address / write data / write response channel registers.
reg [C_S_AXI_ADDR_WIDTH-1 : 0] axi_awaddr;
reg                            axi_awready;
reg                            axi_wready;
reg [1 : 0]                    axi_bresp;
reg                            axi_bvalid;

// Read address / read data channel registers.
reg [C_S_AXI_ADDR_WIDTH-1 : 0] axi_araddr;
reg                            axi_arready;
reg [C_S_AXI_DATA_WIDTH-1 : 0] axi_rdata;
reg [1 : 0]                    axi_rresp;
reg                            axi_rvalid;

// Register-bank access strobes and the value staged for the read data
// register (axi_rdata is loaded from reg_data_out when slv_reg_rden is high).
wire                           slv_reg_wren;
wire                           slv_reg_rden;
reg [C_S_AXI_DATA_WIDTH-1:0]   reg_data_out;

// Loop index used by the byte-lane write loops of the slave registers.
integer                        byte_index;

// Drive the AXI output ports from the internal registers.
assign S_AXI_AWREADY = axi_awready;
assign S_AXI_WREADY  = axi_wready;
assign S_AXI_BVALID  = axi_bvalid;
assign S_AXI_BRESP   = axi_bresp;
assign S_AXI_ARREADY = axi_arready;
assign S_AXI_RVALID  = axi_rvalid;
assign S_AXI_RRESP   = axi_rresp;
assign S_AXI_RDATA   = axi_rdata;

// ---------------------------------------------------------------------------
// Write address / write data acceptance.
//
// AWREADY and WREADY are pulsed together for one clock once both AWVALID and
// WVALID are presented, so the address and data handshakes complete in the
// same cycle.
//
// A new write is only accepted when no write response is outstanding
// (axi_bvalid low) or the outstanding response is being taken this cycle
// (S_AXI_BREADY high). Without this gate a second write could be accepted
// while BVALID is stalled; the response logic cannot raise BVALID again in
// that case, so the second write would never be acknowledged and the master
// would wait forever for its response.
// ---------------------------------------------------------------------------

// AWREADY generation.
always @( posedge S_AXI_ACLK )
begin
  if ( S_AXI_ARESETN == 1'b0 )
    begin
      axi_awready <= 1'b0;
    end 
  else
    begin    
      if (~axi_awready && S_AXI_AWVALID && S_AXI_WVALID
          && (~axi_bvalid || S_AXI_BREADY))
        begin
          // Address and data are both valid and the response channel is free.
          axi_awready <= 1'b1;
        end
      else           
        begin
          axi_awready <= 1'b0;
        end
    end 
end       

// Write address latch: captured in the cycle before AWREADY goes high, so
// axi_awaddr is stable while slv_reg_wren is asserted.
always @( posedge S_AXI_ACLK )
begin
  if ( S_AXI_ARESETN == 1'b0 )
    begin
      axi_awaddr <= 0;
    end 
  else
    begin    
      if (~axi_awready && S_AXI_AWVALID && S_AXI_WVALID
          && (~axi_bvalid || S_AXI_BREADY))
        begin
          axi_awaddr <= S_AXI_AWADDR;
        end
    end 
end       

// WREADY generation: same acceptance condition as AWREADY so that both
// handshakes always complete in the same clock cycle.
always @( posedge S_AXI_ACLK )
begin
  if ( S_AXI_ARESETN == 1'b0 )
    begin
      axi_wready <= 1'b0;
    end 
  else
    begin    
      if (~axi_wready && S_AXI_WVALID && S_AXI_AWVALID
          && (~axi_bvalid || S_AXI_BREADY))
        begin
          axi_wready <= 1'b1;
        end
      else
        begin
          axi_wready <= 1'b0;
        end
    end 
end       

// Register write strobe: high in the cycle where both the write address and
// the write data handshakes complete.
assign slv_reg_wren = S_AXI_AWVALID && axi_awready && S_AXI_WVALID && axi_wready;

// Write response channel.
// BVALID is raised when a write completes and no response is already
// pending, and is dropped once the master takes it with BREADY.
// BRESP is always 2'b00.
always @( posedge S_AXI_ACLK )
begin
  if ( !S_AXI_ARESETN )
    begin
      axi_bvalid <= 1'b0;
      axi_bresp  <= 2'b00;
    end
  else if ( slv_reg_wren && !axi_bvalid )
    begin
      axi_bvalid <= 1'b1;
      axi_bresp  <= 2'b00;
    end
  else if ( axi_bvalid && S_AXI_BREADY )
    begin
      axi_bvalid <= 1'b0;
    end
end

// ---------------------------------------------------------------------------
// Read address acceptance.
//
// ARREADY is pulsed for one clock when ARVALID is presented, and the read
// address is latched into axi_araddr in the cycle before ARREADY goes high.
//
// A new read address is only accepted when no read data beat is outstanding
// (axi_rvalid low) or the outstanding beat is being taken this cycle
// (S_AXI_RREADY high). Without this gate a second address could be accepted
// while RVALID is stalled; the read-valid logic cannot raise RVALID again in
// that case, so the second read would never be answered.
// ---------------------------------------------------------------------------
always @( posedge S_AXI_ACLK )
begin
  if ( S_AXI_ARESETN == 1'b0 )
    begin
      axi_arready <= 1'b0;
      // Reset value sized to the address register (the register is
      // C_S_AXI_ADDR_WIDTH bits wide, not 32).
      axi_araddr  <= {C_S_AXI_ADDR_WIDTH{1'b0}};
    end 
  else
    begin    
      if (~axi_arready && S_AXI_ARVALID && (~axi_rvalid || S_AXI_RREADY))
        begin
          axi_arready <= 1'b1;
          axi_araddr  <= S_AXI_ARADDR;
        end
      else
        begin
          axi_arready <= 1'b0;
        end
    end 
end       

// Read data channel valid/response.
// RVALID is raised in the cycle after the read address handshake, provided
// no read beat is already pending, and is dropped once the master takes the
// beat with RREADY. RRESP is always 2'b00.
always @( posedge S_AXI_ACLK )
begin
  if ( !S_AXI_ARESETN )
    begin
      axi_rvalid <= 1'b0;
      axi_rresp  <= 2'b00;
    end
  else if ( S_AXI_ARVALID && axi_arready && !axi_rvalid )
    begin
      axi_rvalid <= 1'b1;
      axi_rresp  <= 2'b00;
    end
  else if ( S_AXI_RREADY && axi_rvalid )
    begin
      axi_rvalid <= 1'b0;
    end
end

// Read data register: cleared on reset, loaded from reg_data_out whenever
// the read strobe slv_reg_rden is high, and held otherwise.
always @( posedge S_AXI_ACLK )
begin
  if ( !S_AXI_ARESETN )
    axi_rdata <= {C_S_AXI_DATA_WIDTH{1'b0}};
  else if ( slv_reg_rden )
    axi_rdata <= reg_data_out;
end

// Slave register 0: selected when the latched write address decodes to word
// index 0 (byte offset 0x00 with the default 32-bit data width).
// Cleared by the synchronous active-low reset.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg0;

always @( posedge S_AXI_ACLK )
begin
	if ( !S_AXI_ARESETN )
		slv_reg0 <= {C_S_AXI_DATA_WIDTH{1'b0}};
	else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd0) )
		begin
			// Update only the byte lanes whose write strobe is set.
			for ( byte_index = 0; byte_index < (C_S_AXI_DATA_WIDTH/8); byte_index = byte_index+1 )
				if ( S_AXI_WSTRB[byte_index] )
					slv_reg0[8*byte_index +: 8] <= S_AXI_WDATA[8*byte_index +: 8];
		end
end

// Slave register 1: selected when the latched write address decodes to word
// index 1 (byte offset 0x04 with the default 32-bit data width).
// Cleared by the synchronous active-low reset.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg1;

always @( posedge S_AXI_ACLK )
begin
	if ( !S_AXI_ARESETN )
		slv_reg1 <= {C_S_AXI_DATA_WIDTH{1'b0}};
	else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd1) )
		begin
			// Update only the byte lanes whose write strobe is set.
			for ( byte_index = 0; byte_index < (C_S_AXI_DATA_WIDTH/8); byte_index = byte_index+1 )
				if ( S_AXI_WSTRB[byte_index] )
					slv_reg1[8*byte_index +: 8] <= S_AXI_WDATA[8*byte_index +: 8];
		end
end

// Slave register 2: selected when the latched write address decodes to word
// index 2 (byte offset 0x08 with the default 32-bit data width).
// Cleared by the synchronous active-low reset.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg2;

always @( posedge S_AXI_ACLK )
begin
	if ( !S_AXI_ARESETN )
		slv_reg2 <= {C_S_AXI_DATA_WIDTH{1'b0}};
	else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd2) )
		begin
			// Update only the byte lanes whose write strobe is set.
			for ( byte_index = 0; byte_index < (C_S_AXI_DATA_WIDTH/8); byte_index = byte_index+1 )
				if ( S_AXI_WSTRB[byte_index] )
					slv_reg2[8*byte_index +: 8] <= S_AXI_WDATA[8*byte_index +: 8];
		end
end

// Slave register 3: selected when the latched write address decodes to word
// index 3 (byte offset 0x0C with the default 32-bit data width).
// Cleared by the synchronous active-low reset.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg3;

always @( posedge S_AXI_ACLK )
begin
	if ( !S_AXI_ARESETN )
		slv_reg3 <= {C_S_AXI_DATA_WIDTH{1'b0}};
	else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd3) )
		begin
			// Update only the byte lanes whose write strobe is set.
			for ( byte_index = 0; byte_index < (C_S_AXI_DATA_WIDTH/8); byte_index = byte_index+1 )
				if ( S_AXI_WSTRB[byte_index] )
					slv_reg3[8*byte_index +: 8] <= S_AXI_WDATA[8*byte_index +: 8];
		end
end

// Slave register 4: selected when the latched write address decodes to word
// index 4 (byte offset 0x10 with the default 32-bit data width).
// Cleared by the synchronous active-low reset.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg4;

always @( posedge S_AXI_ACLK )
begin
	if ( !S_AXI_ARESETN )
		slv_reg4 <= {C_S_AXI_DATA_WIDTH{1'b0}};
	else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd4) )
		begin
			// Update only the byte lanes whose write strobe is set.
			for ( byte_index = 0; byte_index < (C_S_AXI_DATA_WIDTH/8); byte_index = byte_index+1 )
				if ( S_AXI_WSTRB[byte_index] )
					slv_reg4[8*byte_index +: 8] <= S_AXI_WDATA[8*byte_index +: 8];
		end
end

// Slave register 5: selected when the latched write address decodes to word
// index 5 (byte offset 0x14 with the default 32-bit data width).
// Cleared by the synchronous active-low reset.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg5;

always @( posedge S_AXI_ACLK )
begin
	if ( !S_AXI_ARESETN )
		slv_reg5 <= {C_S_AXI_DATA_WIDTH{1'b0}};
	else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd5) )
		begin
			// Update only the byte lanes whose write strobe is set.
			for ( byte_index = 0; byte_index < (C_S_AXI_DATA_WIDTH/8); byte_index = byte_index+1 )
				if ( S_AXI_WSTRB[byte_index] )
					slv_reg5[8*byte_index +: 8] <= S_AXI_WDATA[8*byte_index +: 8];
		end
end

// Slave register 6: selected when the latched write address decodes to word
// index 6 (byte offset 0x18 with the default 32-bit data width).
// Cleared by the synchronous active-low reset.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg6;

always @( posedge S_AXI_ACLK )
begin
	if ( !S_AXI_ARESETN )
		slv_reg6 <= {C_S_AXI_DATA_WIDTH{1'b0}};
	else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd6) )
		begin
			// Update only the byte lanes whose write strobe is set.
			for ( byte_index = 0; byte_index < (C_S_AXI_DATA_WIDTH/8); byte_index = byte_index+1 )
				if ( S_AXI_WSTRB[byte_index] )
					slv_reg6[8*byte_index +: 8] <= S_AXI_WDATA[8*byte_index +: 8];
		end
end

// Slave register 7: selected when the latched write address decodes to word
// index 7 (byte offset 0x1C with the default 32-bit data width).
// Cleared by the synchronous active-low reset.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg7;

always @( posedge S_AXI_ACLK )
begin
	if ( !S_AXI_ARESETN )
		slv_reg7 <= {C_S_AXI_DATA_WIDTH{1'b0}};
	else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd7) )
		begin
			// Update only the byte lanes whose write strobe is set.
			for ( byte_index = 0; byte_index < (C_S_AXI_DATA_WIDTH/8); byte_index = byte_index+1 )
				if ( S_AXI_WSTRB[byte_index] )
					slv_reg7[8*byte_index +: 8] <= S_AXI_WDATA[8*byte_index +: 8];
		end
end

// Slave register 8: selected when the latched write address decodes to word
// index 8 (byte offset 0x20 with the default 32-bit data width).
// Cleared by the synchronous active-low reset.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg8;

always @( posedge S_AXI_ACLK )
begin
	if ( !S_AXI_ARESETN )
		slv_reg8 <= {C_S_AXI_DATA_WIDTH{1'b0}};
	else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd8) )
		begin
			// Update only the byte lanes whose write strobe is set.
			for ( byte_index = 0; byte_index < (C_S_AXI_DATA_WIDTH/8); byte_index = byte_index+1 )
				if ( S_AXI_WSTRB[byte_index] )
					slv_reg8[8*byte_index +: 8] <= S_AXI_WDATA[8*byte_index +: 8];
		end
end

// Slave register 9: selected when the latched write address decodes to word
// index 9 (byte offset 0x24 with the default 32-bit data width).
// Cleared by the synchronous active-low reset.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg9;

always @( posedge S_AXI_ACLK )
begin
	if ( !S_AXI_ARESETN )
		slv_reg9 <= {C_S_AXI_DATA_WIDTH{1'b0}};
	else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd9) )
		begin
			// Update only the byte lanes whose write strobe is set.
			for ( byte_index = 0; byte_index < (C_S_AXI_DATA_WIDTH/8); byte_index = byte_index+1 )
				if ( S_AXI_WSTRB[byte_index] )
					slv_reg9[8*byte_index +: 8] <= S_AXI_WDATA[8*byte_index +: 8];
		end
end

// Slave register 10: selected when the latched write address decodes to word
// index 10 (byte offset 0x28 with the default 32-bit data width).
// Cleared by the synchronous active-low reset.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg10;

always @( posedge S_AXI_ACLK )
begin
	if ( !S_AXI_ARESETN )
		slv_reg10 <= {C_S_AXI_DATA_WIDTH{1'b0}};
	else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd10) )
		begin
			// Update only the byte lanes whose write strobe is set.
			for ( byte_index = 0; byte_index < (C_S_AXI_DATA_WIDTH/8); byte_index = byte_index+1 )
				if ( S_AXI_WSTRB[byte_index] )
					slv_reg10[8*byte_index +: 8] <= S_AXI_WDATA[8*byte_index +: 8];
		end
end

// Slave register 11: selected when the latched write address decodes to word
// index 11 (byte offset 0x2C with the default 32-bit data width).
// Cleared by the synchronous active-low reset.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg11;

always @( posedge S_AXI_ACLK )
begin
	if ( !S_AXI_ARESETN )
		slv_reg11 <= {C_S_AXI_DATA_WIDTH{1'b0}};
	else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd11) )
		begin
			// Update only the byte lanes whose write strobe is set.
			for ( byte_index = 0; byte_index < (C_S_AXI_DATA_WIDTH/8); byte_index = byte_index+1 )
				if ( S_AXI_WSTRB[byte_index] )
					slv_reg11[8*byte_index +: 8] <= S_AXI_WDATA[8*byte_index +: 8];
		end
end

// Slave register 12: selected when the latched write address decodes to word
// index 12 (byte offset 0x30 with the default 32-bit data width).
// Cleared by the synchronous active-low reset.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg12;

always @( posedge S_AXI_ACLK )
begin
	if ( !S_AXI_ARESETN )
		slv_reg12 <= {C_S_AXI_DATA_WIDTH{1'b0}};
	else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd12) )
		begin
			// Update only the byte lanes whose write strobe is set.
			for ( byte_index = 0; byte_index < (C_S_AXI_DATA_WIDTH/8); byte_index = byte_index+1 )
				if ( S_AXI_WSTRB[byte_index] )
					slv_reg12[8*byte_index +: 8] <= S_AXI_WDATA[8*byte_index +: 8];
		end
end

// Slave register 13: selected when the latched write address decodes to word
// index 13 (byte offset 0x34 with the default 32-bit data width).
// Cleared by the synchronous active-low reset.
reg [C_S_AXI_DATA_WIDTH-1:0] slv_reg13;

always @( posedge S_AXI_ACLK )
begin
	if ( !S_AXI_ARESETN )
		slv_reg13 <= {C_S_AXI_DATA_WIDTH{1'b0}};
	else if ( slv_reg_wren && (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB] == 'd13) )
		begin
			// Update only the byte lanes whose write strobe is set.
			for ( byte_index = 0; byte_index < (C_S_AXI_DATA_WIDTH/8); byte_index = byte_index+1 )
				if ( S_AXI_WSTRB[byte_index] )
					slv_reg13[8*byte_index +: 8] <= S_AXI_WDATA[8*byte_index +: 8];
		end
end

reg [C_S_AXI_DATA_WIDTH-1:0]slv_reg14;
always @( posedge S_AXI_ACLK )
if ( S_AXI_ARESETN == 1'b0 )
	begin
		slv_reg14 <= 0;
	end
else
	if(slv_reg_wren & (axi_awaddr[ADDR_LSB+OPT_MEM_ADDR_BITS:ADDR_LSB]=='d14) )