master
wtf 2 years ago
parent 760391234c
commit fbd8854d4c

@ -1,46 +1,9 @@
// A2 Core Bridge

// should modularize as much as possible and just do messy rewiring here!
// adapt cores and buses with generic module
// one thread/core for now; multithread needs thread tag, deeper queues

// one thread/core for now

// possible extended command modifiers
// prefetch
// larx
// stcx
// lwsync
// hwsync
// tlbsync
// ici, icbi
// dci, dcbi, etc
// dcbtst
// dcbz
// tlbie, etc

// possible extended responses
// errors
// crit first, xfer# for larger bus width on core side
// credits
// resv valid
// stcx comp/pass
// sync ack
// back inv val/addr

// possible extra functions
// integrated L2
// doorbell/mailbox (peer/broadcast msg/rsp/intr side channel crossbar)



// cores must be contiguous, starting at 0
`define CORE_TYPE_NONE 4'h0
`define CORE_TYPE_A2L2 4'h1
`define CORE_TYPE_WB1 4'h2
`define CORE_TYPE_WB2 4'h3

`define BUS_TYPE_NONE 4'h0
`define BUS_TYPE_WB1 4'h1
`define BUS_TYPE_WB2 4'h2
`include "defs.v"

module A2WB #(
parameter [0:15] CORE_TYPES = {`CORE_TYPE_WB2, `CORE_TYPE_NONE, `CORE_TYPE_NONE, `CORE_TYPE_NONE},
@ -61,6 +24,8 @@ genvar i;
// ------------------------------------------------------------------------------------------------
// I/O Connections

// cores must be contiguous, starting at 0

wire i_wb_cyc [0:3];
wire i_wb_stb [0:3];
wire [31:2] i_wb_adr[0:3] ;
@ -80,13 +45,13 @@ wire [7:0] ext_rsp [0:3];
generate
for (i = 0; i < 4; i++) begin
case (CORE_TYPES[i*4:i*4+3])
4'h0: begin
`CORE_TYPE_NONE: begin
end
4'h1: begin
`CORE_TYPE_A2L2: begin
assign NUMCORES = NUMCORES + 1;
// a2l2
end
4'h2: begin
`CORE_TYPE_WB1: begin
assign NUMCORES = NUMCORES + 1;

wire [78:0] core_0_in;
@ -101,7 +66,7 @@ generate
assign core_out[i][32] = d_wb_ack[i];
assign core_out[i][31:0] = d_wb_datr[i];
end
4'h3: begin
`CORE_TYPE_WB2: begin
assign NUMCORES = NUMCORES + 1;

wire [110:0] core_in[i];
@ -126,137 +91,128 @@ generate
endgenerate

// ------------------------------------------------------------------------------------------------
// Command Queues/Addr Compare/Bypass
// Command Interfaces
//
// cores can have either 1 or 2 buses; assume single-cmd outstanding per, for now
// a2l2 could also allow 1 ld, 1 st credit and use 2 dedicated queues
reg [77:0] cmd_queue_q[0:3][0:1];
wire [77:0] cmd_queue_d[0:3][0:1];
wire [77:0] cmd_queue_in[0:3][0:1];
wire [71:0] cmd_queue_out[0:3];

generate
for (i = 0; i < 4; i++) begin
case (CORE_TYPES[i*4:i*4+3])
4'h0: begin
`CORE_TYPE_NONE: begin
end
4'h1: begin
`CORE_TYPE_A2L2: begin
// convert a2l2 to internal format
end
4'h2: begin
// q[0] = i or d
assign cmd_queue_in[i][0][77] = d_wb_cyc[i] & d_wb_stb[i]; // valid
assign cmd_queue_in[i][0][76] = d_wb_we[i];
assign cmd_queue_in[i][0][75:72] = d_wb_sel[i];
assign cmd_queue_in[i][0][71:40] = d_wb_adr[i];
assign cmd_queue_in[i][0][39:8] = d_wb_datw[i];
assign cmd_queue_in[i][0][7:0] = ext_cmd[i];
`CORE_TYPE_WB1: begin
cmd_wb #(.CORE_TYPE(CORE_TYPES[i*4:i*4+3]), .BUS_TYPE(BUS_TYPE)) core_in (
.clk(clk),
.rst(rst),
.i_wb_cyc('b0),
.i_wb_stb('b0),
.i_wb_adr('h0),
.d_wb_cyc(d_wb_cyc[i]),
.d_wb_stb(d_wb_stb[i]),
.d_wb_we(d_wb_we[i]),
.d_wb_sel(d_wb_sel[i]),
.d_wb_adr(d_wb_adr[i]),
.d_wb_datw(d_wb_datw[i]),
.ext_cmd(ext_cmd[i]),
.cmd_taken('b0),
.cmd_out_0(),
.cmd_out_1()
);
end
4'h3: begin
// q[0]=i, q[1]=d
assign cmd_queue_in[i][0][77] = i_wb_cyc[i] & i_wb_stb[i]; // valid
assign cmd_queue_in[i][0][76] = 'b0;
assign cmd_queue_in[i][0][75:72] = 'b0000;
assign cmd_queue_in[i][0][71:40] = d_wb_adr[i];
assign cmd_queue_in[i][0][39:8] = 'h000000;
assign cmd_queue_in[i][0][7:0] = ext_cmd[i];
assign cmd_queue_in[i][0][77] = d_wb_cyc[i] & d_wb_stb[i]; // valid
assign cmd_queue_in[i][0][76] = d_wb_we[i];
assign cmd_queue_in[i][0][75:72] = d_wb_sel[i];
assign cmd_queue_in[i][0][71:40] = d_wb_adr[i];
assign cmd_queue_in[i][0][39:8] = d_wb_datw[i];
assign cmd_queue_in[i][0][7:0] = ext_cmd[i];
`CORE_TYPE_WB2: begin
cmd_wb #(.CORE_TYPE(CORE_TYPES[i*4:i*4+3]), .BUS_TYPE(BUS_TYPE)) core_in (
.clk(clk),
.rst(rst),
.i_wb_cyc(i_wb_cyc[i]),
.i_wb_stb(i_wb_stb[i]),
.i_wb_adr(i_wb_adr[i]),
.d_wb_cyc(d_wb_cyc[i]),
.d_wb_stb(d_wb_stb[i]),
.d_wb_we(d_wb_we[i]),
.d_wb_sel(d_wb_sel[i]),
.d_wb_adr(d_wb_adr[i]),
.d_wb_datw(d_wb_datw[i]),
.ext_cmd(ext_cmd[i]),
.cmd_taken('b0),
.cmd_out_0(),
.cmd_out_1()
);
end
endcase
end
endgenerate

// ------------------------------------------------------------------------------------------------
// SMP

// larx/stcx
// assume: if larx hits L1, core invalidates line automatically -> do not need to send back-invalidate
// reservation granule is 32B (or use lcd of all cores)
// one reservation per thread
// reservation is set before core receives reload data

wire stcx_store [0:3];
wire resv_ra_hit [0:3];
wire resv_set [0:3];
wire resv_rst [0:3];
wire [27:0] resv_q [0:3]; // v, @31:5
wire [27:0] resv_d [0:3];

generate
for (i = 0; i < 4; i++) begin
// Arbitration
//
// LRU, etc. select from pending cmds; also needs smp to stall some/all cmds
// do addr cmp here, if necessary? or could do in smp

end
endgenerate
arb #() arb (

// sync ack
);

// cache ops
// ------------------------------------------------------------------------------------------------
// SMP

// tlb ops
// special ops: track resv, stall pending cmds, gen rsp
smp #() smp (

// ------------------------------------------------------------------------------------------------
// Arbitration
//
// LRU, etc. select from pending cmds
generate
for (i = 0; i < 4; i++) begin
end
endgenerate
);

// ------------------------------------------------------------------------------------------------
// Bus Out
// commands to main bus

generate
if (BUS_TYPE == `BUS_TYPE_WB1) begin

end else if (BUS_TYPE == `BUS_TYPE_WB2) begin

wire [101:0] bus_out;
wire bus_i_wb_stb;
assign bus_out[101] = bus_i_wb_stb;
wire [31:2] bus_i_wb_adr;
assign bus_out[100:71] = bus_i_wb_adr;
wire bus_d_wb_cyc;
assign bus_out[70] = bus_d_wb_cyc;
wire bus_d_wb_stb;
assign bus_out[69] = bus_d_wb_stb;
wire bus_d_wb_we;
assign bus_out[68] = bus_d_wb_we;
wire [3:0] bus_d_wb_sel;
assign bus_out[67:64] = bus_d_wb_sel;
wire [31:0] bus_d_wb_adr;
assign bus_out[63:32] = bus_d_wb_adr;
wire [31:0] bus_d_wb_datw;
assign bus_out[31:0] = bus_d_wb_datw;

end else begin
end
case(BUS_TYPE)
`BUS_TYPE_WB1: begin
end
`BUS_TYPE_WB2: begin
wire [101:0] bus_out;
wire bus_i_wb_stb;
assign bus_out[101] = bus_i_wb_stb;
wire [31:2] bus_i_wb_adr;
assign bus_out[100:71] = bus_i_wb_adr;
wire bus_d_wb_cyc;
assign bus_out[70] = bus_d_wb_cyc;
wire bus_d_wb_stb;
assign bus_out[69] = bus_d_wb_stb;
wire bus_d_wb_we;
assign bus_out[68] = bus_d_wb_we;
wire [3:0] bus_d_wb_sel;
assign bus_out[67:64] = bus_d_wb_sel;
wire [31:0] bus_d_wb_adr;
assign bus_out[63:32] = bus_d_wb_adr;
wire [31:0] bus_d_wb_datw;
assign bus_out[31:0] = bus_d_wb_datw;
end
endcase
endgenerate

// ------------------------------------------------------------------------------------------------
// Bus In
// responses from main bus

generate
if (BUS_TYPE == `BUS_TYPE_WB1) begin

end else if (BUS_TYPE == `BUS_TYPE_WB2) begin

wire [65:0] bus_in;
wire bus_i_wb_ack = bus_in[65];
wire [31:0] bus_i_wb_datr = bus_in[64:33];
wire bus_d_wb_ack = bus_in[32];
wire [31:0] bus_d_wb_datr = bus_in[31:0];

end else begin
end
case(BUS_TYPE)
`BUS_TYPE_WB1: begin
end
`BUS_TYPE_WB2: begin
wire [65:0] bus_in;
wire bus_i_wb_ack = bus_in[65];
wire [31:0] bus_i_wb_datr = bus_in[64:33];
wire bus_d_wb_ack = bus_in[32];
wire [31:0] bus_d_wb_datr = bus_in[31:0];
end
endcase
endgenerate

// ------------------------------------------------------------------------------------------------
// Response Queues
// responses for cores

generate
for (i = 0; i < 4; i++) begin
@ -265,6 +221,7 @@ endgenerate

// ------------------------------------------------------------------------------------------------
// Misc/Errors/Debug
// stuff

generate
for (i = 0; i < 4; i++) begin

@ -0,0 +1,13 @@

// Command arbiter (skeleton).
// No parameters, ports, or logic are defined yet; the notes inside the module
// list the responsibilities this block is planned to take on.
module arb # (
) (

);

// planned function:
// fairly choose 1 or 2 (depending on output buses) cmds
// mark taken from queue
// obey restrictions from smp, etc.
// detect addr collisions - not needed if no caching?


endmodule

@ -0,0 +1,15 @@
// a2l2 default: allow 1 ld, 1 st credit and use 2 dedicated queues

`include "defs.v"

// Command interface stub: no ports and no logic yet.
//
// NOTE(review): this module shares the name cmd_wb with the Wishbone command
// interface, while the comment at the top of this file describes the a2l2 case.
// Confirm the intended module name before both definitions are compiled together.
//
// Parameters:
//   CORE_TYPE - core-side interface type, one of the `CORE_TYPE_* codes from defs.v
module cmd_wb #(
   parameter CORE_TYPE = `CORE_TYPE_A2L2   // macro reference needs the backtick
) (
);

   // Queue storage is declared inside the module so it is module-scoped rather
   // than sitting at file level; nothing drives or reads these yet.
   // First index 0..3, second index 0..1, 78-bit entries (72-bit for the output).
   reg  [77:0] cmd_queue_q[0:3][0:1];
   wire [77:0] cmd_queue_d[0:3][0:1];
   wire [77:0] cmd_queue_in[0:3][0:1];
   wire [71:0] cmd_queue_out[0:3];

endmodule

@ -0,0 +1,132 @@
// Wishbone-Wishbone Command Interface

// allow single- or dual-wb in/out:
// 1/1 : passthru
// 1/2 : route to proper - but this requires indicator in extcmd to distinguish i vs. d
// 2/1 : arbitrate
// 2/2 : passthru
//
// also handle special ops when possible (dcbz, ...)

// select one command per output bus

// ext_cmd is not tied to i/d - does it need multiple outstanding? (nop=0; valid ext cmds that require i/d info must also have that info provided)
// needs ext_tkn if not tied to i/d; needs to set q valid if tied to i/d (based on ext i or d type)
// these all seem to be tied to an i or d and require a response, so i+d+ext outstanding shouldn't be needed?

// possible extended command modifiers
// prefetch
// larx
// stcx
// lwsync
// hwsync
// tlbsync
// ici, icbi
// dci, dcbi, etc
// dcbtst
// dcbz
// tlbie, etc

// possible extended responses
// errors
// crit first, xfer# for larger bus width on core side
// credits
// resv valid
// stcx comp/pass
// sync ack
// back inv val/addr

// possible extra functions
// integrated L2
// doorbell/mailbox (peer/broadcast msg/rsp/intr side channel crossbar)

`include "defs.v"

// Wishbone core command interface.
//
// Packs the core-side Wishbone request(s) and ext_cmd into the internal 78-bit
// command format, then selects what is presented on cmd_out_0 / cmd_out_1
// according to CORE_TYPE (single or dual core bus) and BUS_TYPE (single or dual
// main bus).
//
// Command format (as built by the assigns below):
//   [77]    valid  (cyc & stb)
//   [76]    we
//   [75:72] sel
//   [71:40] address
//   [39:8]  write data
//   [7:0]   ext_cmd
//
// Parameters:
//   CORE_TYPE - `CORE_TYPE_WB1 (one bus, d_wb_* only) or `CORE_TYPE_WB2 (i + d buses)
//   BUS_TYPE  - `BUS_TYPE_WB1 or `BUS_TYPE_WB2
//
// Ports:
//   clk, rst            - clock / reset (not used by any logic in this block yet)
//   i_wb_cyc/stb/adr    - instruction-side request; adr is a word address [31:2]
//   d_wb_cyc/stb/we/sel/adr/datw - data-side request
//   ext_cmd             - extended command modifier accompanying the request
//   cmd_taken           - bit vector, one per queued cmd (not consumed yet)
//   cmd_out_0/cmd_out_1 - selected command(s) in the format above
//
// NOTE(review): cmd_queue_q and ext_queue_q are declared but nothing in this block
// loads them from cmd_queue_in / ext_queue_in yet; the queue registers and the
// cmd_taken handling still have to be written.
module cmd_wb #(
   parameter CORE_TYPE = `CORE_TYPE_WB2,
   parameter BUS_TYPE = `BUS_TYPE_WB2
) (
   input         clk,
   input         rst,
   input         i_wb_cyc,
   input         i_wb_stb,
   input  [31:2] i_wb_adr,
   input         d_wb_cyc,
   input         d_wb_stb,
   input         d_wb_we,
   input  [3:0]  d_wb_sel,
   input  [31:0] d_wb_adr,
   input  [31:0] d_wb_datw,
   input  [7:0]  ext_cmd,
   input  [2:0]  cmd_taken,      // bit vector, one per queued cmd (could simultaneously occur in some designs)
   output [77:0] cmd_out_0,
   output [77:0] cmd_out_1
);

   // Queue storage lives inside the module (it was previously declared at file
   // scope, outside any module).
   reg  [77:0] cmd_queue_q[0:1];
   wire [77:0] cmd_queue_d[0:1];
   wire [77:0] cmd_queue_in[0:1];
   wire [77:0] cmd_queue_out;
   reg  [7:0]  ext_queue_q;
   wire [7:0]  ext_queue_d;
   wire [7:0]  ext_queue_in;

   // Captured for every core type: the WB1-core/WB2-bus routing below reads
   // ext_queue_q, so the ext queue input must not depend on CORE_TYPE.
   assign ext_queue_in = ext_cmd;

   // ---------------------------------------------------------------------------
   // core request(s) -> internal command format
   case (CORE_TYPE)
      `CORE_TYPE_WB1: begin
         // q[0] = i or d
         assign cmd_queue_in[0][77]    = d_wb_cyc & d_wb_stb;   // valid - may need ext decode too
         assign cmd_queue_in[0][76]    = d_wb_we;
         assign cmd_queue_in[0][75:72] = d_wb_sel;
         assign cmd_queue_in[0][71:40] = d_wb_adr;
         assign cmd_queue_in[0][39:8]  = d_wb_datw;
         assign cmd_queue_in[0][7:0]   = ext_cmd;
      end
      `CORE_TYPE_WB2: begin
         // q[0]=i, q[1]=d
         assign cmd_queue_in[0][77]    = i_wb_cyc & i_wb_stb;   // valid - may need ext decode too
         assign cmd_queue_in[0][76:72] = 'h0;                   // i side: no we/sel
         assign cmd_queue_in[0][71:40] = {i_wb_adr, 2'b0};      // word address -> 32-bit address
         assign cmd_queue_in[0][39:8]  = 'h0;                   // i side: no write data
         assign cmd_queue_in[0][7:0]   = ext_cmd;
         assign cmd_queue_in[1][77]    = d_wb_cyc & d_wb_stb;   // valid - may need ext decode too
         assign cmd_queue_in[1][76]    = d_wb_we;
         assign cmd_queue_in[1][75:72] = d_wb_sel;
         assign cmd_queue_in[1][71:40] = d_wb_adr;
         assign cmd_queue_in[1][39:8]  = d_wb_datw;
         assign cmd_queue_in[1][7:0]   = ext_cmd;
      end
   endcase

   // ---------------------------------------------------------------------------
   // queue routing/arbitration to cmd processing

   // create generic command format out, handling i,d,ext!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   // dual wb cmds may need taken AND next; depends though on what should be ordering; start with INORDER=1 parameter?

   case (CORE_TYPE)
      `CORE_TYPE_WB1: begin
         case (BUS_TYPE)
            `BUS_TYPE_WB1: begin
               // 1/1: passthru
               assign cmd_out_0 = cmd_queue_q[0];
               assign cmd_out_1 = 'h0;   // single output bus: second command is never valid
            end
            `BUS_TYPE_WB2: begin
               // select i vs d; the queue only has entries 0 and 1, so the
               // fall-through entry is q[0] (index 7 was out of range).
               // NOTE(review): a WB1 core only fills q[0] above, and the select
               // polarity of ext_queue_q[0] is unverified - confirm routing intent.
               assign cmd_out_0 = ext_queue_q[0] ? cmd_queue_q[1] : cmd_queue_q[0];
               assign cmd_out_1 = 'h0;   // not routed yet; drive invalid instead of floating
            end
         endcase
      end
      `CORE_TYPE_WB2: begin
         case (BUS_TYPE)
            `BUS_TYPE_WB1: begin
               // both valid: send d
               // want selected bit; set first cycle; dont change once selected until not valid
               assign cmd_out_0 = cmd_queue_q[1][77] ? cmd_queue_q[1] : cmd_queue_q[0];
               assign cmd_out_1 = 'h0;   // single output bus: second command is never valid
            end
            `BUS_TYPE_WB2: begin
               // 2/2: passthru, i on 0 and d on 1
               assign cmd_out_0 = cmd_queue_q[0];
               assign cmd_out_1 = cmd_queue_q[1];
            end
         endcase
      end
   endcase

endmodule

@ -0,0 +1,10 @@
// a2wb defines
//
// Shared by the a2wb source files through `include "defs.v". The guard below
// makes repeated inclusion within one compilation a no-op.

`ifndef A2WB_DEFS_V
`define A2WB_DEFS_V

// core-side interface type; 4-bit code, one per core slot in CORE_TYPES
`define CORE_TYPE_NONE 4'h0
`define CORE_TYPE_A2L2 4'h1
`define CORE_TYPE_WB1 4'h2
`define CORE_TYPE_WB2 4'h3

// main-bus interface type; 4-bit code
`define BUS_TYPE_NONE 4'h0
`define BUS_TYPE_WB1 4'h1
`define BUS_TYPE_WB2 4'h2

`endif // A2WB_DEFS_V

@ -8,13 +8,13 @@

* Dual (separate I/D) WB buses w/SMP extensions

* bus interface
* bus interfaces

* WB (non-SMP)
* single WB

* functions
* dual WB

* arbitrates for WB bus
* functions

* queues one or more core commands

@ -24,6 +24,9 @@

* optional mailbox interface for core-core peer and broadcast

* arbitrates for WB bus(es)

* gen responses for cores

## Possible configurations

@ -31,6 +34,9 @@

* one core, A2L2: bridge with SMP functions

* multi-core: bridge for A2L2, queueing, arbitration, and SMP functions
* multi-core: identical or mixed WB1/WB2/A2L2, queueing, arbitration, and SMP functions


### syntax check

```verilator --lint-only a2wb.v -Wno-LITENDIAN```

@ -0,0 +1,26 @@

// SMP support (skeleton).
// No parameters or ports are defined yet. The body only declares the
// reservation-tracking nets for larx/stcx; nothing drives or reads them so far.
// Sync, cache, and tlb ops are placeholders.
module smp # (
) (

);

// larx/stcx
// assume: if larx hits L1, core invalidates line automatically -> do not need to send back-invalidate
// reservation granule is 32B (or use lcd of all cores)
// one reservation per thread
// reservation is set before core receives reload data

// all arrays are indexed 0..3 (presumably one entry per core - TODO confirm)
wire stcx_store [0:3];
wire resv_ra_hit [0:3];
wire resv_set [0:3];
wire resv_rst [0:3];
// 28 bits = 1 valid bit + 27 address bits (31:5), matching the 32B granule above
wire [27:0] resv_q [0:3]; // v, @31:5
wire [27:0] resv_d [0:3];

// sync ops

// cache ops

// tlb ops

endmodule
Loading…
Cancel
Save