diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7bc0784 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +# Ignore compiled binaries +* +!/**/ +!*.* +!makefile +*.out + +# Ignore gtkwave files +*.vcd + +# Ignore temporary memory file +mem.dat \ No newline at end of file diff --git a/6by6multiply.sh b/6by6multiply.sh new file mode 100755 index 0000000..d174934 --- /dev/null +++ b/6by6multiply.sh @@ -0,0 +1,13 @@ +#!/bin/bash +python2 setup_memory.py + +cat data_test.dat > matrix_mem.dat +cat prog_test.dat > prog_mem.dat + +echo "Running the BIG test..." +make +./multiplier6by6 + +python2 read_6by6result.py + +make clean \ No newline at end of file diff --git a/ProjectProposal.md b/ProjectProposal.md new file mode 100644 index 0000000..4161326 --- /dev/null +++ b/ProjectProposal.md @@ -0,0 +1,36 @@ +# GPU Matrix Operations +### Ariana Olson, Rocco DiVerdi, Serena Chen + +We want to study GPU architectures, and build a simple one to do matrix operations. We want to understand the strategy for multiplying matrices in the GPU, and how the GPU is programmed to perform these operations. We will also build a small toy matrix multiplier module in verilog, which can be scaled in scope depending on how much time the previous parts of the project take. 
+ +## References: + +[Understanding the Efficiency of GPU Algorithms for Matrix-Matrix Multiplication](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.1.6823&rep=rep1&type=pdf) + +[Matrix computations on the GPU](https://developer.nvidia.com/sites/default/files/akamai/cuda/files/Misc/mygpu.pdf) + +[Matrix Multiplication with CUDA — A basic introduction to the CUDA programming model](https://www.shodor.org/media/content/petascale/materials/UPModules/matrixMultiplication/moduleDocument.pdf) + +[some random project from somewhere making a GPU](https://courses.cs.washington.edu/courses/cse467/15wi/docs/prj1.pdf) + +## MVP: + +Present to class about how GPUs perform matrix multiplication and other operations. And how it compares to MIPS CPU. + +Planned: Build a toy processor that performs operations and prints results to the terminal. + +Stretch: Visualize real time matrix transformations for a small set of “pixels” + +Super Stretch: be hired by Nvidia + +Even more super stretch: start a GPU company and make Nvidia obsolete + +## Work Plan: + +Thanksgiving break: sleep + +11/27-12/3: Learn about matrix multiplication on GPUs, run some example programs on actual GPUs. Set up documentation framework. Start writing up explanations of what we have learned. + +12/4-12/10: Start implementing verilog modules. Create documentation on performance, how to run tests/examples, block diagrams, etc. + +12/10-12/14: Debugging, polishing, documenting. 
(try not to be dead) diff --git a/add3by3.t.v b/add3by3.t.v new file mode 100644 index 0000000..eca5117 --- /dev/null +++ b/add3by3.t.v @@ -0,0 +1,61 @@ +/* +Test bench for 3 by 3 matrix adder +*/ + +`include "add3by3.v" + +module add3by3_TEST(); + parameter ENTRY_SIZE = 9; + + reg[ENTRY_SIZE - 1:0] a0, a1, a2, a3, a4; + reg[ENTRY_SIZE - 1:0] a5, a6, a7, a8; + reg[ENTRY_SIZE - 1:0] b0, b1, b2, b3, b4; + reg[ENTRY_SIZE - 1:0] b5, b6, b7, b8; + wire[ENTRY_SIZE - 1:0] c0, c1, c2, c3, c4; + wire[ENTRY_SIZE - 1:0] c5, c6, c7, c8; + + add3by3 #(.ENTRY_SIZE(ENTRY_SIZE)) dut ( + .a0(a0), .a1(a1), .a2(a2), .a3(a3), .a4(a4), + .a5(a5), .a6(a6), .a7(a7), .a8(a8), .b0(b0), + .b1(b1), .b2(b2), .b3(b3), .b4(b4), .b5(b5), + .b6(b6), .b7(b7), .b8(b8), .c0(c0), .c1(c1), + .c2(c2), .c3(c3), .c4(c4), .c5(c5), .c6(c6), + .c7(c7), .c8(c8) + ); + + initial begin + // Add two matrices + a0 = 1; a1 = 2; a2 = 3; a3 = 4; a4 = 5; a5 = 6; a6 = 7; a7 = 8; a8 = 9; + b0 = 30; b1 = 29; b2 = 28; b3 = 27; b4 = 26; b5 = 25; b6 = 24; b7 = 23; b8 = 22; + + #50 + + if (c0 !== 5'd31) begin + $display("Test case failed.\nExpected value of c0: %b\tActual value of c0: %b", a0 + b0, c0); + end + if (c1 !== 5'd31) begin + $display("Test case failed.\nExpected value of c1: %b\tActual value of c1: %b", a1 + b1, c1); + end + if (c2 !== 5'd31) begin + $display("Test case failed.\nExpected value of c2: %b\tActual value of c2: %b", a2 + b2, c2); + end + if (c3 !== 5'd31) begin + $display("Test case failed.\nExpected value of c3: %b\tActual value of c3: %b", a3 + b3, c3); + end + if (c4 !== 5'd31) begin + $display("Test case failed.\nExpected value of c4: %b\tActual value of c4: %b", a4 + b4, c4); + end + if (c5 !== 5'd31) begin + $display("Test case failed.\nExpected value of c5: %b\tActual value of c5: %b", a5 + b5, c5); + end + if (c6 !== 5'd31) begin + $display("Test case failed.\nExpected value of c6: %b\tActual value of c6: %b", a6 + b6, c6); + end + if (c7 !== 5'd31) begin + $display("Test case 
failed.\nExpected value of c7: %b\tActual value of c7: %b", a7 + b7, c7); + end + if (c8 !== 5'd31) begin + $display("Test case failed.\nExpected value of c8: %b\tActual value of c8: %b", a8 + b8, c8); + end + end +endmodule \ No newline at end of file diff --git a/add3by3.v b/add3by3.v new file mode 100644 index 0000000..d10a07a --- /dev/null +++ b/add3by3.v @@ -0,0 +1,41 @@ +/* +Add two 3 by 3 matrix blocks. +Performs the operation A + B = C + +Entries 0 to 8 +of all matrices are arranged in the following order: + + x0 x1 x2 + x3 x4 x5 + x6 x7 x8 + +Inputs: + - a0, ..., a8: the entries of matrix A + - b0, ..., a8: the entries of matrix B +Outputs: + - c0, ..., c8: the entries of matrix C +Parameters: + - ENTRY_SIZE: the number of bits of each matrix entry +*/ + +module add3by3 +#( + parameter ENTRY_SIZE = 5 +)( + input[ENTRY_SIZE - 1:0] a0, a1, a2, a3, + input[ENTRY_SIZE - 1:0] a4, a5, a6, a7, a8, + input[ENTRY_SIZE - 1:0] b0, b1, b2, b3, + input[ENTRY_SIZE - 1:0] b4, b5, b6, b7, b8, + output[ENTRY_SIZE - 1:0] c0, c1, c2, c3, + output[ENTRY_SIZE - 1:0] c4, c5, c6, c7, c8 +); + assign c0 = a0 + b0; + assign c1 = a1 + b1; + assign c2 = a2 + b2; + assign c3 = a3 + b3; + assign c4 = a4 + b4; + assign c5 = a5 + b5; + assign c6 = a6 + b6; + assign c7 = a7 + b7; + assign c8 = a8 + b8; +endmodule \ No newline at end of file diff --git a/arithmetic.t.v b/arithmetic.t.v new file mode 100644 index 0000000..9bc805f --- /dev/null +++ b/arithmetic.t.v @@ -0,0 +1,42 @@ +/* +Test bench for the scalar multiplication module +*/ + +`include "arithmetic.v" +module scalar_multiplication_TEST(); + +parameter ENTRY_SIZE = 5; +parameter RESENTRY_SIZE = 9; + +reg[ENTRY_SIZE - 1:0] a, b; +wire[RESENTRY_SIZE - 1:0] out; + +scalar_multiplication #(.ENTRY_SIZE(ENTRY_SIZE), .RESENTRY_SIZE(RESENTRY_SIZE)) dut (a, b, out); + +initial begin + // Multiply by 0 + a = 5'd0; b = 5'd0; #50 + if (out !== 9'd0) begin + $display("Test failed. Expected output: %b. 
Actual output: %b", 9'd0, out); + end + + // Multiply by 1 + a = 5'd1; b = 5'd15; #50 + if (out !== 9'd15) begin + $display("Test failed. Expected output: %b. Actual output: %b", 9'd15, out); + end + + // Multiply two numbers that do not overflow the result size + a = 5'd25; b = 5'd20; #50 + if (out !== 9'd500) begin + $display("Test failed. Expected output: %b. Actual output: %b", 9'd500, out); + end + + // Multiply two numbers that overflow the result size + // Expect the most significant bits to be truncated + a = 5'd31; b = 5'd25; #50 + if (out !== 9'd263) begin + $display("Test failed. Expected output: %b. Actual output: %b", 9'd263, out); + end +end +endmodule // testMult \ No newline at end of file diff --git a/arithmetic.v b/arithmetic.v new file mode 100644 index 0000000..fd9d04b --- /dev/null +++ b/arithmetic.v @@ -0,0 +1,24 @@ +/* +Performs a scalar multiplication on two numbers. +Represents the operation a * b = c. +Inputs: + - a and b are the two scalars being multiplied +Outputs: + - result is the scalar product. 
+Parameters: + - ENTRY_SIZE is the size of a and b + - RESENTRY_SIZE is the size of result +*/ +module scalar_multiplication +#( + parameter ENTRY_SIZE = 5, + parameter RESENTRY_SIZE = 9 +) +( + input [ENTRY_SIZE - 1:0] a, b, + output [RESENTRY_SIZE - 1:0] result +); + +assign result = a*b; + +endmodule // multiplication diff --git a/block_test_data.dat b/block_test_data.dat new file mode 100644 index 0000000..48330d4 --- /dev/null +++ b/block_test_data.dat @@ -0,0 +1,73 @@ +// This is the memory that should be used in the load block test + +// 3 by 3 identity at address 0 +0_0001 +0_0000 +0_0000 +0_0000 +0_0001 +0_0000 +0_0000 +0_0000 +0_0001 + +// 4 by 4 increasing at address 9 +0_0000 +0_0001 +0_0010 +0_0011 +0_0100 +0_0101 +0_0110 +0_0111 +0_1000 +0_1001 +0_1010 +0_1011 +0_1100 +0_1101 +0_1110 +0_1111 + +// 7 by 3 decreasing at address 25 +1_1111 +1_1110 +1_1101 +1_1100 +1_1011 +1_1010 +1_1001 +1_1000 +1_0111 +1_0110 +1_0101 +1_0100 +1_0011 +1_0010 +1_0001 +1_0000 +0_1111 +0_1110 +0_1101 +0_1100 +0_1011 + +// Extra memory at address 46 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 diff --git a/controller.t.v b/controller.t.v new file mode 100644 index 0000000..634f58e --- /dev/null +++ b/controller.t.v @@ -0,0 +1,109 @@ +/* +Test bench for controller module. 
+*/ + +`include "controller.v" + +module controller_TEST(); + parameter NUM_TESTS = 3; + + reg Clk; + + wire data_we; + wire weA, weB, weC, weD, weE, weF, weG, weH; + wire[1:0] jklm_select; + wire next_row, column; + + controller dut(Clk, data_we, weA, weB, weC, weD, weE, weF, weG, weH, jklm_select, next_row, column); + + reg[5:0] success_count = 0; + + initial Clk = 0; + integer i; + + initial #1000 $finish; + +initial begin + $dumpfile("controller.vcd"); + $dumpvars(); + + #5 + // Test Case 1: command 1 + if (data_we != 0) begin + $display("Test Case 1 Failed: data_we not set correctly"); + end + else if ((weA !== 1) || ({weB, weC, weD, weE, weF, weG, weH} !== 7'b0)) begin // fail if weA is low OR any other write enable is high + $display("Test Case 1 Failed: write enables not set correctly"); + end + else if (next_row != 0) begin + $display("Test Case 1 Failed: next row is signalled incorrectly"); + end + else if (column != 0) begin + $display("Test Case 1 Failed: column is signalled incorrectly"); + end + else begin + // test passed + success_count = success_count+1; + end + + + // cycle clock + Clk = 1; + #5 Clk=0; + #5 + + // Test Case 2: command 2 + if (data_we != 0) begin + $display("Test Case 2 Failed: data_we not set correctly"); + end + else if ((weB !== 1) || ({weA, weC, weD, weE, weF, weG, weH} !== 7'b0)) begin // fail if weB is low OR any other write enable is high + $display("Test Case 2 Failed: write enables not set correctly"); + end + else if (next_row != 1) begin + $display("Test Case 2 Failed: next row is signalled incorrectly"); + end + else if (column != 0) begin + $display("Test Case 2 Failed: column is signalled incorrectly"); + end + else begin + // test passed + success_count = success_count+1; + end + + // cycle clock 9 times + for (i=0; i<9; i=i+1) begin + Clk = 1; + #5 Clk=0; + #5; + end + + // Test Case 3: command 11 + if (data_we != 1) begin + $display("Test Case 3 Failed: data_we not set correctly"); + end + else if ( {weA, weB, weC, weD, weE, weF, weG, weH} != 8'b0) begin + $display("Test Case 3 Failed: write enable set on store cmd"); + end + else if 
(jklm_select != 2'b10) begin + $display("Test Case 3 Failed: mux select incorrect"); + end + else if (next_row != 0) begin + $display("Test Case 3 Failed: next row is signalled incorrectly"); + end + else if (column != 1) begin + $display("Test Case 3 Failed: column is signalled incorrectly"); + end + else begin + // test passed + success_count = success_count+1; + end + + if (success_count < NUM_TESTS) begin + $display("\nController Failed %d Tests\n",(NUM_TESTS-success_count)); + end + else begin + $display("Controller Passed All %d tests", NUM_TESTS); + end + +end +endmodule diff --git a/controller.v b/controller.v new file mode 100644 index 0000000..1a9a216 --- /dev/null +++ b/controller.v @@ -0,0 +1,40 @@ +`include "prog_memory.v" +`include "fsm.v" + +/* +The controller runs through commands from program memory, changing commands +every clock cycle, and outputs the control signals for the various pieces +of the multiplier + +inputs: clk +outputs: same as fsm +*/ + +module controller +#( + parameter CMD_WIDTH = 5, + parameter ADDR_WIDTH = 32 +) +( + input clk, + output data_we, + output weA, weB, weC, weD, weE, weF, weG, weH, + output[1:0] jklm_select, + output next_row, column +); + + wire[CMD_WIDTH-1:0] cmd; + reg [ADDR_WIDTH-1:0] prog_count; + + // run program counter + initial prog_count = 0; + always @(posedge clk) begin + prog_count = prog_count + 1; + end + + prog_memory prog_mem (clk, cmd, prog_count); + + fsm state_machine (cmd, data_we, weA, weB, weC, weD, weE, weF, weG, weH, + jklm_select, next_row, column); + +endmodule diff --git a/data_memory.t.v b/data_memory.t.v new file mode 100644 index 0000000..8de56f8 --- /dev/null +++ b/data_memory.t.v @@ -0,0 +1,127 @@ +/* +Test bench for data memory module. 
+*/ + +`include "data_memory.v" + +module memory_TEST(); + parameter ENTRY_SIZE = 5; + parameter ADDRESS_WIDTH = 32; + reg clk; + reg [ADDRESS_WIDTH - 1:0] addr0, addr1, addr2, addr3, addr4; + reg [ADDRESS_WIDTH - 1:0] addr5, addr6, addr7, addr8; + reg writeEnable; + reg [ENTRY_SIZE - 1:0] dataIn0, dataIn1, dataIn2, dataIn3, dataIn4; + reg [ENTRY_SIZE - 1:0] dataIn5, dataIn6, dataIn7, dataIn8; + + wire [ENTRY_SIZE - 1:0] data0, data1, data2, data3, data4; + wire [ENTRY_SIZE - 1:0] data5, data6, data7, data8; + + data_memory #(.width(ENTRY_SIZE), .addresswidth(ADDRESS_WIDTH)) dut( + .clk(clk), .data0(data0), .data1(data1), .data2(data2), + .data3(data3), .data4(data4), .data5(data5), .data6(data6), + .data7(data7), .data8(data8), .addr0(addr0), .addr1(addr1), + .addr2(addr2), .addr3(addr3), .addr4(addr4), .addr5(addr5), + .addr6(addr6), .addr7(addr7), .addr8(addr8), .writeEnable(writeEnable), + .dataIn0(dataIn0), .dataIn1(dataIn1), .dataIn2(dataIn2), + .dataIn3(dataIn3), .dataIn4(dataIn4), .dataIn5(dataIn5), + .dataIn6(dataIn6), .dataIn7(dataIn7), .dataIn8(dataIn8) + ); + + initial clk = 0; + always #10 clk = !clk; + initial begin + $dumpfile("memory.vcd"); + $dumpvars(0, memory_TEST, dut.memory[0]); + + dataIn0 = 5'b00000; dataIn1 = dataIn0; dataIn2 = dataIn0; + dataIn3 = dataIn0; dataIn4 = dataIn0; dataIn5 = dataIn0; dataIn6 = dataIn0; + dataIn7 = dataIn0; dataIn8 = dataIn0; // all nine data inputs start at a known zero, none left at X + + addr0 = 32'd0; + addr1 = addr0 + (32'd1 << 2); + addr2 = addr1 + (32'd1 << 2); + addr3 = addr2 + (32'd1 << 2); + addr4 = addr3 + (32'd1 << 2); + addr5 = addr4 + (32'd1 << 2); + addr6 = addr5 + (32'd1 << 2); + addr7 = addr6 + (32'd1 << 2); + addr8 = addr7 + (32'd1 << 2); + + // Test Case 1: Do not write if writeEnable is low. 
+ writeEnable = 0; dataIn0 = 5'h1f; + #20 + if (data0 === dataIn0) begin + $display("Test case 1 failed: memory was written to when writeEnable was false."); + end + + else if (data0 === 5'bx) begin + $display("Test case 1 failed: there is no memory at the given address."); + end + + #1000 + // Test case 2: Write to memory if writeEnable is high. + writeEnable = 1; + dataIn0 = 5'h1f; + dataIn1 = 5'h1e; + dataIn2 = 5'h1d; + dataIn3 = 5'h1c; + dataIn4 = 5'h1b; + dataIn5 = 5'h1a; + dataIn6 = 5'h19; + dataIn7 = 5'h18; + dataIn8 = 5'h17; + + #40 + + if (data0 === 5'bx) begin + $display("Test case 2 failed: there is no memory at the given address."); + end + + if (data0 !== dataIn0) begin + $display("Test case 2 failed: memory was not written to when writeEnable was true."); + end + + if (dut.memory[0] !== dataIn0) begin + $display("Test case 2 failed: the memory contained at the given address does not match dataIn0"); + end + + if (dut.memory[1] !== dataIn1) begin + $display("dut.memory: %b, dataIn: %b", dut.memory[1], dataIn1); // print the same word that the check above compares + $display("Test case 2 failed: the memory contained at the given address does not match dataIn1"); + end + + if (dut.memory[2] !== dataIn2) begin + $display("Test case 2 failed: the memory contained at the given address does not match dataIn2"); + end + + if (dut.memory[3] !== dataIn3) begin + $display("Test case 2 failed: the memory contained at the given address does not match dataIn3"); + end + + if (dut.memory[4] !== dataIn4) begin + $display("Test case 2 failed: the memory contained at the given address does not match dataIn4"); + end + + if (dut.memory[5] !== dataIn5) begin + $display("Test case 2 failed: the memory contained at the given address does not match dataIn5"); + end + + if (dut.memory[6] !== dataIn6) begin + $display("Test case 2 failed: the memory contained at the given address does not match dataIn6"); + end + + if (dut.memory[7] !== dataIn7) begin + $display("Test case 2 failed: the memory contained at the given address 
does not match dataIn7"); + end + + if (dut.memory[8] !== dataIn8) begin + $display("Test case 2 failed: the memory contained at the given address does not match dataIn8"); + end + + + #1000 + + $finish; + end +endmodule \ No newline at end of file diff --git a/data_memory.v b/data_memory.v new file mode 100644 index 0000000..18359dc --- /dev/null +++ b/data_memory.v @@ -0,0 +1,56 @@ +/* +The memory where matrices are stored. +*/ + +module data_memory +#( + parameter addresswidth = 32, + parameter depth = addresswidth * 2, + parameter width = 32 +) +( + input clk, + output [width-1:0] data0, data1, data2, data3, data4, + output [width-1:0] data5, data6, data7, data8, + input [addresswidth-1:0] addr0, addr1, addr2, addr3, addr4, + input [addresswidth-1:0] addr5, addr6, addr7, addr8, + input writeEnable, + input [width-1:0] dataIn0, dataIn1, dataIn2, dataIn3, dataIn4, + input [width-1:0] dataIn5, dataIn6, dataIn7, dataIn8 +); + + + reg [width-1:0] memory [depth-1:0]; + + integer i; + + always @(negedge clk) begin + // for (i = 0; i < 100; i= i+1) + // $display(memory[i]); + // $display("dm"); + if(writeEnable == 1) begin + memory[addr0 >> 2] <= dataIn0; + memory[addr1 >> 2] <= dataIn1; + memory[addr2 >> 2] <= dataIn2; + memory[addr3 >> 2] <= dataIn3; + memory[addr4 >> 2] <= dataIn4; + memory[addr5 >> 2] <= dataIn5; + memory[addr6 >> 2] <= dataIn6; + memory[addr7 >> 2] <= dataIn7; + memory[addr8 >> 2] <= dataIn8; + end + end + + assign data0 = memory[addr0 >> 2]; + assign data1 = memory[addr1 >> 2]; + assign data2 = memory[addr2 >> 2]; + assign data3 = memory[addr3 >> 2]; + assign data4 = memory[addr4 >> 2]; + assign data5 = memory[addr5 >> 2]; + assign data6 = memory[addr6 >> 2]; + assign data7 = memory[addr7 >> 2]; + assign data8 = memory[addr8 >> 2]; + + initial $readmemb("matrix_mem.dat", memory); + +endmodule diff --git a/docs/_config.yml b/docs/_config.yml new file mode 100644 index 0000000..1885487 --- /dev/null +++ b/docs/_config.yml @@ -0,0 +1 @@ +theme: 
jekyll-theme-midnight \ No newline at end of file diff --git a/docs/img/3by3mulltiplier.jpg b/docs/img/3by3mulltiplier.jpg new file mode 100644 index 0000000..a0b9ead Binary files /dev/null and b/docs/img/3by3mulltiplier.jpg differ diff --git a/docs/img/AddBlock.jpg b/docs/img/AddBlock.jpg new file mode 100644 index 0000000..3ef44cc Binary files /dev/null and b/docs/img/AddBlock.jpg differ diff --git a/docs/img/Controller.jpg b/docs/img/Controller.jpg new file mode 100644 index 0000000..e41d16f Binary files /dev/null and b/docs/img/Controller.jpg differ diff --git a/docs/img/FullMultiplier.jpg b/docs/img/FullMultiplier.jpg new file mode 100644 index 0000000..ae3c041 Binary files /dev/null and b/docs/img/FullMultiplier.jpg differ diff --git a/docs/img/LoadBlock.jpg b/docs/img/LoadBlock.jpg new file mode 100644 index 0000000..8cb4767 Binary files /dev/null and b/docs/img/LoadBlock.jpg differ diff --git a/docs/img/MatrixManager.jpg b/docs/img/MatrixManager.jpg new file mode 100644 index 0000000..6cb88a5 Binary files /dev/null and b/docs/img/MatrixManager.jpg differ diff --git a/docs/img/MultiplierNetwork.jpg b/docs/img/MultiplierNetwork.jpg new file mode 100644 index 0000000..5b22e76 Binary files /dev/null and b/docs/img/MultiplierNetwork.jpg differ diff --git a/docs/img/MultiplierwithRegisters.jpg b/docs/img/MultiplierwithRegisters.jpg new file mode 100644 index 0000000..749117c Binary files /dev/null and b/docs/img/MultiplierwithRegisters.jpg differ diff --git a/docs/img/dot.jpg b/docs/img/dot.jpg new file mode 100644 index 0000000..237b1eb Binary files /dev/null and b/docs/img/dot.jpg differ diff --git a/docs/img/matrices.PNG b/docs/img/matrices.PNG new file mode 100644 index 0000000..7c2a33b Binary files /dev/null and b/docs/img/matrices.PNG differ diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..c5941cd --- /dev/null +++ b/docs/index.md @@ -0,0 +1,110 @@ +--- +--- + +# Matrix Multiplication + +## Abstract + +We designed computer 
hardware to optimize multiplying two 6x6 matrices. Matrix multiplication is a complex and time intensive operation for normal computer architectures because there are a large number of individual computations; however, it is used frequently for machine learning and computer graphics. Because each value in the result matrix can be computed independently from any other result, matrix multiplication is an excellent candidate for parallelization. We wanted to explore the potential efficiency gain from using a purpose built matrix multiplier, and see how to split a workload among a large number of computing units. + +## Project Motivation and Background + +After studying single cycle MIPS CPUs, we were interested in other computing architectures with different strengths and weaknesses. We first looked at GPUs, which were originally designed specifically for graphics, but have recently been generalized to have all the functionality of CPUs. Graphics operations are essentially a subset of matrix operations, so GPUs with the ability to perform the same operations as CPUs have become increasingly useful for machine learning algorithms and general parallel computing. In particular, computations on matrices are complex and time intensive on normal CPUs, but the extreme parallelization of computations in a GPU makes these operations much more efficient. We wanted to explore this increase in efficiency by making some purpose built hardware for matrix multiplication. + +## How to use + +We made custom architecture that can multiply two 6x6 arrays of 5 bit unsigned integers (or smaller arrays, if you pad the rest of the matrices with zeros). + +Before you run the program, download [numpy](https://www.scipy.org/scipylib/download.html). And make sure you have Python2 and Verilog. + +To run the program, clone the [repository](https://github.com/poosomooso/FinalProject) and run `./6by6multiply.sh`. 
If you wish to try different matrices, you can change the matrices in `setup_memory.py`. The final memory is written to `memory_out.txt`, and the result matrix can be read easily using `read_6by6result.py`. (This script just takes the chunk of memory storing the result matrix and formats it nicely.) + +For information about how to run unit tests and build on the current work, please use this [reference](testing.md). + +## Implementation + +![](img/FullMultiplier.jpg) + +**Figure 1** : High level block diagram of the system (components explained below). +The overall system consists of a controller, which steps through a program described in program memory, a multiplier network, which has all of the hardware to multiply two 6 by 6 matrices together, and a matrix manager to load a matrix from memory and store the result back. This hardware requires a data memory preloaded with the matrices to be multiplied and a program memory preloaded with all of the steps required to multiply the matrices. Because we limited our scope to multiplying 6 by 6 matrices, the program memory is always the same; however, it is structured as program memory so that the functionality of our hardware could be extended more easily. + +![](img/3by3mulltiplier.jpg) + +**Figure 2** : Our core multiplier - multiplies two 3x3 matrices. The core multiplier contains a matrix multiplication module and a collection of registers to store the matrices. A and B are the input matrices, and C is the result. + +The heart of our algorithm is the matrix multiplication unit, or the multiplier. It takes two 3x3 matrices and multiplies them. It consists of 9 dot product modules, each of which computes the dot product of two vectors of length 3. The dot products happen in parallel. The multiplier also contains a small collection of registers that temporarily store the input and output matrices until other modules use the result. 
+ +![](img/matrices.PNG) + +**Figure 3** : the algorithm we use to multiply the 6x6 matrix, where each capital letter represents the 3x3 section of the matrix. + +Each 6x6 matrix is broken down into 4 3x3 matrices. The figure above shows the algorithm we use to multiply the broken up matrices, where each 3x3 matrix is represented with a capital letter. We are basically breaking up the dot products of each row and column vector into 3x3 chunks, and adding them together. This is a simplified version of the [divide and conquer algorithm](https://en.wikipedia.org/wiki/Matrix_multiplication_algorithm#Divide_and_conquer_algorithm), which is preferred over performing each dot product, generally because of caching (though not relevant in our case). In our case, we preferred it because it allowed us to have constant size multiplication blocks (the aforementioned 3x3 matrix multiplication unit), so we didn’t have to make variable size dot product modules. Although we currently constrained ourselves to 6x6 matrices, using the 3x3 modules allows us to scale in the future. + +![](img/MultiplierNetwork.jpg) + +**Figure 4** : The multiplier network that parallelizes the 8 dot products and 4 additions necessary to multiply the 6x6 matrix. + +The algorithm is performed and parallelized in the multiplier network. The multiplier network contains 8 multipliers, one for each pair of broken up matrices that need to be multiplied. It also contains four 3x3 matrix adders. Each 3x3 matrix in the result matrix of the above figure uses 2 of these multipliers and one matrix adder, so all of the resultant 3x3 matrices are calculated independent of each other. + + + +**Figure 5** : The matrix manager that handles the main memory and breaks down the matrices. The top is the load block module which generates the addresses for the 3x3 block in memory. The bottom is the full matrix manager, which uses the load block, data memory, and an address register. 
It also has a block for determining the next address, which is mostly muxes and arithmetic. + +We use the matrix manager to break down the matrices and populate the multiplier network correctly. The matrix manager manages loads and stores to data memory. It also computes and keeps track of the index of the first element of the 3x3 arrays, and uses a load block module to retrieve the 3x3 array starting from the computed starting index. The output from the matrix manager goes to the network of multipliers. The output from the multiplier network is fed back into the matrix manager to store into memory. We store the matrices as a vector in data memory, as follows: + +|3 by 3 Matrix | Data Memory | +| --------- | -------- | +|a11 / a12 / a13 | a11 | +|a21 / a22 / a23 | a12 | +|a31 / a32 / a33 | a13 | +| | a21 | +| | a22 | +| | a23 | +| | a31 | +| | a32 | +| | a33 | + +These matrices are stored sequentially in memory, and the resultant matrix is stored directly following the second matrix in memory. + +![](img/Controller.jpg) + +**Figure 6** : The module generating the control signals, backed by an FSM that generates signals based on a series of instructions. + +Finally we have the controller. The controller reads in commands from a file and writes them to the FSM. The FSM breaks down the command into two sections: the *type*, which is either ‘00’ for loading a matrix, or ‘01’ for saving a matrix, and the *block*, which represents a particular 3x3 matrix: + +| Matrix | Block Code | +| --- | --- | +| A | 000 | +| B | 001 | +| C | 010 | +| D | 011 | +| E | 100 | +| F | 101 | +| G | 110 | +| H | 111 | +| J | X00 | +| K | X01 | +| L | X10 | +| M | X11 | + +We can generate all the control signals through these two codes. We use *type* to tell us whether we are working with input matrices or the resultant matrix. From the two input matrices, we can tell which one we are using by looking at the MSB of *block*. 
And we can tell when we need to move to the next row of 3x3 matrices by looking at the LSB of *block* — all the blocks on the left side of the matrix have a 0 in the LSB, and all the blocks on the right side of the matrix have a 1 in the LSB. + +That last trick only works because we are dealing with 6x6 matrices, so they get broken down into 2x2 (i.e. binary) squares. If we were to scale the size of the matrices, we would have to come up with a new way of detecting when to go to the next row of 3x3 matrices. + +We use a Python script to generate the program memory and the initial data memory. The Python script has two hardcoded numpy arrays, which can be changed at any time. It then converts the numpy arrays to a binary encoding and writes them to a file. It also writes the program memory to a file, which, for the 6x6 multiplier, should be the same every time, since the instructions are only dependent on the size of the matrix. + +## Reflection + +While being constrained to only a 6x6 matrix falls a bit short of our goals, we did not anticipate how complicated it would be to design all the small modules in the design and wire them up together properly. However, we did stay on track with our workplan and reached a version of our planned goal. Our workplan was intentionally vague, since at the beginning of the project, we knew there were a lot of options to explore, and we didn’t know how this project would manifest. Overall, we learned a lot about implementing matrix multiplication, and techniques for parallelizing. + +One thing that we hoped to implement, but were not able to due to time constraints, was full parallelization of loading data. Currently, our matrix manager only loads one 3x3 matrix at a time, but the multiplier could be much more efficient if it could load all the matrices to the multipliers at the same time, and then write back all the results at the same time. 
A lot of our architecture that we built in the beginning (data memory, load block) assumed we would only be loading 9 entries at a time, since each multiplier unit was using 9 entries of each particular matrix. However, it wasn’t until later that we realized that our design would be much more efficient if we could load multiple 3x3 blocks at once. By then, so much of our design depended on individually loading these 9-entry blocks that it was difficult to go back and retroactively make changes. If we did this again, we would spend a little more time in the beginning, working on block diagrams. + +## Future Extensions + +We think the next logical step is to scale this up, so that each dimension is an arbitrary multiple of 3. After that, matrices of any size could be multiplied by padding them with zeros until each dimension becomes a multiple of 3. + +Another extension is to apply the matrix algorithm to actual science or graphics applications, and make customized hardware for a specific application. + + + + diff --git a/docs/testing.md b/docs/testing.md new file mode 100644 index 0000000..93fb2dc --- /dev/null +++ b/docs/testing.md @@ -0,0 +1,73 @@ +# How to Run Tests +## Getting Started +Before being able to run tests, you must compile the verilog files. +To do this, at the command line enter the command `make`. This will +compile any of the necessary verilog files to create the test executables. If a file has already been compiled and no changes have been made to it or its dependencies, the file will not be recompiled. + +## Run Single Test +Enter the command +```bash +./<test_binary> +``` + +to run a single test once it has been compiled. If all unit tests pass, nothing will be printed to the terminal. A notice that one or more tests failed will be printed to the terminal otherwise. 
+ +## Run All Tests +The script `run_tests.sh` will compile and run all of the tests at once. + +If you are running the script for the first time, you will need to type +```bash +chmod 755 run_tests.sh +``` +in order to gain permission to run the script. + +Once you have gained permission, run: +```bash +./run_tests.sh +``` + +If all tests pass, the only output to the terminal from the script will be any commands run from the makefile and any notifications about .vcd files being opened for output. Otherwise, a notice that one or more tests failed will be printed to the terminal. + +## Add a Test to Run +If a new testbench binary is created, it must be added to the script in order to be run. To do this, add the following lines to `run_tests.sh`: +```bash +echo "Running tests..." +./<test_binary> +``` +## Run the integration test +The script `6by6multiply.sh` will compile and run the integration test, which tests the entire multiplier. The test works by writing the data contained at the memory addresses corresponding to the multiplied matrix to a data file. The python script `read_6by6result.py` reads this data file and formats the data into a matrix, which is printed to the terminal. This matrix can be compared to the expected multiplication result, which is printed to the terminal by `setup_memory.py`. + +If you are running the script for the first time, you will need to type +```bash +chmod 755 6by6multiply.sh +``` +in order to gain permission to run the script. + +Once you have gained permission, run: +```bash +./6by6multiply.sh +``` +When run, you will see the input matrices (set in `setup_memory.py`) printed to the terminal along with the expected multiplication result. At the end, the actual output of the multiplier will be printed to the terminal. + +# How to Edit the Makefile +## Adding build targets +To add a build target for a new testbench, add the following lines to the makefile: +``` +: ... 
+ iverilog -Wall -o +``` + +As a style choice, the binary name should match the target name. Dependencies can either be other build targets or verilog files. An example build target might look like: + +``` +dot: dot.v dot.t.v arithmetic + iverilog -Wall -o dot dot.t.v +``` + +In addition, the new build target must be added to the all build target at the top of the makefile, otherwise it will not be built automatically. + +``` +all: arithmetic dot matrixmultiplication +``` + + diff --git a/dot.t.v b/dot.t.v new file mode 100644 index 0000000..516b69b --- /dev/null +++ b/dot.t.v @@ -0,0 +1,66 @@ +/* +Test bench for the 3 dimensional vector dot product. +*/ + +`include "dot.v" + +module dot_TEST(); + + parameter ENTRY_SIZE = 5; + parameter RESENTRY_SIZE = 9; + + reg[ENTRY_SIZE - 1:0] a0, a1, a2, b0, b1, b2; + wire[RESENTRY_SIZE - 1:0] product; + + dot #(.ENTRY_SIZE(ENTRY_SIZE), .RESENTRY_SIZE(RESENTRY_SIZE)) dut (a0, a1, a2, b0, b1, b2, product); + initial begin + a0 = 5'd0; a1 = 5'd0; a2 = 5'd0; b0 = 5'd0; b1 = 5'd0; b2 = 5'd0; + #50 + if (product !== 9'd0) begin + $display("Test failed. Expected: %d. Actual: %d.", 9'd0, product); + end + + a0 = 5'd1; a1 = 5'd0; a2 = 5'd0; b0 = 5'd2; b1 = 5'd3; b2 = 5'd4; + #50 + if (product !== 9'd2) begin + $display("Test failed. Expected: %d. Actual: %d.", 9'd2, product); + end + + a0 = 5'd0; a1 = 5'd1; a2 = 5'd0; b0 = 5'd2; b1 = 5'd3; b2 = 5'd4; + #50 + if (product !== 9'd3) begin + $display("Test failed. Expected: %d. Actual: %d.", 9'd3, product); + end + + a0 = 5'd0; a1 = 5'd0; a2 = 5'd1; b0 = 5'd2; b1 = 5'd3; b2 = 5'd4; + #50 + if (product !== 9'd4) begin + $display("Test failed. Expected: %d. Actual: %d.", 9'd4, product); + end + + a0 = 5'd1; a1 = 5'd1; a2 = 5'd0; b0 = 5'd2; b1 = 5'd3; b2 = 5'd4; + #50 + if (product !== 9'd5) begin + $display("Test failed. Expected: %d. 
Actual: %d.", 9'd5, product); + end + + a0 = 5'd1; a1 = 5'd0; a2 = 5'd1; b0 = 5'd2; b1 = 5'd3; b2 = 5'd4; + #50 + if (product !== 9'd6) begin + $display("Test failed. Expected: %d. Actual: %d.", 9'd6, product); + end + + a0 = 5'd0; a1 = 5'd1; a2 = 5'd1; b0 = 5'd2; b1 = 5'd3; b2 = 5'd4; + #50 + if (product !== 9'd7) begin + $display("Test failed. Expected: %d. Actual: %d.", 9'd7, product); + end + + a0 = 5'd1; a1 = 5'd1; a2 = 5'd1; b0 = 5'd2; b1 = 5'd3; b2 = 5'd4; + #50 + if (product !== 9'd9) begin + $display("Test failed. Expected: %d. Actual: %d.", 9'd9, product); + end + end +endmodule // dot_TEST + diff --git a/dot.v b/dot.v new file mode 100644 index 0000000..e4832d3 --- /dev/null +++ b/dot.v @@ -0,0 +1,35 @@ +/* +Perform the dot product on two 3 dimensional vectors. +Represents the operation dot(a, b) = c +Inputs: + - a0, ..., a2 and b0, ..., b2 represent the entries of the vectors a and b. +Outputs: + - product represents the scalar c. +Parameters: + - ENTRY_SIZE is the size of each individual entry of a and b. + - RESENTRY_SIZE is the size of the product c. 
+*/ + +`include "arithmetic.v" + +module dot +#( + parameter ENTRY_SIZE = 5, + parameter RESENTRY_SIZE = 9 +) +( + input[ENTRY_SIZE - 1:0] a0, a1, a2, b0, b1, b2, + output[RESENTRY_SIZE - 1:0] product +); + +wire[RESENTRY_SIZE - 1:0] multres0; +wire[RESENTRY_SIZE - 1:0] multres1; +wire[RESENTRY_SIZE - 1:0] multres2; + +scalar_multiplication #(.ENTRY_SIZE(ENTRY_SIZE), .RESENTRY_SIZE(RESENTRY_SIZE)) multop0 (.a(a0), .b(b0), .result(multres0)); +scalar_multiplication #(.ENTRY_SIZE(ENTRY_SIZE), .RESENTRY_SIZE(RESENTRY_SIZE)) multop1 (.a(a1), .b(b1), .result(multres1)); +scalar_multiplication #(.ENTRY_SIZE(ENTRY_SIZE), .RESENTRY_SIZE(RESENTRY_SIZE)) multop2 (.a(a2), .b(b2), .result(multres2)); + +assign product = multres0 + multres1 + multres2; + +endmodule \ No newline at end of file diff --git a/fsm.t.v b/fsm.t.v new file mode 100644 index 0000000..e662e09 --- /dev/null +++ b/fsm.t.v @@ -0,0 +1,150 @@ +`include "fsm.v" + +module fsm_TEST(); // Test bench for fsm: drives 5-bit commands and checks every decoded control output. + parameter NUM_TESTS = 5; + + reg[4:0] command; + wire data_we; + wire weA, weB, weC, weD, weE, weF, weG, weH; + wire[1:0] jklm_sel; + wire next_row; + wire column; + + fsm dut ( + .command(command), + .data_we(data_we), + .weA(weA), .weB(weB), .weC(weC), .weD(weD), + .weE(weE), .weF(weF), .weG(weG), .weH(weH), + .jklm_select(jklm_sel), + .next_row(next_row), + .column(column) + ); + + reg[5:0] success_count = 0; + + initial begin + $dumpfile("fsm.vcd"); + $dumpvars(); + + // Test Case 1: load to block A + command = 5'b0; + #10 + + if (data_we !== 0) begin // case inequality (!==) so an X/Z output fails the check instead of slipping through + $display("Test Case 1 Failed: data_we not set correctly"); + end + else if (weA !== 1 || {weB, weC, weD, weE, weF, weG, weH} !== 7'b0) begin // fail if the selected enable is low OR any other enable is high + $display("Test Case 1 Failed: write enables not set correctly"); + end + else if (next_row !== 0) begin + $display("Test Case 1 Failed: next row is signalled incorrectly"); + end + else if (column !== 0) begin + $display("Test Case 1 Failed: column is signalled incorrectly"); + end + else begin + // test passed + success_count = 
success_count+1; + end + + // Test Case 2: load to block D (block select 011) + command = 5'b00011; + #10 + + if (data_we !== 0) begin + $display("Test Case 2 Failed: data_we not set correctly"); + end + else if (weD !== 1 || {weA, weB, weC, weE, weF, weG, weH} !== 7'b0) begin + $display("Test Case 2 Failed: write enables not set correctly"); + end + else if (next_row !== 1) begin + $display("Test Case 2 Failed: next row is signalled incorrectly"); + end + else if (column !== 0) begin + $display("Test Case 2 Failed: column is signalled incorrectly"); + end + else begin + // test passed + success_count = success_count+1; + end + + // Test Case 3: load to block H + command = 5'b00111; + #10 + + if (data_we !== 0) begin + $display("Test Case 3 Failed: data_we not set correctly"); + end + else if (weH !== 1 || {weA, weB, weC, weD, weE, weF, weG} !== 7'b0) begin + $display("Test Case 3 Failed: write enables not set correctly"); + end + else if (next_row !== 1) begin + $display("Test Case 3 Failed: next row is signalled incorrectly"); + end + else if (column !== 1) begin + $display("Test Case 3 Failed: column is signalled incorrectly"); + end + else begin + // test passed + success_count = success_count+1; + end + + // Test Case 4: store from block J + command = 5'b01000; + #10 + + if (data_we !== 1) begin + $display("Test Case 4 Failed: data_we not set correctly"); + end + else if ({weA, weB, weC, weD, weE, weF, weG, weH} !== 8'b0) begin + $display("Test Case 4 Failed: block we set on store"); + end + else if (jklm_sel !== 2'b00) begin + $display("Test Case 4 Failed: jklm_select not correct"); + end + else if (next_row !== 0) begin + $display("Test Case 4 Failed: next row is signalled incorrectly"); + end + else if (column !== 1) begin + $display("Test Case 4 Failed: column is signalled incorrectly"); + end + else begin + // test passed + success_count = success_count+1; + end + + // Test Case 5: store from block M + command = 5'b01011; + #10 + + if (data_we !== 1) begin + $display("Test Case 5 Failed: 
data_we not set correctly"); + end + else if ({weA, weB, weC, weD, weE, weF, weG, weH} !== 8'b0) begin + $display("Test Case 5 Failed: block we set on store"); + end + else if (jklm_sel !== 2'b11) begin + $display("Test Case 5 Failed: jklm_select not correct"); + end + else if (next_row !== 1) begin + $display("Test Case 5 Failed: next row is signalled incorrectly"); + end + else if (column !== 1) begin + $display("Test Case 5 Failed: column is signalled incorrectly"); + end + else begin + // test passed + success_count = success_count+1; + end + + + if (success_count < NUM_TESTS) begin + $display("\nFSM Failed %d Tests\n",(NUM_TESTS-success_count)); + end + else begin + $display("FSM Passed All %d tests", NUM_TESTS); + end + + end +endmodule + diff --git a/fsm.v b/fsm.v new file mode 100644 index 0000000..b6071bc --- /dev/null +++ b/fsm.v @@ -0,0 +1,62 @@ +/* +Manages the control signals throughout the matrix multiplier based on an input command + +command structure: + |type|block| + | 0 0|0 0 0| + +type selects between load (00) and store (01) +block selects the block matrix to either load from or store to + 000 = A ... 111 = H + x00 = J ... 
x11 = M + +Inputs: + - command: the command from program memory, structure explained above +Outputs: + - data_we: write enable for the data memory + - weA-weH: write enables for each input to the computation blocks + - jklm_select: select bits for mux on the output matricies + - next_row: signals to the matrix manager to go to a new row of the matrix + - column: tells the matrix manager to use the column quantity for the first matrix (0), or the second and result matrices (1) +*/ + +module fsm +#( +parameter TYPE_LEN = 2, +parameter BLOCK_LEN = 3 +) +( + input[TYPE_LEN+BLOCK_LEN-1:0] command, + output data_we, + output weA, weB, weC, weD, weE, weF, weG, weH, + output[1:0] jklm_select, + output next_row, + output column +); + + wire[TYPE_LEN-1:0] type; + assign type = command[TYPE_LEN+BLOCK_LEN-1:BLOCK_LEN]; + assign data_we = type[0]; // modify if we need more types + + wire[BLOCK_LEN-1:0] block; + assign block = command[BLOCK_LEN-1:0]; + + // signaling the next row currently only works because we only use 6x6 matrices + assign next_row = block[0]; + + assign column = (block[2] && !type[0]) || type[0]; // 1 when we are using the second matrix or result + + assign jklm_select = block[1:0]; // last two bits of block select which matrix to store + + // decoder to enable loading of selected block + assign {weA, weB, weC, weD, weE, weF, weG, weH} = + ( block == 3'b000 & type==0) ? 8'b10000000 : + ( block == 3'b001 & type==0) ? 8'b01000000 : + ( block == 3'b010 & type==0) ? 8'b00100000 : + ( block == 3'b011 & type==0) ? 8'b00010000 : + ( block == 3'b100 & type==0) ? 8'b00001000 : + ( block == 3'b101 & type==0) ? 8'b00000100 : + ( block == 3'b110 & type==0) ? 8'b00000010 : + ( block == 3'b111 & type==0) ? 
8'b00000001 : + 8'b00000000; +endmodule diff --git a/load_block.t.v b/load_block.t.v new file mode 100644 index 0000000..d9c057b --- /dev/null +++ b/load_block.t.v @@ -0,0 +1,145 @@ +`include "load_block.v" +`include "data_memory.v" + +// TODO(arianaolson419): create a test memory file with matrices. Add to testing script +module load_block_TEST(); + parameter ADDRESS_WIDTH = 32; + parameter ENTRY_SIZE = 5; + parameter NUM_TESTS = 4; + + reg[ADDRESS_WIDTH - 1:0] addr_initial; + reg[ADDRESS_WIDTH - 1:0] columns; + wire[ADDRESS_WIDTH - 1:0] addrOut0, addrOut1, addrOut2, addrOut3; + wire[ADDRESS_WIDTH - 1:0] addrOut4, addrOut5, addrOut6, addrOut7, addrOut8; + + address3by3block #(.MEM_ADDRESS_WIDTH(ADDRESS_WIDTH)) dut ( + .addr_initial(addr_initial), + .columns (columns), + .addr0 (addrOut0), + .addr1 (addrOut1), + .addr2 (addrOut2), + .addr3 (addrOut3), + .addr4 (addrOut4), + .addr5 (addrOut5), + .addr6 (addrOut6), + .addr7 (addrOut7), + .addr8 (addrOut8) + ); + + reg clk; + reg [ADDRESS_WIDTH - 1:0] addr0, addr1, addr2, addr3, addr4; + reg [ADDRESS_WIDTH - 1:0] addr5, addr6, addr7, addr8; + reg writeEnable; + reg [ENTRY_SIZE - 1:0] dataIn0, dataIn1, dataIn2, dataIn3, dataIn4; + reg [ENTRY_SIZE - 1:0] dataIn5, dataIn6, dataIn7, dataIn8; + + wire [ENTRY_SIZE - 1:0] data0, data1, data2, data3, data4; + wire [ENTRY_SIZE - 1:0] data5, data6, data7, data8; + + data_memory #(.width(ENTRY_SIZE), .addresswidth(ADDRESS_WIDTH)) mem ( + .clk(clk), .data0(data0), .data1(data1), .data2(data2), + .data3(data3), .data4(data4), .data5(data5), .data6(data6), + .data7(data7), .data8(data8), .addr0(addr0), .addr1(addr1), + .addr2(addr2), .addr3(addr3), .addr4(addr4), .addr5(addr5), + .addr6(addr6), .addr7(addr7), .addr8(addr8), .writeEnable(writeEnable), + .dataIn0(dataIn0), .dataIn1(dataIn1), .dataIn2(dataIn2), + .dataIn3(dataIn3), .dataIn4(dataIn4), .dataIn5(dataIn5), + .dataIn6(dataIn6), .dataIn7(dataIn7), .dataIn8(dataIn8) + ); + + reg[5:0] success_count = 0; + + initial begin + 
$dumpfile("load_block.vcd"); + $dumpvars(); + + // Test Case 1: read a 3x3 matrix from address zero + addr_initial = 0; + columns = 3; + + // cycle clock + clk = 0; + #1 clk = 1; + #1 + + if (addrOut0 === 0 & addrOut1 === 1 & addrOut2 === 2 & + addrOut3 === 3 & addrOut4 === 4 & addrOut5 === 5 & + addrOut6 === 6 & addrOut7 === 7 & addrOut8 === 8) + begin + // test passed + success_count = success_count+1; + end + else begin + $display("Test case 1 failed: 3x3 matrix at address 0"); + end + + // Test Case 2: read top left block of 4x4 matrix from address 9 + addr_initial = 9; + columns = 4; + + // cycle clock + clk = 0; + #1 clk = 1; + #1 + + if (addrOut0 === 9 & addrOut1 === 10 & addrOut2 === 11 & + addrOut3 === 13 & addrOut4 === 14 & addrOut5 === 15 & + addrOut6 === 17 & addrOut7 === 18 & addrOut8 === 19) + begin + // test passed + success_count = success_count+1; + end + else begin + $display("Test case 2 failed: 4x4 matrix at address 9"); + end + + // Test Case 3: read bottom right block of 4x4 matrix + // (starting at address 14) + addr_initial = 14; + columns = 4; + + // cycle clock + clk = 0; + #1 clk = 1; + #1 + + if (addrOut0 === 14 & addrOut1 === 15 & addrOut2 === 16 & + addrOut3 === 18 & addrOut4 === 19 & addrOut5 === 20 & + addrOut6 === 22 & addrOut7 === 23 & addrOut8 === 24) + begin + // test passed + success_count = success_count+1; + end + else begin + $display("Test case 3 failed: 4x4 matrix at address 14"); + end + + // Test Case 4: 7 column matrix at address 25 (row stride 7, so block rows start at 25, 32, 39) + addr_initial = 25; + columns = 7; + + // cycle clock + clk = 0; + #1 clk = 1; + #1 + + if (addrOut0 === 25 & addrOut1 === 26 & addrOut2 === 27 & + addrOut3 === 32 & addrOut4 === 33 & addrOut5 === 34 & + addrOut6 === 39 & addrOut7 === 40 & addrOut8 === 41) + begin + // test passed + success_count = success_count+1; + end + else begin + $display("Test case 4 failed: 7 column matrix at address 25"); + end + + if (success_count < NUM_TESTS) begin + $display("\nLoad Block Failed %d 
Tests\n",(NUM_TESTS-success_count)); + end + else begin + $display("Load Block Passed All %d tests", NUM_TESTS); + end + + end +endmodule diff --git a/load_block.v b/load_block.v new file mode 100644 index 0000000..e309ce4 --- /dev/null +++ b/load_block.v @@ -0,0 +1,45 @@ +/* +Address the entries of a 3 by 3 block of memory within a matrix stored in +the data memory. Matrices are stored in the data memory such that +each word is an entry of the matrix, and entries are stored one row at a time. + +Example: +3 by 3 Matrix Data Memory +------------- ----------- +a11 a12 a13 a11 +a21 a22 a23 => a12 +a31 a32 a33 a13 + a21 + a22 + a23 + a31 + a32 + a33 + +Inputs: + - addr_initial: the address of the first entry of the block. + - columns: the number of columns of the entire matrix stored in memory. +Outputs: + - addr: the address of the xth element of the block in data memory. +Parameters: + - MEM_ADDRESS_WIDTH: the number of bits of the addressees given to the data memory. +*/ + +module address3by3block +#(parameter MEM_ADDRESS_WIDTH = 32) +( + input[MEM_ADDRESS_WIDTH - 1:0] addr_initial, + input[MEM_ADDRESS_WIDTH - 1:0] columns, + output[MEM_ADDRESS_WIDTH - 1:0] addr0, addr1, addr2, addr3, addr4, + output[MEM_ADDRESS_WIDTH - 1:0] addr5, addr6, addr7, addr8 +); + assign addr0 = addr_initial; + assign addr1 = addr_initial + 1; + assign addr2 = addr_initial + 2; + assign addr3 = addr_initial + columns; + assign addr4 = addr_initial + columns + 1; + assign addr5 = addr_initial + columns + 2; + assign addr6 = addr_initial + (2 * columns); + assign addr7 = addr_initial + (2 * columns) + 1; + assign addr8 = addr_initial + (2 * columns) + 2; +endmodule \ No newline at end of file diff --git a/makefile b/makefile new file mode 100644 index 0000000..0fb133a --- /dev/null +++ b/makefile @@ -0,0 +1,49 @@ +all: arithmetic dot matrixmultiplication data_mem load_block add_block multiplier registers multiplexer fsm prog_mem controller matrix_manager multiplier_network multiplier6by6 + 
+clean: # remove every test binary built by all; -f keeps clean from failing when a binary is already missing + rm -f arithmetic dot matrixmultiplication data_mem load_block add_block multiplier registers multiplexer fsm prog_mem controller matrix_manager multiplier_network multiplier6by6 + +arithmetic: arithmetic.v arithmetic.t.v + iverilog -Wall -o arithmetic arithmetic.t.v + +dot: dot.v dot.t.v arithmetic + iverilog -Wall -o dot dot.t.v + +matrixmultiplication: matrixmultiplication.v matrixmultiplication.t.v dot + iverilog -Wall -o matrixmultiplication matrixmultiplication.t.v + +data_mem: data_memory.v data_memory.t.v + iverilog -Wall -o data_mem data_memory.t.v + +load_block: load_block.v load_block.t.v data_mem + iverilog -Wall -o load_block load_block.t.v + +add_block: add3by3.v add3by3.t.v + iverilog -Wall -o add_block add3by3.t.v + +registers: registers.v registers.t.v + iverilog -Wall -o registers registers.t.v + +multiplier: multiplier.v multiplier.t.v registers matrixmultiplication + iverilog -Wall -o multiplier multiplier.t.v + +fsm: fsm.v fsm.t.v + iverilog -Wall -o fsm fsm.t.v + +multiplexer: multiplexer.v multiplexer.t.v + iverilog -Wall -o multiplexer multiplexer.t.v + +prog_mem: prog_memory.v prog_memory.t.v + iverilog -Wall -o prog_mem prog_memory.t.v + +controller: controller.v controller.t.v prog_mem fsm + iverilog -Wall -o controller controller.t.v + +multiplier_network: multiplier_network.v multiplier_network.t.v multiplexer multiplier add_block + iverilog -Wall -o multiplier_network multiplier_network.t.v + +matrix_manager: matrix_manager.v matrix_manager.t.v data_mem registers load_block + iverilog -Wall -o matrix_manager matrix_manager.t.v + +multiplier6by6: multiplier6by6.v multiplier6by6.t.v controller matrix_manager multiplier_network + iverilog -Wall -o multiplier6by6 multiplier6by6.t.v diff --git a/matrix_manager.t.v b/matrix_manager.t.v new file mode 100644 index 0000000..900597c --- /dev/null +++ b/matrix_manager.t.v @@ -0,0 +1,137 @@ +`include "matrix_manager.v" + +/* +Since I don't have direct access to the contents of memory, I 
can't adequately test writing data. +*/ + +module matrixmanagertest(); + +reg clk; + +reg dm_we, next_row, column; +reg[9:0] n, m, p; + +reg[4:0] dataIn0, dataIn1, dataIn2, dataIn3, dataIn4, dataIn5, dataIn6, dataIn7, dataIn8; +wire[4:0] dataOut0, dataOut1, dataOut2, dataOut3, dataOut4, dataOut5, dataOut6, dataOut7, dataOut8; + + +matrix_manager dut (.clk(clk), .dm_we(dm_we), .next_row(next_row), .column(column), +.n(n), .m(m), .p(p), +.dataIn0(dataIn0), .dataIn1(dataIn1), .dataIn2(dataIn2), .dataIn3(dataIn3), .dataIn4(dataIn4), .dataIn5(dataIn5), .dataIn6(dataIn6), .dataIn7(dataIn7), .dataIn8(dataIn8), +.dataOut0(dataOut0), .dataOut1(dataOut1), .dataOut2(dataOut2), .dataOut3(dataOut3), .dataOut4(dataOut4), .dataOut5(dataOut5), .dataOut6(dataOut6), .dataOut7(dataOut7), .dataOut8(dataOut8)); + +always #10 clk = !clk; + +initial begin + +$dumpfile("matrix_manager.vcd"); +$dumpvars(); + +clk = 1'b0; + +next_row = 1'b1; +column = 1'b0; +n = 10'd6; +m = 10'd3; +p = 10'd6; + +// A * C D = E F +// B G H + +dm_we = 1'b0; + +#20 + +// A +if (dataOut0 !== 5'd0 || dataOut1 !== 5'd1 || dataOut2 !== 5'd2 || + dataOut3 !== 5'd3 || dataOut4 !== 5'd4 || dataOut5 !== 5'd5 || + dataOut6 !== 5'd6 || dataOut7 !== 5'd7 || dataOut8 !== 5'd8) begin + $display("test 1 FAILED: matrix manager did not output first matrix correctly"); +end + +next_row = 1'b1; + +#20 + +// B +if (dataOut0 !== 5'd9 || dataOut1 !== 5'd10 || dataOut2 !== 5'd11 || + dataOut3 !== 5'd12 || dataOut4 !== 5'd13 || dataOut5 !== 5'd14 || + dataOut6 !== 5'd15 || dataOut7 !== 5'd16 || dataOut8 !== 5'd17) begin + $display("test 2 FAILED: matrix manager did not output first matrix correctly"); +end + +column = 1'b1; + +#20 + +// C +if (dataOut0 !== 5'd18 || dataOut1 !== 5'd19 || dataOut2 !== 5'd20 || + dataOut3 !== 5'd24 || dataOut4 !== 5'd25 || dataOut5 !== 5'd26 || + dataOut6 !== 5'd30 || dataOut7 !== 5'd31 || dataOut8 !== 5'd0) begin + $display("test 3 FAILED: matrix manager did not output second matrix correctly"); +end 
+ +next_row = 1'b0; + +#20 + +// D +if (dataOut0 !== 5'd21 || dataOut1 !== 5'd22 || dataOut2 !== 5'd23 || + dataOut3 !== 5'd27 || dataOut4 !== 5'd28 || dataOut5 !== 5'd29 || + dataOut6 !== 5'd0 || dataOut7 !== 5'd0 || dataOut8 !== 5'd0) begin + $display("test 4 FAILED: matrix manager did not output second matrix correctly"); +end + +next_row = 1'b1; +dm_we = 1'b1; + +dataIn0 = 5'd0; +dataIn1 = 5'd1; +dataIn2 = 5'd2; +dataIn3 = 5'd3; +dataIn4 = 5'd4; +dataIn5 = 5'd5; +dataIn6 = 5'd6; +dataIn7 = 5'd7; +dataIn8 = 5'd8; + +#10 // go to negedge + +//E +if (dataOut0 !== 5'd0 || dataOut1 !== 5'd1 || dataOut2 !== 5'd2 || + dataOut3 !== 5'd3 || dataOut4 !== 5'd4 || dataOut5 !== 5'd5 || + dataOut6 !== 5'd6 || dataOut7 !== 5'd7 || dataOut8 !== 5'd8) begin + $display("test 5 FAILED: matrix manager did not write result matrix correctly"); +end + +next_row = 1'b0; + +dataIn0 = 5'd10; +dataIn1 = 5'd11; +dataIn2 = 5'd12; +dataIn3 = 5'd13; +dataIn4 = 5'd14; +dataIn5 = 5'd15; +dataIn6 = 5'd16; +dataIn7 = 5'd17; +dataIn8 = 5'd18; + +#20 + +if (dataOut0 !== 5'd10 || dataOut1 !== 5'd11 || dataOut2 !== 5'd12 || + dataOut3 !== 5'd13 || dataOut4 !== 5'd14 || dataOut5 !== 5'd15 || + dataOut6 !== 5'd16 || dataOut7 !== 5'd17 || dataOut8 !== 5'd18) begin + $display("test 6 FAILED: matrix manager did not write result matrix correctly"); +end + + + + + +$finish; + + + +end + +endmodule \ No newline at end of file diff --git a/matrix_manager.v b/matrix_manager.v new file mode 100644 index 0000000..769cdf8 --- /dev/null +++ b/matrix_manager.v @@ -0,0 +1,95 @@ +`include "data_memory.v" +`include "load_block.v" + +/* +The Matrix Manager loads or writes matrices using 3x3 matrices at a time. Starting from (0,0), +the matrix manager iterates through the addresses of all the matrices in row major order, according +to the control signals given as input that tell when to move to the next row, and how long the rows are. +The control signals also control whether we write to memory or save to memory. 
+ +Inputs: + - clk + - dm_we: whether we should write to memory + - next_row: signals whether we should address from the next row of 3x3 matrices + - column: signals which column size to use - the column size of the first matrix, + or the column size of the second and result matrices. + - n: number of rows on the first array + - m: number of columns on the first matrix, and number of rows on the second matrix + - p: number of columns on the second matrix + - dataIn0...dataIn8: input 3x3 array + +Outputs: + - dataOut0...dataOut8: output 3x3 array + +*/ + + +module matrix_manager #(parameter ADDR_WIDTH=10, parameter DATA_WIDTH=5) +( + input clk, dm_we, next_row, column, + input[ADDR_WIDTH-1:0] n, m, p, + input[DATA_WIDTH-1:0] dataIn0, dataIn1, dataIn2, dataIn3, dataIn4, dataIn5, dataIn6, dataIn7, dataIn8, + output[DATA_WIDTH-1:0] dataOut0, dataOut1, dataOut2, dataOut3, dataOut4, dataOut5, dataOut6, dataOut7, dataOut8 +); + +wire[ADDR_WIDTH-1:0] addr0, addr1, addr2, addr3, addr4, addr5, addr6, addr7, addr8; + +data_memory #( + .width(DATA_WIDTH), + .addresswidth(ADDR_WIDTH), + .depth(1024) + ) + data_mem ( + .clk(clk), + .data0(dataOut0), .data1(dataOut1), .data2(dataOut2), .data3(dataOut3), .data4(dataOut4), + .data5(dataOut5), .data6(dataOut6), .data7(dataOut7), .data8(dataOut8), + .addr0(addr0<<2), .addr1(addr1<<2), .addr2(addr2<<2), .addr3(addr3<<2), .addr4(addr4<<2), + .addr5(addr5<<2), .addr6(addr6<<2), .addr7(addr7<<2), .addr8(addr8<<2), + .writeEnable(dm_we), + .dataIn0(dataIn0), .dataIn1(dataIn1), .dataIn2(dataIn2), .dataIn3(dataIn3), .dataIn4(dataIn4), + .dataIn5(dataIn5), .dataIn6(dataIn6), .dataIn7(dataIn7), .dataIn8(dataIn8) + ); + +reg[ADDR_WIDTH-1:0] next_addr; +reg[ADDR_WIDTH-1:0] curr_addr; + +reg[ADDR_WIDTH-1:0] num_columns; + +address3by3block #(.MEM_ADDRESS_WIDTH(ADDR_WIDTH)) addr_loader + ( + .addr_initial(curr_addr), + .columns(num_columns), + .addr0(addr0), .addr1(addr1), .addr2(addr2), .addr3(addr3), .addr4(addr4), + .addr5(addr5), 
.addr6(addr6), .addr7(addr7), .addr8(addr8) + ); + +initial begin + next_addr = {ADDR_WIDTH{1'b0}}; + curr_addr = {ADDR_WIDTH{1'b0}}; + num_columns = m; +end + +always @(*) begin + + if (curr_addr === {ADDR_WIDTH{1'bx}}) begin + next_addr <= {ADDR_WIDTH{1'b0}}; + end + else if (next_row == 1'b1) begin + next_addr <= curr_addr + 3 + (2*num_columns); + end + else begin + next_addr <= curr_addr + 3; + end +end + + +always @(posedge clk) begin + curr_addr = next_addr; + + if (column == 1'b1) + num_columns = p; + else + num_columns = m; +end + +endmodule \ No newline at end of file diff --git a/matrix_manager_test.dat b/matrix_manager_test.dat new file mode 100644 index 0000000..977c778 --- /dev/null +++ b/matrix_manager_test.dat @@ -0,0 +1,66 @@ +// This is the memory that should be used in the matrix_manager test bench +0_0000 +0_0001 +0_0010 +0_0011 +0_0100 +0_0101 +0_0110 +0_0111 +0_1000 +0_1001 +0_1010 +0_1011 +0_1100 +0_1101 +0_1110 +0_1111 +1_0000 +1_0001 +1_0010 +1_0011 +1_0100 +1_0101 +1_0110 +1_0111 +1_1000 +1_1001 +1_1010 +1_1011 +1_1100 +1_1101 +1_1110 +1_1111 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 \ No newline at end of file diff --git a/matrix_mem.dat b/matrix_mem.dat new file mode 100644 index 0000000..52a8280 --- /dev/null +++ b/matrix_mem.dat @@ -0,0 +1,1024 @@ +00001 +00010 +00011 +00011 +00010 +00001 +00010 +00011 +00011 +00010 +00001 +00001 +00001 +00011 +00001 +00001 +00010 +00001 +00010 +00001 +00010 +00000 +00000 +00001 +00011 +00001 +00000 +00011 +00010 +00010 +00001 +00010 +00011 +00010 +00011 +00010 +00001 +00010 +00011 +00010 +00010 +00001 +00010 +00011 +00000 +00010 +00001 +00001 +00001 +00011 +00001 +00001 +00010 +00001 +00010 +00001 +00001 +00000 +00000 +00001 +00011 +00001 +00000 +00010 +00011 +00010 +00001 +00010 
+00011 +00010 +00001 +00010 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 
+00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 
+00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 
+00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 \ No newline at end of file diff --git a/matrixmultiplication.t.v b/matrixmultiplication.t.v new file mode 100644 index 0000000..e9d1a44 --- /dev/null +++ b/matrixmultiplication.t.v @@ -0,0 +1,58 @@ +/* +Test bench for the 3x3 matrix multiplication module. +*/ + +`include "matrixmultiplication.v" + +module matrixmultiplication3by3_TEST(); + parameter ENTRY_SIZE = 5; + parameter RESENTRY_SIZE = 9; + parameter VECTOR_SIZE = 3 * ENTRY_SIZE; + parameter RESVECTOR_SIZE = 3 * RESENTRY_SIZE; + + reg[VECTOR_SIZE - 1:0] matrixAv1, matrixAv2, matrixAv3; + reg[VECTOR_SIZE - 1:0] matrixBv1, matrixBv2, matrixBv3; + + wire[RESVECTOR_SIZE - 1:0] matrixCv1, matrixCv2, matrixCv3; + + matrixmultiplication3by3 #( + .ENTRY_SIZE(ENTRY_SIZE), + .RESENTRY_SIZE(RESENTRY_SIZE) + ) dut ( + .matrixAv1(matrixAv1), .matrixAv2(matrixAv2), .matrixAv3(matrixAv3), + .matrixBv1(matrixBv1), .matrixBv2(matrixBv2), .matrixBv3(matrixBv3), + .matrixCv1(matrixCv1), .matrixCv2(matrixCv2), .matrixCv3(matrixCv3) + ); + + initial begin + $dumpfile("matmul.vcd"); + $dumpvars(); + + // Multiply by zero matrix. 
+ matrixAv1 = {{5'd0}, {5'd0}, {5'd0}}; matrixAv2 = {{5'd0}, {5'd0}, {5'd0}}; matrixAv3 = {{5'd0}, {5'd0}, {5'd0}}; + matrixBv1 = {{5'd0}, {5'd0}, {5'd0}}; matrixBv2 = {{5'd0}, {5'd0}, {5'd0}}; matrixBv3 = {{5'd0}, {5'd2}, {5'd1}}; /* A is all zeros, so every entry of C is expected to be zero even though the last row of B is not */ + #50 /* delay before sampling the outputs */ + if (matrixCv1 !== 27'b0 || matrixCv2 !== 27'b0 || matrixCv3 !== 27'b0) begin /* 27'b0 and the [26:18], [17:9], [8:0] slices hard-code 3 * RESENTRY_SIZE for RESENTRY_SIZE = 9 */ + $display("Test failed. Expected 0 matrix, received: \n%d\t%d\t%d\n%d\t%d\t%d\n%d\t%d\t%d", + matrixCv1[26:18], matrixCv1[17:9], matrixCv1[8:0], + matrixCv2[26:18], matrixCv2[17:9], matrixCv2[8:0], + matrixCv3[26:18], matrixCv3[17:9], matrixCv3[8:0]); + end + + // Multiply by identity matrix: A is the identity, so C is expected to equal B with each entry widened to 9 bits. + matrixAv1 = {{5'd1}, {5'd0}, {5'd0}}; matrixAv2 = {{5'd0}, {5'd1}, {5'd0}}; matrixAv3 = {{5'd0}, {5'd0}, {5'd1}}; + matrixBv1 = {{5'd1}, {5'd2}, {5'd3}}; matrixBv2 = {{5'd2}, {5'd3}, {5'd5}}; matrixBv3 = {{5'd3}, {5'd1}, {5'd2}}; + #50 + if (matrixCv1 !== {{9'd1}, {9'd2}, {9'd3}} || matrixCv2 !== {{9'd2}, {9'd3}, {9'd5}} || matrixCv3 !== {{9'd3}, {9'd1}, {9'd2}}) begin + $display("Test failed. Expected: \n%d\t%d\t%d\n%d\t%d\t%d\n%d\t%d\t%d\n Actual: \n%d\t%d\t%d\n%d\t%d\t%d\n%d\t%d\t%d", + 9'd1, 9'd2, 9'd3, + 9'd2, 9'd3, 9'd5, + 9'd3, 9'd1, 9'd2, + matrixCv1[26:18], matrixCv1[17:9], matrixCv1[8:0], + matrixCv2[26:18], matrixCv2[17:9], matrixCv2[8:0], + matrixCv3[26:18], matrixCv3[17:9], matrixCv3[8:0]); + end + + $finish(); + end +endmodule \ No newline at end of file diff --git a/matrixmultiplication.v b/matrixmultiplication.v new file mode 100644 index 0000000..e33e17a --- /dev/null +++ b/matrixmultiplication.v @@ -0,0 +1,128 @@ +/* +Multiply two square 3x3 matrices. +Represents the operation A * B = C +Inputs: + - matrixAv1, ..., v3 and matrixBv1, ..., v3 represent the rows of A and B (three entries per row, first entry in the most significant bits). +Outputs: + - matrixCv1, ..., v3 represent the rows of C. +Parameters: + - ENTRY_SIZE is the size of each individual entry of A and B. + - RESENTRY_SIZE is the size of each individual entry in the result C. + - VECTOR_SIZE is the size of each row vector in A and B. 
+ - RESVECTOR_SIZE is the size of each row vector in the result C. +*/ + +`include "dot.v" + +module matrixmultiplication3by3 +#( + parameter ENTRY_SIZE = 5, + parameter RESENTRY_SIZE = 9, + parameter VECTOR_SIZE = 3 * ENTRY_SIZE, + parameter RESVECTOR_SIZE = 3 * RESENTRY_SIZE +) +( + input[VECTOR_SIZE - 1: 0] matrixAv1, matrixAv2, matrixAv3, + input[VECTOR_SIZE - 1: 0] matrixBv1, matrixBv2, matrixBv3, + output [RESVECTOR_SIZE - 1: 0] matrixCv1, matrixCv2, matrixCv3 +); + +wire[RESENTRY_SIZE - 1:0] c11, c12, c13; +wire[RESENTRY_SIZE - 1:0] c21, c22, c23; +wire[RESENTRY_SIZE - 1:0] c31, c32, c33; + +// Row 1 Column 1 +dot #(.ENTRY_SIZE(ENTRY_SIZE), .RESENTRY_SIZE(RESENTRY_SIZE)) r1c1 ( + .a0(matrixAv1[VECTOR_SIZE - 1:VECTOR_SIZE - ENTRY_SIZE]), + .a1(matrixAv1[VECTOR_SIZE - ENTRY_SIZE - 1: VECTOR_SIZE - (2 * ENTRY_SIZE)]), + .a2(matrixAv1[VECTOR_SIZE - (2 * ENTRY_SIZE) - 1: 0]), + .b0(matrixBv1[VECTOR_SIZE - 1:VECTOR_SIZE - ENTRY_SIZE]), + .b1(matrixBv2[VECTOR_SIZE - 1:VECTOR_SIZE - ENTRY_SIZE]), + .b2(matrixBv3[VECTOR_SIZE - 1:VECTOR_SIZE - ENTRY_SIZE]), + .product(c11)); + +// Row 1 Column 2 +dot #(.ENTRY_SIZE(ENTRY_SIZE), .RESENTRY_SIZE(RESENTRY_SIZE)) r1c2 ( + .a0(matrixAv1[VECTOR_SIZE - 1:VECTOR_SIZE - ENTRY_SIZE]), + .a1(matrixAv1[VECTOR_SIZE - ENTRY_SIZE - 1: VECTOR_SIZE - (2 * ENTRY_SIZE)]), + .a2(matrixAv1[VECTOR_SIZE - (2 * ENTRY_SIZE) - 1: 0]), + .b0(matrixBv1[VECTOR_SIZE - ENTRY_SIZE - 1: VECTOR_SIZE - (2 * ENTRY_SIZE)]), + .b1(matrixBv2[VECTOR_SIZE - ENTRY_SIZE - 1: VECTOR_SIZE - (2 * ENTRY_SIZE)]), + .b2(matrixBv3[VECTOR_SIZE - ENTRY_SIZE - 1: VECTOR_SIZE - (2 * ENTRY_SIZE)]), + .product(c12)); + +// Row 1 Column 3 +dot #(.ENTRY_SIZE(ENTRY_SIZE), .RESENTRY_SIZE(RESENTRY_SIZE)) r1c3 ( + .a0(matrixAv1[VECTOR_SIZE - 1:VECTOR_SIZE - ENTRY_SIZE]), + .a1(matrixAv1[VECTOR_SIZE - ENTRY_SIZE - 1: VECTOR_SIZE - (2 * ENTRY_SIZE)]), + .a2(matrixAv1[VECTOR_SIZE - (2 * ENTRY_SIZE) - 1: 0]), + .b0(matrixBv1[VECTOR_SIZE - (2 * ENTRY_SIZE) - 1: 0]), + 
.b1(matrixBv2[VECTOR_SIZE - (2 * ENTRY_SIZE) - 1: 0]), + .b2(matrixBv3[VECTOR_SIZE - (2 * ENTRY_SIZE) - 1: 0]), + .product(c13)); + +// Row 2 Column 1 +dot #(.ENTRY_SIZE(ENTRY_SIZE), .RESENTRY_SIZE(RESENTRY_SIZE)) r2c1 ( + .a0(matrixAv2[VECTOR_SIZE - 1:VECTOR_SIZE - ENTRY_SIZE]), + .a1(matrixAv2[VECTOR_SIZE - ENTRY_SIZE - 1: VECTOR_SIZE - (2 * ENTRY_SIZE)]), + .a2(matrixAv2[VECTOR_SIZE - (2 * ENTRY_SIZE) - 1: 0]), + .b0(matrixBv1[VECTOR_SIZE - 1:VECTOR_SIZE - ENTRY_SIZE]), + .b1(matrixBv2[VECTOR_SIZE - 1:VECTOR_SIZE - ENTRY_SIZE]), + .b2(matrixBv3[VECTOR_SIZE - 1:VECTOR_SIZE - ENTRY_SIZE]), + .product(c21)); + +// Row 2 Column 2 +dot #(.ENTRY_SIZE(ENTRY_SIZE), .RESENTRY_SIZE(RESENTRY_SIZE)) r2c2 ( + .a0(matrixAv2[VECTOR_SIZE - 1:VECTOR_SIZE - ENTRY_SIZE]), + .a1(matrixAv2[VECTOR_SIZE - ENTRY_SIZE - 1: VECTOR_SIZE - (2 * ENTRY_SIZE)]), + .a2(matrixAv2[VECTOR_SIZE - (2 * ENTRY_SIZE) - 1: 0]), + .b0(matrixBv1[VECTOR_SIZE - ENTRY_SIZE - 1: VECTOR_SIZE - (2 * ENTRY_SIZE)]), + .b1(matrixBv2[VECTOR_SIZE - ENTRY_SIZE - 1: VECTOR_SIZE - (2 * ENTRY_SIZE)]), + .b2(matrixBv3[VECTOR_SIZE - ENTRY_SIZE - 1: VECTOR_SIZE - (2 * ENTRY_SIZE)]), + .product(c22)); + +// Row 2 Column 3 +dot #(.ENTRY_SIZE(ENTRY_SIZE), .RESENTRY_SIZE(RESENTRY_SIZE)) r2c3 ( + .a0(matrixAv2[VECTOR_SIZE - 1:VECTOR_SIZE - ENTRY_SIZE]), + .a1(matrixAv2[VECTOR_SIZE - ENTRY_SIZE - 1: VECTOR_SIZE - (2 * ENTRY_SIZE)]), + .a2(matrixAv2[VECTOR_SIZE - (2 * ENTRY_SIZE) - 1: 0]), + .b0(matrixBv1[VECTOR_SIZE - (2 * ENTRY_SIZE) - 1: 0]), + .b1(matrixBv2[VECTOR_SIZE - (2 * ENTRY_SIZE) - 1: 0]), + .b2(matrixBv3[VECTOR_SIZE - (2 * ENTRY_SIZE) - 1: 0]), + .product(c23)); + +// Row 3 Column 1 +dot #(.ENTRY_SIZE(ENTRY_SIZE), .RESENTRY_SIZE(RESENTRY_SIZE)) r3c1 ( + .a0(matrixAv3[VECTOR_SIZE - 1:VECTOR_SIZE - ENTRY_SIZE]), + .a1(matrixAv3[VECTOR_SIZE - ENTRY_SIZE - 1: VECTOR_SIZE - (2 * ENTRY_SIZE)]), + .a2(matrixAv3[VECTOR_SIZE - (2 * ENTRY_SIZE) - 1: 0]), + .b0(matrixBv1[VECTOR_SIZE - 1:VECTOR_SIZE - ENTRY_SIZE]), + 
.b1(matrixBv2[VECTOR_SIZE - 1:VECTOR_SIZE - ENTRY_SIZE]), + .b2(matrixBv3[VECTOR_SIZE - 1:VECTOR_SIZE - ENTRY_SIZE]), + .product(c31)); + +// Row 3 Column 2 +dot #(.ENTRY_SIZE(ENTRY_SIZE), .RESENTRY_SIZE(RESENTRY_SIZE)) r3c2 ( + .a0(matrixAv3[VECTOR_SIZE - 1:VECTOR_SIZE - ENTRY_SIZE]), + .a1(matrixAv3[VECTOR_SIZE - ENTRY_SIZE - 1: VECTOR_SIZE - (2 * ENTRY_SIZE)]), + .a2(matrixAv3[VECTOR_SIZE - (2 * ENTRY_SIZE) - 1: 0]), + .b0(matrixBv1[VECTOR_SIZE - ENTRY_SIZE - 1: VECTOR_SIZE - (2 * ENTRY_SIZE)]), + .b1(matrixBv2[VECTOR_SIZE - ENTRY_SIZE - 1: VECTOR_SIZE - (2 * ENTRY_SIZE)]), + .b2(matrixBv3[VECTOR_SIZE - ENTRY_SIZE - 1: VECTOR_SIZE - (2 * ENTRY_SIZE)]), + .product(c32)); + +// Row 3 Column 3 +dot #(.ENTRY_SIZE(ENTRY_SIZE), .RESENTRY_SIZE(RESENTRY_SIZE)) r3c3 ( + .a0(matrixAv3[VECTOR_SIZE - 1:VECTOR_SIZE - ENTRY_SIZE]), + .a1(matrixAv3[VECTOR_SIZE - ENTRY_SIZE - 1: VECTOR_SIZE - (2 * ENTRY_SIZE)]), + .a2(matrixAv3[VECTOR_SIZE - (2 * ENTRY_SIZE) - 1: 0]), + .b0(matrixBv1[VECTOR_SIZE - (2 * ENTRY_SIZE) - 1: 0]), + .b1(matrixBv2[VECTOR_SIZE - (2 * ENTRY_SIZE) - 1: 0]), + .b2(matrixBv3[VECTOR_SIZE - (2 * ENTRY_SIZE) - 1: 0]), + .product(c33)); + +assign matrixCv1 = {{c11}, {c12}, {c13}}; +assign matrixCv2 = {{c21}, {c22}, {c23}}; +assign matrixCv3 = {{c31}, {c32}, {c33}}; + +endmodule \ No newline at end of file diff --git a/mem_test_data.dat b/mem_test_data.dat new file mode 100644 index 0000000..1bb139f --- /dev/null +++ b/mem_test_data.dat @@ -0,0 +1,65 @@ +// This is the memory that should be used in the data memory test bench +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 
+0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 +0_0000 \ No newline at end of file diff --git a/multiplexer.t.v b/multiplexer.t.v new file mode 100644 index 0000000..2386d09 --- /dev/null +++ b/multiplexer.t.v @@ -0,0 +1,51 @@ +/* +Test bench for 4 to 1 multiplexer +*/ + +`include "multiplexer.v" + +module multiplexer4to1_TEST(); + parameter ENTRY_SIZE = 5; + + reg [1:0] res_sel; + reg[(ENTRY_SIZE * 9) - 1:0] A, B, C, D; + wire [(ENTRY_SIZE * 9) - 1:0] result; + + multiplexer4to1 #(.ENTRY_SIZE(ENTRY_SIZE)) dut ( + .res_sel(res_sel), .AEplusBG(A), .AFplusBH(B), .CEplusDG(C), .CFplusDH(D), .result(result) + ); + + initial begin + A = {9{5'b00001}}; B = {9{5'b00010}}; C = {9{5'b00100}}; D = {9{5'b01000}}; + res_sel = 2'b00; #50 + + if (result !== A) begin + $display("Test failed: input 0 not selected"); + end + + res_sel = 2'b01; #50 + + if (result !== B) begin + $display("Test failed: input 1 not selected"); + end + + res_sel = 2'b10; #50 + + if (result !== C) begin + $display("Test failed: input 2 not selected"); + end + + res_sel = 2'b11; #50 + + if (result !== D) begin + $display("Test failed: input 3 not selected"); + end + + // Regression: result must follow the selected input even when res_sel does not change. + D = {9{5'b10000}}; #50 + + if (result !== D) begin + $display("Test failed: result did not follow a change on the selected input"); + end + end +endmodule // multiplexer4to1_TEST \ No newline at end of file diff --git a/multiplexer.v b/multiplexer.v new file mode 100644 index 0000000..ac051e1 --- /dev/null +++ b/multiplexer.v @@ -0,0 +1,19 @@ +/* +4 input multiplexer: result is the 9-entry (9 * ENTRY_SIZE bit) input chosen by res_sel (00: AEplusBG, 01: AFplusBH, 10: CEplusDG, 11: CFplusDH). +*/ + +module multiplexer4to1 +#(parameter ENTRY_SIZE = 5)( + input[1:0] res_sel, + input [(9 * ENTRY_SIZE) - 1:0] AEplusBG, AFplusBH, CEplusDG, CFplusDH, + output reg [(9 * ENTRY_SIZE) - 1:0] result +); + always @(*) begin /* combinational: re-evaluate on any input change, not only when res_sel changes; blocking assignments because no clock is involved */ + case (res_sel) + 2'b00: begin result = AEplusBG; end + 2'b01: begin result = AFplusBH; end + 2'b10: begin result = CEplusDG; end + 2'b11: begin result = CFplusDH; end + endcase + end +endmodule \ No newline at end of file diff --git a/multiplier.t.v b/multiplier.t.v new file mode 100644 index 0000000..50fdb5b --- /dev/null +++ b/multiplier.t.v @@ -0,0 +1,79 @@ +/* +Test 
bench for the multiplication units +*/ + +`include "multiplier.v" + +module multiplier_TEST(); + parameter ENTRY_SIZE = 5; + parameter RESENTRY_SIZE = ENTRY_SIZE; + + reg clk; + reg a_wrenable; + reg b_wrenable; + reg [ENTRY_SIZE - 1:0] a0_in, a1_in, a2_in, a3_in; + reg [ENTRY_SIZE - 1:0] a4_in, a5_in, a6_in, a7_in, a8_in; + reg [ENTRY_SIZE - 1:0] b0_in, b1_in, b2_in, b3_in; + reg [ENTRY_SIZE - 1:0] b4_in, b5_in, b6_in, b7_in, b8_in; + wire [RESENTRY_SIZE - 1:0] c0_out, c1_out, c2_out, c3_out; + wire [RESENTRY_SIZE - 1:0] c4_out, c5_out, c6_out, c7_out, c8_out; + + multiplier #(.ENTRY_SIZE(ENTRY_SIZE), .RESENTRY_SIZE(RESENTRY_SIZE)) dut ( + .clk(clk), .a_wrenable(a_wrenable), .b_wrenable(b_wrenable), .a0_in(a0_in), .a1_in(a1_in), .a2_in(a2_in), + .a3_in(a3_in), .a4_in(a4_in), .a5_in(a5_in), .a6_in(a6_in), .a7_in(a7_in), .a8_in(a8_in), .b0_in(b0_in), .b1_in(b1_in), .b2_in(b2_in), + .b3_in(b3_in), .b4_in(b4_in), .b5_in(b5_in), .b6_in(b6_in), .b7_in(b7_in), .b8_in(b8_in), .c0_out(c0_out), .c1_out (c1_out), + .c2_out(c2_out), .c3_out(c3_out), .c4_out(c4_out), .c5_out(c5_out), .c6_out(c6_out), .c7_out(c7_out), .c8_out(c8_out) + ); + + initial begin + $dumpfile("multiplier.vcd"); + $dumpvars(); + clk = 0; a_wrenable = 0; b_wrenable = 0; + + a0_in = 5'd1; a1_in = 5'd0; a2_in = 5'd0; + a3_in = 5'd0; a4_in = 5'd1; a5_in = 5'd0; + a6_in = 5'd0; a7_in = 5'd0; a8_in = 5'd1; /* A is the 3x3 identity */ + + b0_in = 5'd1; b1_in = 5'd2; b2_in = 5'd3; + b3_in = 5'd8; b4_in = 5'd9; b5_in = 5'd10; + b6_in = 5'd15; b7_in = 5'd16; b8_in = 5'd17; /* every entry of B fits in 5 bits, so the checks below expect the c outputs to read back exactly as B */ + + #50 + + + a_wrenable = 1'b1; b_wrenable = 1'b1; clk = 1; /* rising edge with both operand write enables asserted; a second rising edge follows before the outputs are checked */ + + #50 + clk = 0; #50 + clk = 1; #50 + if (c0_out !== b0_in) begin + $display("Test failed: c0_out is not the correct value."); + end + if (c1_out !== b1_in) begin + $display("Test failed: c1_out is not the correct value."); + end + if (c2_out !== b2_in) begin + $display("Test failed: c2_out is not the correct value."); + end + if (c3_out !== b3_in) begin + $display("Test failed: c3_out is not the correct 
value."); + end + if (c4_out !== b4_in) begin + $display("Test failed: c4_out is not the correct value."); + end + if (c5_out !== b5_in) begin + $display("Test failed: c5_out is not the correct value."); + end + if (c6_out !== b6_in) begin + $display("Test failed: c6_out is not the correct value."); + end + if (c7_out !== b7_in) begin + $display("Test failed: c7_out is not the correct value."); + end + if (c8_out !== b8_in) begin + $display("Test failed: c8_out is not the correct value."); + end + + $finish(); + end +endmodule \ No newline at end of file diff --git a/multiplier.v b/multiplier.v new file mode 100644 index 0000000..e3e3a60 --- /dev/null +++ b/multiplier.v @@ -0,0 +1,48 @@ +/* +A block containing a 3 by 3 matrix multiplier and registers +holding the data for both the operands and the result. +*/ + +`include "registers.v" +`include "matrixmultiplication.v" + +module multiplier +#( + parameter ENTRY_SIZE = 5, + parameter RESENTRY_SIZE = 5 +)( + input clk, + input a_wrenable, + input b_wrenable, + input [ENTRY_SIZE - 1:0] a0_in, a1_in, a2_in, a3_in, + input [ENTRY_SIZE - 1:0] a4_in, a5_in, a6_in, a7_in, a8_in, + input [ENTRY_SIZE - 1:0] b0_in, b1_in, b2_in, b3_in, + input [ENTRY_SIZE - 1:0] b4_in, b5_in, b6_in, b7_in, b8_in, + output [RESENTRY_SIZE - 1:0] c0_out, c1_out, c2_out, c3_out, + output [RESENTRY_SIZE - 1:0] c4_out, c5_out, c6_out, c7_out, c8_out +); + wire [ENTRY_SIZE - 1:0] a0_out, a1_out, a2_out, a3_out; + wire [ENTRY_SIZE - 1:0] a4_out, a5_out, a6_out, a7_out, a8_out; + wire [ENTRY_SIZE - 1:0] b0_out, b1_out, b2_out, b3_out; + wire [ENTRY_SIZE - 1:0] b4_out, b5_out, b6_out, b7_out, b8_out; + wire [RESENTRY_SIZE - 1:0] c0_in, c1_in, c2_in, c3_in; + wire [RESENTRY_SIZE - 1:0] c4_in, c5_in, c6_in, c7_in, c8_in; + + regfile #(.width(ENTRY_SIZE)) registers ( /* NOTE(review): the only width passed is ENTRY_SIZE while the c register connections are RESENTRY_SIZE wide - presumably this relies on RESENTRY_SIZE == ENTRY_SIZE (both default to 5); confirm against registers.v. The c write enable is tied high. */ + .a_wrenable(a_wrenable), .b_wrenable(b_wrenable), .c_wrenable(1'b1), .a0_in(a0_in), .a1_in(a1_in), .a2_in(a2_in), + .a3_in(a3_in), .a4_in(a4_in), .a5_in(a5_in), .a6_in(a6_in), 
.a7_in(a7_in), .a8_in(a8_in), + .b0_in(b0_in), .b1_in(b1_in), .b2_in(b2_in), .b3_in(b3_in), .b4_in(b4_in), .b5_in(b5_in), + .b6_in(b6_in), .b7_in(b7_in), .b8_in(b8_in), .c0_in(c0_in), .c1_in(c1_in), .c2_in(c2_in), .c3_in(c3_in), + .c4_in(c4_in), .c5_in(c5_in), .c6_in(c6_in), .c7_in(c7_in), .c8_in(c8_in), .a0_out(a0_out), .a1_out(a1_out), + .a2_out(a2_out), .a3_out(a3_out), .a4_out(a4_out), .a5_out(a5_out), .a6_out(a6_out), .a7_out(a7_out), .a8_out(a8_out), + .b0_out(b0_out), .b1_out(b1_out), .b2_out(b2_out), .b3_out(b3_out), .b4_out(b4_out), .b5_out(b5_out), + .b6_out(b6_out), .b7_out(b7_out), .b8_out(b8_out), .c0_out(c0_out), .c1_out(c1_out), .c2_out(c2_out), .c3_out(c3_out), + .c4_out(c4_out), .c5_out(c5_out), .c6_out(c6_out), .c7_out(c7_out), .c8_out(c8_out), .clk(clk) + ); + + matrixmultiplication3by3 #(.ENTRY_SIZE(ENTRY_SIZE), .RESENTRY_SIZE(RESENTRY_SIZE)) mult ( /* each row port packs three register outputs, first entry in the most significant bits; the product rows are routed back to the c register inputs */ + .matrixAv1({{a0_out}, {a1_out}, {a2_out}}), .matrixAv2({{a3_out}, {a4_out}, {a5_out}}), .matrixAv3({{a6_out}, {a7_out}, {a8_out}}), + .matrixBv1({{b0_out}, {b1_out}, {b2_out}}), .matrixBv2({{b3_out}, {b4_out}, {b5_out}}), .matrixBv3({{b6_out}, {b7_out}, {b8_out}}), + .matrixCv1({{c0_in}, {c1_in}, {c2_in}}), .matrixCv2({{c3_in}, {c4_in}, {c5_in}}), .matrixCv3({{c6_in}, {c7_in}, {c8_in}}) + ); +endmodule \ No newline at end of file diff --git a/multiplier6by6.t.v b/multiplier6by6.t.v new file mode 100644 index 0000000..eab15b2 --- /dev/null +++ b/multiplier6by6.t.v @@ -0,0 +1,33 @@ +/* +Test bench for the full 6 by 6 multiplier +*/ + +`include "multiplier6by6.v" + +module multiplier6by6_TEST(); + `define CLK_DELAY #50 + + reg clk; + + multiplier6by6 dut (.clk(clk)); + + initial clk = 0; + always `CLK_DELAY clk = !clk; /* free-running clock: toggles every 50 time units */ + + integer mem_out, i; + + initial begin + $dumpfile("multiplier6by6.vcd"); + $dumpvars(); + + #5000 /* fixed run time before the memory dump; NOTE(review): presumably long enough for the loaded program to finish - confirm against the controller */ + + mem_out = $fopen("memory_out.txt"); /* no mode argument, so $fopen returns a multi-channel descriptor, which $fdisplay accepts */ + for (i=0; i<1024; i=i+1) begin /* write memory[0] through memory[1023], one decimal value per line */ + $fdisplay(mem_out, "%d", dut.manager.data_mem.memory[i]); + end + $fclose(mem_out); + + $finish(); + end 
+endmodule diff --git a/multiplier6by6.v b/multiplier6by6.v new file mode 100644 index 0000000..c02b7b1 --- /dev/null +++ b/multiplier6by6.v @@ -0,0 +1,56 @@ +/* +The full 6 by 6 multiplier. + +Inputs: + -clk: the system clock +*/ + +`include "controller.v" +`include "matrix_manager.v" +`include "multiplier_network.v" + +module multiplier6by6( + input clk +); + wire data_we; + wire weA, weB, weC, weD, weE, weF, weG, weH; + wire[1:0] jklm_select; + wire next_row, column; + + wire[4:0] dataOut0, dataOut1, dataOut2, dataOut3, dataOut4, dataOut5, dataOut6, dataOut7, dataOut8; + + wire[44:0] result; /* 9 entries of 5 bits each; sliced into dataIn0 through dataIn8 on the matrix_manager instance */ + + controller control (.clk(clk), .data_we(data_we), .weA(weA), + .weB(weB), .weC(weC), .weD(weD), .weE(weE), .weF(weF), + .weG(weG), .weH(weH), .jklm_select(jklm_select), + .next_row(next_row), .column(column)); + + multiplier_network network ( /* every operand input bus is fed from the same nine dataOut words; weA through weH drive a_wrenable through h_wrenable and jklm_select drives res_sel */ + .clk(clk),.res_sel(jklm_select), .a_wrenable(weA), .b_wrenable(weB), + .c_wrenable(weC), .d_wrenable(weD), .e_wrenable(weE), .f_wrenable(weF), .g_wrenable(weG), .h_wrenable(weH), + .ae0_in(dataOut0), .ae1_in(dataOut1), .ae2_in(dataOut2), .ae3_in(dataOut3), .ae4_in(dataOut4), .ae5_in(dataOut5), + .ae6_in(dataOut6), .ae7_in(dataOut7), .ae8_in(dataOut8), + .af0_in(dataOut0), .af1_in(dataOut1), .af2_in(dataOut2), .af3_in(dataOut3), .af4_in(dataOut4), .af5_in(dataOut5), + .af6_in(dataOut6), .af7_in(dataOut7), .af8_in(dataOut8), + .bg0_in(dataOut0), .bg1_in(dataOut1), .bg2_in(dataOut2), .bg3_in(dataOut3), .bg4_in(dataOut4), .bg5_in(dataOut5), + .bg6_in(dataOut6), .bg7_in(dataOut7), .bg8_in(dataOut8), + .bh0_in(dataOut0), .bh1_in(dataOut1), .bh2_in(dataOut2), .bh3_in(dataOut3), .bh4_in(dataOut4), .bh5_in(dataOut5), + .bh6_in(dataOut6), .bh7_in(dataOut7), .bh8_in(dataOut8), + .ce0_in(dataOut0), .ce1_in(dataOut1), .ce2_in(dataOut2), .ce3_in(dataOut3), .ce4_in(dataOut4), .ce5_in(dataOut5), + .ce6_in(dataOut6), .ce7_in(dataOut7), .ce8_in(dataOut8), + .cf0_in(dataOut0), .cf1_in(dataOut1), .cf2_in(dataOut2), .cf3_in(dataOut3), 
.cf4_in(dataOut4), .cf5_in(dataOut5), + .cf6_in(dataOut6), .cf7_in(dataOut7), .cf8_in(dataOut8), + .dg0_in(dataOut0), .dg1_in(dataOut1), .dg2_in(dataOut2), .dg3_in(dataOut3), .dg4_in(dataOut4), .dg5_in(dataOut5), + .dg6_in(dataOut6), .dg7_in(dataOut7), .dg8_in(dataOut8), + .dh0_in(dataOut0), .dh1_in(dataOut1), .dh2_in(dataOut2), .dh3_in(dataOut3), .dh4_in(dataOut4), .dh5_in(dataOut5), + .dh6_in(dataOut6), .dh7_in(dataOut7), .dh8_in(dataOut8), .res(result) + ); + + matrix_manager #(.ADDR_WIDTH(32)) manager (.clk(clk), .dm_we(data_we), + .next_row(next_row), .column(column), .n(6), .m(6), .p(6), .dataIn0 (result[44:40]), .dataIn1 (result[39:35]), + .dataIn2 (result[34:30]), .dataIn3 (result[29:25]), .dataIn4 (result[24:20]), .dataIn5 (result[19:15]), .dataIn6 (result[14:10]), + .dataIn7 (result[9:5]), .dataIn8 (result[4:0]), .dataOut0(dataOut0), .dataOut1(dataOut1), + .dataOut2(dataOut2), .dataOut3(dataOut3), .dataOut4(dataOut4), .dataOut5(dataOut5), + .dataOut6(dataOut6), .dataOut7(dataOut7), .dataOut8(dataOut8)); /* result is split most significant entry first: dataIn0 takes result[44:40] down to dataIn8 taking result[4:0]; n, m and p are tied to the constant 6 */ +endmodule \ No newline at end of file diff --git a/multiplier_network.t.v b/multiplier_network.t.v new file mode 100644 index 0000000..f9a5f3c --- /dev/null +++ b/multiplier_network.t.v @@ -0,0 +1,460 @@ +/* +Test bench for multiplier network +*/ + +`include "multiplier_network.v" + +module multiplier_network_TEST(); + parameter ENTRY_SIZE = 5; + reg clk, a_wrenable, b_wrenable, c_wrenable, d_wrenable; + reg e_wrenable, f_wrenable, g_wrenable, h_wrenable; + reg [1:0] res_sel; + + reg [ENTRY_SIZE - 1:0] ae0_in, ae1_in, ae2_in; + reg [ENTRY_SIZE - 1:0] ae3_in, ae4_in, ae5_in; + reg [ENTRY_SIZE - 1:0] ae6_in, ae7_in, ae8_in; + + reg [ENTRY_SIZE - 1:0] af0_in, af1_in, af2_in; + reg [ENTRY_SIZE - 1:0] af3_in, af4_in, af5_in; + reg [ENTRY_SIZE - 1:0] af6_in, af7_in, af8_in; + + reg [ENTRY_SIZE - 1:0] bg0_in, bg1_in, bg2_in; + reg [ENTRY_SIZE - 1:0] bg3_in, bg4_in, bg5_in; + reg [ENTRY_SIZE - 1:0] bg6_in, bg7_in, bg8_in; + + reg [ENTRY_SIZE - 1:0] bh0_in, 
bh1_in, bh2_in; + reg [ENTRY_SIZE - 1:0] bh3_in, bh4_in, bh5_in; + reg [ENTRY_SIZE - 1:0] bh6_in, bh7_in, bh8_in; + + reg [ENTRY_SIZE - 1:0] ce0_in, ce1_in, ce2_in; + reg [ENTRY_SIZE - 1:0] ce3_in, ce4_in, ce5_in; + reg [ENTRY_SIZE - 1:0] ce6_in, ce7_in, ce8_in; + + reg [ENTRY_SIZE - 1:0] cf0_in, cf1_in, cf2_in; + reg [ENTRY_SIZE - 1:0] cf3_in, cf4_in, cf5_in; + reg [ENTRY_SIZE - 1:0] cf6_in, cf7_in, cf8_in; + + reg [ENTRY_SIZE - 1:0] dg0_in, dg1_in, dg2_in; + reg [ENTRY_SIZE - 1:0] dg3_in, dg4_in, dg5_in; + reg [ENTRY_SIZE - 1:0] dg6_in, dg7_in, dg8_in; + + reg [ENTRY_SIZE - 1:0] dh0_in, dh1_in, dh2_in; + reg [ENTRY_SIZE - 1:0] dh3_in, dh4_in, dh5_in; + reg [ENTRY_SIZE - 1:0] dh6_in, dh7_in, dh8_in; + + wire [(9 * ENTRY_SIZE) - 1:0] result; + + multiplier_network #(.ENTRY_SIZE(ENTRY_SIZE)) dut ( + .clk(clk), .a_wrenable(a_wrenable), .b_wrenable(b_wrenable), .c_wrenable(c_wrenable), + .d_wrenable(d_wrenable), .e_wrenable(e_wrenable), .f_wrenable(f_wrenable), + .g_wrenable(g_wrenable), .h_wrenable(h_wrenable), .res_sel (res_sel), .ae0_in (ae0_in), .ae1_in (ae1_in), + .ae2_in (ae2_in), .ae3_in (ae3_in), .ae4_in (ae4_in), .ae5_in (ae5_in), .ae6_in (ae6_in), .ae7_in (ae7_in), + .ae8_in (ae8_in), .af0_in (af0_in), .af1_in (af1_in), .af2_in (af2_in), .af3_in (af3_in), .af4_in (af4_in), + .af5_in (af5_in), .af6_in (af6_in), .af7_in (af7_in), .af8_in (af8_in), .bg0_in (bg0_in), .bg1_in (bg1_in), + .bg2_in (bg2_in), .bg3_in (bg3_in), .bg4_in (bg4_in), .bg5_in (bg5_in), .bg6_in (bg6_in), .bg7_in (bg7_in), + .bg8_in (bg8_in), .bh0_in (bh0_in), .bh1_in (bh1_in), .bh2_in (bh2_in), .bh3_in (bh3_in), .bh4_in (bh4_in), + .bh5_in (bh5_in), .bh6_in (bh6_in), .bh7_in (bh7_in), .bh8_in (bh8_in), .ce0_in (ce0_in), .ce1_in (ce1_in), + .ce2_in (ce2_in), .ce3_in (ce3_in), .ce4_in (ce4_in), .ce5_in (ce5_in), .ce6_in (ce6_in), .ce7_in (ce7_in), + .ce8_in (ce8_in), .cf0_in (cf0_in), .cf1_in (cf1_in), .cf2_in (cf2_in), .cf3_in (cf3_in), .cf4_in (cf4_in), + .cf5_in (cf5_in), .cf6_in (cf6_in), 
.cf7_in (cf7_in), .cf8_in (cf8_in), .dg0_in (dg0_in), .dg1_in (dg1_in), + .dg2_in (dg2_in), .dg3_in (dg3_in), .dg4_in (dg4_in), .dg5_in (dg5_in), .dg6_in (dg6_in), .dg7_in (dg7_in), + .dg8_in (dg8_in), .dh0_in (dh0_in), .dh1_in (dh1_in), .dh2_in (dh2_in), .dh3_in (dh3_in), .dh4_in (dh4_in), + .dh5_in (dh5_in), .dh6_in (dh6_in), .dh7_in (dh7_in), .dh8_in (dh8_in), .res (result) + ); + + `define FALSE 1'b0 + `define TRUE 1'b1 + + `define ZERO 5'b00000 + `define ONE 5'b00001 + + `define ZERO_MAT {9{`ZERO}} + `define ONE_MAT {9{`ONE}} + `define ID_MAT {{`ONE}, {`ZERO}, {`ZERO}, {`ZERO}, {`ONE}, {`ZERO}, {`ZERO}, {`ZERO}, {`ONE}} + `define MIX_MAT {{`ONE}, {`ONE}, {`ZERO}, {`ZERO}, {`ONE}, {`ZERO}, {`ZERO}, {`ONE}, {`ONE}} + + `define J 2'b00 + `define K 2'b01 + `define L 2'b10 + `define M 2'b11 + + initial begin + $dumpfile("multiplier_network.vcd"); + $dumpvars(); + a_wrenable = `FALSE; b_wrenable = `FALSE; c_wrenable = `FALSE; d_wrenable = `FALSE; + e_wrenable = `FALSE; f_wrenable = `FALSE; g_wrenable = `FALSE; h_wrenable = `FALSE; + + // Load the submatrices of matrix "A" + clk = 0; a_wrenable = `TRUE; b_wrenable = `TRUE; c_wrenable = `TRUE; d_wrenable = `TRUE; #50 + + // A + ae0_in = `ONE; ae1_in = `ZERO; ae2_in = `ZERO; + ae3_in = `ZERO; ae4_in = `ONE; ae5_in = `ZERO; + ae6_in = `ZERO; ae7_in = `ZERO; ae8_in = `ONE; + + af0_in = `ONE; af1_in = `ZERO; af2_in = `ZERO; + af3_in = `ZERO; af4_in = `ONE; af5_in = `ZERO; + af6_in = `ZERO; af7_in = `ZERO; af8_in = `ONE; + + // B + bg0_in = `ZERO; bg1_in = `ZERO; bg2_in = `ZERO; + bg3_in = `ZERO; bg4_in = `ZERO; bg5_in = `ZERO; + bg6_in = `ZERO; bg7_in = `ZERO; bg8_in = `ZERO; + + bh0_in = `ZERO; bh1_in = `ZERO; bh2_in = `ZERO; + bh3_in = `ZERO; bh4_in = `ZERO; bh5_in = `ZERO; + bh6_in = `ZERO; bh7_in = `ZERO; bh8_in = `ZERO; + + // C + ce0_in = `ZERO; ce1_in = `ZERO; ce2_in = `ZERO; + ce3_in = `ZERO; ce4_in = `ZERO; ce5_in = `ZERO; + ce6_in = `ZERO; ce7_in = `ZERO; ce8_in = `ZERO; + + cf0_in = `ZERO; cf1_in = `ZERO; 
cf2_in = `ZERO; + cf3_in = `ZERO; cf4_in = `ZERO; cf5_in = `ZERO; + cf6_in = `ZERO; cf7_in = `ZERO; cf8_in = `ZERO; + + // D + dg0_in = `ONE; dg1_in = `ZERO; dg2_in = `ZERO; + dg3_in = `ZERO; dg4_in = `ONE; dg5_in = `ZERO; + dg6_in = `ZERO; dg7_in = `ZERO; dg8_in = `ONE; + + dh0_in = `ONE; dh1_in = `ZERO; dh2_in = `ZERO; + dh3_in = `ZERO; dh4_in = `ONE; dh5_in = `ZERO; + dh6_in = `ZERO; dh7_in = `ZERO; dh8_in = `ONE; + + clk = 1; #50 + + a_wrenable = `FALSE; b_wrenable = `FALSE; c_wrenable = `FALSE; d_wrenable = `FALSE; + + // Test correct loading of A + if ( + {{dut.AE.a0_out}, {dut.AE.a1_out}, {dut.AE.a2_out}, + {dut.AE.a3_out}, {dut.AE.a4_out}, {dut.AE.a5_out}, + {dut.AE.a6_out}, {dut.AE.a7_out}, {dut.AE.a8_out}} !== `ID_MAT + ) begin + $display("Test failed: wrong values loaded into A of AE"); + $display("\t%d\t%d\t%d\n\t%d\t%d\t%d\n\t%d\t%d\t%d", + dut.AE.a0_out, dut.AE.a1_out, dut.AE.a2_out, + dut.AE.a3_out, dut.AE.a4_out, dut.AE.a5_out, + dut.AE.a6_out, dut.AE.a7_out, dut.AE.a8_out + ); + end + + if ( + {{dut.AF.a0_out}, {dut.AF.a1_out}, {dut.AF.a2_out}, + {dut.AF.a3_out}, {dut.AF.a4_out}, {dut.AF.a5_out}, + {dut.AF.a6_out}, {dut.AF.a7_out}, {dut.AF.a8_out}} !== `ID_MAT + ) begin + $display("Test failed: wrong values loaded into A of AF"); + $display("\t%d\t%d\t%d\n\t%d\t%d\t%d\n\t%d\t%d\t%d", + dut.AF.a0_out, dut.AF.a1_out, dut.AF.a2_out, + dut.AF.a3_out, dut.AF.a4_out, dut.AF.a5_out, + dut.AF.a6_out, dut.AF.a7_out, dut.AF.a8_out + ); + end + + // Test correct loading of B + if ( + {{dut.BG.a0_out}, {dut.BG.a1_out}, {dut.BG.a2_out}, + {dut.BG.a3_out}, {dut.BG.a4_out}, {dut.BG.a5_out}, + {dut.BG.a6_out}, {dut.BG.a7_out}, {dut.BG.a8_out}} !== `ZERO_MAT + ) begin + $display("Test failed: wrong values loaded into B of BG"); + $display("\t%d\t%d\t%d\n\t%d\t%d\t%d\n\t%d\t%d\t%d", + dut.BG.a0_out, dut.BG.a1_out, dut.BG.a2_out, + dut.BG.a3_out, dut.BG.a4_out, dut.BG.a5_out, + dut.BG.a6_out, dut.BG.a7_out, dut.BG.a8_out + ); + end + + if ( + {{dut.BH.a0_out}, 
{dut.BH.a1_out}, {dut.BH.a2_out}, + {dut.BH.a3_out}, {dut.BH.a4_out}, {dut.BH.a5_out}, + {dut.BH.a6_out}, {dut.BH.a7_out}, {dut.BH.a8_out}} !== `ZERO_MAT + ) begin + $display("Test failed: wrong values loaded into B of BH"); + $display("\t%d\t%d\t%d\n\t%d\t%d\t%d\n\t%d\t%d\t%d", + dut.BH.a0_out, dut.BH.a1_out, dut.BH.a2_out, + dut.BH.a3_out, dut.BH.a4_out, dut.BH.a5_out, + dut.BH.a6_out, dut.BH.a7_out, dut.BH.a8_out + ); + end + + // Test correct loading of C + if ( + {{dut.CE.a0_out}, {dut.CE.a1_out}, {dut.CE.a2_out}, + {dut.CE.a3_out}, {dut.CE.a4_out}, {dut.CE.a5_out}, + {dut.CE.a6_out}, {dut.CE.a7_out}, {dut.CE.a8_out}} !== `ZERO_MAT + ) begin + $display("Test failed: wrong values loaded into C of CE"); + $display("\t%d\t%d\t%d\n\t%d\t%d\t%d\n\t%d\t%d\t%d", + dut.CE.a0_out, dut.CE.a1_out, dut.CE.a2_out, + dut.CE.a3_out, dut.CE.a4_out, dut.CE.a5_out, + dut.CE.a6_out, dut.CE.a7_out, dut.CE.a8_out + ); + end + + if ( + {{dut.CF.a0_out}, {dut.CF.a1_out}, {dut.CF.a2_out}, + {dut.CF.a3_out}, {dut.CF.a4_out}, {dut.CF.a5_out}, + {dut.CF.a6_out}, {dut.CF.a7_out}, {dut.CF.a8_out}} !== `ZERO_MAT + ) begin + $display("Test failed: wrong values loaded into C of CF"); + $display("\t%d\t%d\t%d\n\t%d\t%d\t%d\n\t%d\t%d\t%d", + dut.CF.a0_out, dut.CF.a1_out, dut.CF.a2_out, + dut.CF.a3_out, dut.CF.a4_out, dut.CF.a5_out, + dut.CF.a6_out, dut.CF.a7_out, dut.CF.a8_out + ); + end + + // Test correct loading of D + if ( + {{dut.DG.a0_out}, {dut.DG.a1_out}, {dut.DG.a2_out}, + {dut.DG.a3_out}, {dut.DG.a4_out}, {dut.DG.a5_out}, + {dut.DG.a6_out}, {dut.DG.a7_out}, {dut.DG.a8_out}} !== `ID_MAT + ) begin + $display("Test failed: wrong values loaded into D of DG"); + $display("\t%d\t%d\t%d\n\t%d\t%d\t%d\n\t%d\t%d\t%d", + dut.DG.a0_out, dut.DG.a1_out, dut.DG.a2_out, + dut.DG.a3_out, dut.DG.a4_out, dut.DG.a5_out, + dut.DG.a6_out, dut.DG.a7_out, dut.DG.a8_out + ); + end + + if ( + {{dut.DH.a0_out}, {dut.DH.a1_out}, {dut.DH.a2_out}, + {dut.DH.a3_out}, {dut.DH.a4_out}, {dut.DH.a5_out}, + 
{dut.DH.a6_out}, {dut.DH.a7_out}, {dut.DH.a8_out}} !== `ID_MAT + ) begin + $display("Test failed: wrong values loaded into D of DH"); + $display("\t%d\t%d\t%d\n\t%d\t%d\t%d\n\t%d\t%d\t%d", + dut.DH.a0_out, dut.DH.a1_out, dut.DH.a2_out, + dut.DH.a3_out, dut.DH.a4_out, dut.DH.a5_out, + dut.DH.a6_out, dut.DH.a7_out, dut.DH.a8_out + ); + end + + // Load submatrices of matrix "B" + clk = 0; e_wrenable = `TRUE; f_wrenable = `TRUE; g_wrenable = `TRUE; h_wrenable = `TRUE; #50 + + // E + ae0_in = `ONE; ae1_in = `ONE; ae2_in = `ZERO; + ae3_in = `ZERO; ae4_in = `ONE; ae5_in = `ZERO; + ae6_in = `ZERO; ae7_in = `ONE; ae8_in = `ONE; + + ce0_in = `ONE; ce1_in = `ONE; ce2_in = `ZERO; + ce3_in = `ZERO; ce4_in = `ONE; ce5_in = `ZERO; + ce6_in = `ZERO; ce7_in = `ONE; ce8_in = `ONE; + + // F + af0_in = `ONE; af1_in = `ONE; af2_in = `ONE; + af3_in = `ONE; af4_in = `ONE; af5_in = `ONE; + af6_in = `ONE; af7_in = `ONE; af8_in = `ONE; + + cf0_in = `ONE; cf1_in = `ONE; cf2_in = `ONE; + cf3_in = `ONE; cf4_in = `ONE; cf5_in = `ONE; + cf6_in = `ONE; cf7_in = `ONE; cf8_in = `ONE; + + // G + bg0_in = `ONE; bg1_in = `ONE; bg2_in = `ZERO; + bg3_in = `ZERO; bg4_in = `ONE; bg5_in = `ZERO; + bg6_in = `ZERO; bg7_in = `ONE; bg8_in = `ONE; + + dg0_in = `ONE; dg1_in = `ONE; dg2_in = `ZERO; + dg3_in = `ZERO; dg4_in = `ONE; dg5_in = `ZERO; + dg6_in = `ZERO; dg7_in = `ONE; dg8_in = `ONE; + + // H + bh0_in = `ONE; bh1_in = `ONE; bh2_in = `ONE; + bh3_in = `ONE; bh4_in = `ONE; bh5_in = `ONE; + bh6_in = `ONE; bh7_in = `ONE; bh8_in = `ONE; + + dh0_in = `ONE; dh1_in = `ONE; dh2_in = `ONE; + dh3_in = `ONE; dh4_in = `ONE; dh5_in = `ONE; + dh6_in = `ONE; dh7_in = `ONE; dh8_in = `ONE; + + clk = 1; #50 + + e_wrenable = `FALSE; f_wrenable = `FALSE; g_wrenable = `FALSE; h_wrenable = `FALSE; + + clk = 0; + + // Test correct loading of E + if ( + {{dut.AE.b0_out}, {dut.AE.b1_out}, {dut.AE.b2_out}, + {dut.AE.b3_out}, {dut.AE.b4_out}, {dut.AE.b5_out}, + {dut.AE.b6_out}, {dut.AE.b7_out}, {dut.AE.b8_out}} !== `MIX_MAT + ) 
begin + $display("Test failed: wrong values loaded into E of AE"); + $display("\t%d\t%d\t%d\n\t%d\t%d\t%d\n\t%d\t%d\t%d", + dut.AE.b0_out, dut.AE.b1_out, dut.AE.b2_out, + dut.AE.b3_out, dut.AE.b4_out, dut.AE.b5_out, + dut.AE.b6_out, dut.AE.b7_out, dut.AE.b8_out + ); + end + + if ( + {{dut.CE.b0_out}, {dut.CE.b1_out}, {dut.CE.b2_out}, + {dut.CE.b3_out}, {dut.CE.b4_out}, {dut.CE.b5_out}, + {dut.CE.b6_out}, {dut.CE.b7_out}, {dut.CE.b8_out}} !== `MIX_MAT + ) begin + $display("Test failed: wrong values loaded into E of CE"); + $display("\t%d\t%d\t%d\n\t%d\t%d\t%d\n\t%d\t%d\t%d", + dut.CE.b0_out, dut.CE.b1_out, dut.CE.b2_out, + dut.CE.b3_out, dut.CE.b4_out, dut.CE.b5_out, + dut.CE.b6_out, dut.CE.b7_out, dut.CE.b8_out + ); + end + + // Test correct loading of F + if ( + {{dut.AF.b0_out}, {dut.AF.b1_out}, {dut.AF.b2_out}, + {dut.AF.b3_out}, {dut.AF.b4_out}, {dut.AF.b5_out}, + {dut.AF.b6_out}, {dut.AF.b7_out}, {dut.AF.b8_out}} !== `ONE_MAT + ) begin + $display("Test failed: wrong values loaded into F of AF"); + $display("\t%d\t%d\t%d\n\t%d\t%d\t%d\n\t%d\t%d\t%d", + dut.AF.b0_out, dut.AF.b1_out, dut.AF.b2_out, + dut.AF.b3_out, dut.AF.b4_out, dut.AF.b5_out, + dut.AF.b6_out, dut.AF.b7_out, dut.AF.b8_out + ); + end + + if ( + {{dut.CF.b0_out}, {dut.CF.b1_out}, {dut.CF.b2_out}, + {dut.CF.b3_out}, {dut.CF.b4_out}, {dut.CF.b5_out}, + {dut.CF.b6_out}, {dut.CF.b7_out}, {dut.CF.b8_out}} !== `ONE_MAT + ) begin + $display("Test failed: wrong values loaded into F of CF"); + $display("\t%d\t%d\t%d\n\t%d\t%d\t%d\n\t%d\t%d\t%d", + dut.CF.b0_out, dut.CF.b1_out, dut.CF.b2_out, + dut.CF.b3_out, dut.CF.b4_out, dut.CF.b5_out, + dut.CF.b6_out, dut.CF.b7_out, dut.CF.b8_out + ); + end + + + // Test correct loading of G + if ( + {{dut.BG.b0_out}, {dut.BG.b1_out}, {dut.BG.b2_out}, + {dut.BG.b3_out}, {dut.BG.b4_out}, {dut.BG.b5_out}, + {dut.BG.b6_out}, {dut.BG.b7_out}, {dut.BG.b8_out}} !== `MIX_MAT + ) begin + $display("Test failed: wrong values loaded into G of BG"); + 
$display("\t%d\t%d\t%d\n\t%d\t%d\t%d\n\t%d\t%d\t%d", + dut.BG.b0_out, dut.BG.b1_out, dut.BG.b2_out, + dut.BG.b3_out, dut.BG.b4_out, dut.BG.b5_out, + dut.BG.b6_out, dut.BG.b7_out, dut.BG.b8_out + ); + end + + if ( + {{dut.DG.b0_out}, {dut.DG.b1_out}, {dut.DG.b2_out}, + {dut.DG.b3_out}, {dut.DG.b4_out}, {dut.DG.b5_out}, + {dut.DG.b6_out}, {dut.DG.b7_out}, {dut.DG.b8_out}} !== `MIX_MAT + ) begin + $display("Test failed: wrong values loaded into G of DG"); + $display("\t%d\t%d\t%d\n\t%d\t%d\t%d\n\t%d\t%d\t%d", + dut.DG.b0_out, dut.DG.b1_out, dut.DG.b2_out, + dut.DG.b3_out, dut.DG.b4_out, dut.DG.b5_out, + dut.DG.b6_out, dut.DG.b7_out, dut.DG.b8_out + ); + end + + // Test correct loading of H + if ( + {{dut.BH.b0_out}, {dut.BH.b1_out}, {dut.BH.b2_out}, + {dut.BH.b3_out}, {dut.BH.b4_out}, {dut.BH.b5_out}, + {dut.BH.b6_out}, {dut.BH.b7_out}, {dut.BH.b8_out}} !== `ONE_MAT + ) begin + $display("Test failed: wrong values loaded into H of BH"); + $display("\t%d\t%d\t%d\n\t%d\t%d\t%d\n\t%d\t%d\t%d", + dut.BH.b0_out, dut.BH.b1_out, dut.BH.b2_out, + dut.BH.b3_out, dut.BH.b4_out, dut.BH.b5_out, + dut.BH.b6_out, dut.BH.b7_out, dut.BH.b8_out + ); + end + + if ( + {{dut.DH.b0_out}, {dut.DH.b1_out}, {dut.DH.b2_out}, + {dut.DH.b3_out}, {dut.DH.b4_out}, {dut.DH.b5_out}, + {dut.DH.b6_out}, {dut.DH.b7_out}, {dut.DH.b8_out}} !== `ONE_MAT + ) begin + $display("Test failed: wrong values loaded into H of DH"); + $display("\t%d\t%d\t%d\n\t%d\t%d\t%d\n\t%d\t%d\t%d", + dut.DH.b0_out, dut.DH.b1_out, dut.DH.b2_out, + dut.DH.b3_out, dut.DH.b4_out, dut.DH.b5_out, + dut.DH.b6_out, dut.DH.b7_out, dut.DH.b8_out + ); + end + + clk = 0; #50 + clk = 1; + res_sel = `J; #10 + if (result !== `MIX_MAT) begin + $display("Test failed: submatrix J contains wrong values"); + $display("\t%d\t%d\t%d\n\t%d\t%d\t%d\n\t%d\t%d\t%d", + result[(9 * ENTRY_SIZE) - 1: (8 * ENTRY_SIZE)], + result[(8 * ENTRY_SIZE) - 1: (7 * ENTRY_SIZE)], + result[(7 * ENTRY_SIZE) - 1: (6 * ENTRY_SIZE)], + result[(6 * ENTRY_SIZE) - 1: (5 * 
ENTRY_SIZE)], + result[(5 * ENTRY_SIZE) - 1: (4 * ENTRY_SIZE)], + result[(4 * ENTRY_SIZE) - 1: (3 * ENTRY_SIZE)], + result[(3 * ENTRY_SIZE) - 1: (2 * ENTRY_SIZE)], + result[(2 * ENTRY_SIZE) - 1: (1 * ENTRY_SIZE)], + result[(1 * ENTRY_SIZE) - 1: (0 * ENTRY_SIZE)] + ); + end + + res_sel = `K; #10 + if (result !== `ONE_MAT) begin + $display("Test failed: submatrix K contains wrong values"); + $display("\t%d\t%d\t%d\n\t%d\t%d\t%d\n\t%d\t%d\t%d", + result[(9 * ENTRY_SIZE) - 1: (8 * ENTRY_SIZE)], + result[(8 * ENTRY_SIZE) - 1: (7 * ENTRY_SIZE)], + result[(7 * ENTRY_SIZE) - 1: (6 * ENTRY_SIZE)], + result[(6 * ENTRY_SIZE) - 1: (5 * ENTRY_SIZE)], + result[(5 * ENTRY_SIZE) - 1: (4 * ENTRY_SIZE)], + result[(4 * ENTRY_SIZE) - 1: (3 * ENTRY_SIZE)], + result[(3 * ENTRY_SIZE) - 1: (2 * ENTRY_SIZE)], + result[(2 * ENTRY_SIZE) - 1: (1 * ENTRY_SIZE)], + result[(1 * ENTRY_SIZE) - 1: (0 * ENTRY_SIZE)] + ); + end + + res_sel = `L; #10 + if (result !== `MIX_MAT) begin + $display("Test failed: submatrix L contains wrong values"); + $display("\t%d\t%d\t%d\n\t%d\t%d\t%d\n\t%d\t%d\t%d", + result[(9 * ENTRY_SIZE) - 1: (8 * ENTRY_SIZE)], + result[(8 * ENTRY_SIZE) - 1: (7 * ENTRY_SIZE)], + result[(7 * ENTRY_SIZE) - 1: (6 * ENTRY_SIZE)], + result[(6 * ENTRY_SIZE) - 1: (5 * ENTRY_SIZE)], + result[(5 * ENTRY_SIZE) - 1: (4 * ENTRY_SIZE)], + result[(4 * ENTRY_SIZE) - 1: (3 * ENTRY_SIZE)], + result[(3 * ENTRY_SIZE) - 1: (2 * ENTRY_SIZE)], + result[(2 * ENTRY_SIZE) - 1: (1 * ENTRY_SIZE)], + result[(1 * ENTRY_SIZE) - 1: (0 * ENTRY_SIZE)] + ); + end + + res_sel = `M; #10 + if (result !== `ONE_MAT) begin + $display("Test failed: submatrix M contains wrong values"); + $display("\t%d\t%d\t%d\n\t%d\t%d\t%d\n\t%d\t%d\t%d", + result[(9 * ENTRY_SIZE) - 1: (8 * ENTRY_SIZE)], + result[(8 * ENTRY_SIZE) - 1: (7 * ENTRY_SIZE)], + result[(7 * ENTRY_SIZE) - 1: (6 * ENTRY_SIZE)], + result[(6 * ENTRY_SIZE) - 1: (5 * ENTRY_SIZE)], + result[(5 * ENTRY_SIZE) - 1: (4 * ENTRY_SIZE)], + result[(4 * ENTRY_SIZE) - 1: (3 * 
ENTRY_SIZE)], + result[(3 * ENTRY_SIZE) - 1: (2 * ENTRY_SIZE)], + result[(2 * ENTRY_SIZE) - 1: (1 * ENTRY_SIZE)], + result[(1 * ENTRY_SIZE) - 1: (0 * ENTRY_SIZE)] + ); + end + + $finish; + end +endmodule // multiplier_network_TEST \ No newline at end of file diff --git a/multiplier_network.v b/multiplier_network.v new file mode 100644 index 0000000..d928889 --- /dev/null +++ b/multiplier_network.v @@ -0,0 +1,204 @@ +/* +Network of 8 multiplier units +*/ + +`include "multiplier.v" +`include "add3by3.v" +`include "multiplexer.v" + +module multiplier_network + #(parameter ENTRY_SIZE = 5)( + input clk, + input [1:0] res_sel, + input a_wrenable, + input b_wrenable, + input c_wrenable, + input d_wrenable, + input e_wrenable, + input f_wrenable, + input g_wrenable, + input h_wrenable, + input [ENTRY_SIZE - 1:0] ae0_in, ae1_in, ae2_in, + input [ENTRY_SIZE - 1:0] ae3_in, ae4_in, ae5_in, + input [ENTRY_SIZE - 1:0] ae6_in, ae7_in, ae8_in, + input [ENTRY_SIZE - 1:0] af0_in, af1_in, af2_in, + input [ENTRY_SIZE - 1:0] af3_in, af4_in, af5_in, + input [ENTRY_SIZE - 1:0] af6_in, af7_in, af8_in, + input [ENTRY_SIZE - 1:0] bg0_in, bg1_in, bg2_in, + input [ENTRY_SIZE - 1:0] bg3_in, bg4_in, bg5_in, + input [ENTRY_SIZE - 1:0] bg6_in, bg7_in, bg8_in, + input [ENTRY_SIZE - 1:0] bh0_in, bh1_in, bh2_in, + input [ENTRY_SIZE - 1:0] bh3_in, bh4_in, bh5_in, + input [ENTRY_SIZE - 1:0] bh6_in, bh7_in, bh8_in, + input [ENTRY_SIZE - 1:0] ce0_in, ce1_in, ce2_in, + input [ENTRY_SIZE - 1:0] ce3_in, ce4_in, ce5_in, + input [ENTRY_SIZE - 1:0] ce6_in, ce7_in, ce8_in, + input [ENTRY_SIZE - 1:0] cf0_in, cf1_in, cf2_in, + input [ENTRY_SIZE - 1:0] cf3_in, cf4_in, cf5_in, + input [ENTRY_SIZE - 1:0] cf6_in, cf7_in, cf8_in, + input [ENTRY_SIZE - 1:0] dg0_in, dg1_in, dg2_in, + input [ENTRY_SIZE - 1:0] dg3_in, dg4_in, dg5_in, + input [ENTRY_SIZE - 1:0] dg6_in, dg7_in, dg8_in, + input [ENTRY_SIZE - 1:0] dh0_in, dh1_in, dh2_in, + input [ENTRY_SIZE - 1:0] dh3_in, dh4_in, dh5_in, + input [ENTRY_SIZE - 1:0] dh6_in, 
dh7_in, dh8_in, + output [(9 * ENTRY_SIZE) - 1:0] res +); + + + wire [ENTRY_SIZE - 1:0] ae0, ae1, ae2; + wire [ENTRY_SIZE - 1:0] ae3, ae4, ae5; + wire [ENTRY_SIZE - 1:0] ae6, ae7, ae8; + wire [ENTRY_SIZE - 1:0] af0, af1, af2; + wire [ENTRY_SIZE - 1:0] af3, af4, af5; + wire [ENTRY_SIZE - 1:0] af6, af7, af8; + wire [ENTRY_SIZE - 1:0] bg0, bg1, bg2; + wire [ENTRY_SIZE - 1:0] bg3, bg4, bg5; + wire [ENTRY_SIZE - 1:0] bg6, bg7, bg8; + wire [ENTRY_SIZE - 1:0] bh0, bh1, bh2; + wire [ENTRY_SIZE - 1:0] bh3, bh4, bh5; + wire [ENTRY_SIZE - 1:0] bh6, bh7, bh8; + wire [ENTRY_SIZE - 1:0] ce0, ce1, ce2; + wire [ENTRY_SIZE - 1:0] ce3, ce4, ce5; + wire [ENTRY_SIZE - 1:0] ce6, ce7, ce8; + wire [ENTRY_SIZE - 1:0] cf0, cf1, cf2; + wire [ENTRY_SIZE - 1:0] cf3, cf4, cf5; + wire [ENTRY_SIZE - 1:0] cf6, cf7, cf8; + wire [ENTRY_SIZE - 1:0] dg0, dg1, dg2; + wire [ENTRY_SIZE - 1:0] dg3, dg4, dg5; + wire [ENTRY_SIZE - 1:0] dg6, dg7, dg8; + wire [ENTRY_SIZE - 1:0] dh0, dh1, dh2; + wire [ENTRY_SIZE - 1:0] dh3, dh4, dh5; + wire [ENTRY_SIZE - 1:0] dh6, dh7, dh8; + + multiplier #(.ENTRY_SIZE(ENTRY_SIZE)) AE ( + .clk(clk), .a_wrenable(a_wrenable), .b_wrenable(e_wrenable), + .a0_in(ae0_in), .a1_in(ae1_in), .a2_in(ae2_in), .a3_in(ae3_in), + .a4_in(ae4_in), .a5_in(ae5_in), .a6_in(ae6_in), .a7_in(ae7_in), .a8_in(ae8_in), + .b0_in(ae0_in), .b1_in(ae1_in), .b2_in(ae2_in), .b3_in(ae3_in), + .b4_in(ae4_in), .b5_in(ae5_in), .b6_in(ae6_in), .b7_in(ae7_in), .b8_in(ae8_in), + .c0_out(ae0), .c1_out(ae1), .c2_out(ae2), .c3_out(ae3), + .c4_out(ae4), .c5_out(ae5), .c6_out(ae6), .c7_out(ae7), .c8_out(ae8) + ); + + multiplier #(.ENTRY_SIZE(ENTRY_SIZE)) AF ( + .clk(clk), .a_wrenable(a_wrenable), .b_wrenable(f_wrenable), + .a0_in(af0_in), .a1_in(af1_in), .a2_in(af2_in), .a3_in(af3_in), + .a4_in(af4_in), .a5_in(af5_in), .a6_in(af6_in), .a7_in(af7_in), .a8_in(af8_in), + .b0_in(af0_in), .b1_in(af1_in), .b2_in(af2_in), .b3_in(af3_in), + .b4_in(af4_in), .b5_in(af5_in), .b6_in(af6_in), .b7_in(af7_in), .b8_in(af8_in), + 
.c0_out(af0), .c1_out(af1), .c2_out(af2), .c3_out(af3), + .c4_out(af4), .c5_out(af5), .c6_out(af6), .c7_out(af7), .c8_out(af8) + ); + + multiplier #(.ENTRY_SIZE(ENTRY_SIZE)) BG ( + .clk(clk), .a_wrenable(b_wrenable), .b_wrenable(g_wrenable), + .a0_in(bg0_in), .a1_in(bg1_in), .a2_in(bg2_in), .a3_in(bg3_in), + .a4_in(bg4_in), .a5_in(bg5_in), .a6_in(bg6_in), .a7_in(bg7_in), .a8_in(bg8_in), + .b0_in(bg0_in), .b1_in(bg1_in), .b2_in(bg2_in), .b3_in(bg3_in), + .b4_in(bg4_in), .b5_in(bg5_in), .b6_in(bg6_in), .b7_in(bg7_in), .b8_in(bg8_in), + .c0_out(bg0), .c1_out(bg1), .c2_out(bg2), .c3_out(bg3), + .c4_out(bg4), .c5_out(bg5), .c6_out(bg6), .c7_out(bg7), .c8_out(bg8) + ); + + multiplier #(.ENTRY_SIZE(ENTRY_SIZE)) BH ( + .clk(clk), .a_wrenable(b_wrenable), .b_wrenable(h_wrenable), + .a0_in(bh0_in), .a1_in(bh1_in), .a2_in(bh2_in), .a3_in(bh3_in), + .a4_in(bh4_in), .a5_in(bh5_in), .a6_in(bh6_in), .a7_in(bh7_in), .a8_in(bh8_in), + .b0_in(bh0_in), .b1_in(bh1_in), .b2_in(bh2_in), .b3_in(bh3_in), + .b4_in(bh4_in), .b5_in(bh5_in), .b6_in(bh6_in), .b7_in(bh7_in), .b8_in(bh8_in), + .c0_out(bh0), .c1_out(bh1), .c2_out(bh2), .c3_out(bh3), + .c4_out(bh4), .c5_out(bh5), .c6_out(bh6), .c7_out(bh7), .c8_out(bh8) + ); + + multiplier #(.ENTRY_SIZE(ENTRY_SIZE)) CE ( + .clk(clk), .a_wrenable(c_wrenable), .b_wrenable(e_wrenable), + .a0_in(ce0_in), .a1_in(ce1_in), .a2_in(ce2_in), .a3_in(ce3_in), + .a4_in(ce4_in), .a5_in(ce5_in), .a6_in(ce6_in), .a7_in(ce7_in), .a8_in(ce8_in), + .b0_in(ce0_in), .b1_in(ce1_in), .b2_in(ce2_in), .b3_in(ce3_in), + .b4_in(ce4_in), .b5_in(ce5_in), .b6_in(ce6_in), .b7_in(ce7_in), .b8_in(ce8_in), + .c0_out(ce0), .c1_out(ce1), .c2_out(ce2), .c3_out(ce3), + .c4_out(ce4), .c5_out(ce5), .c6_out(ce6), .c7_out(ce7), .c8_out(ce8) + ); + + multiplier #(.ENTRY_SIZE(ENTRY_SIZE)) CF ( + .clk(clk), .a_wrenable(c_wrenable), .b_wrenable(f_wrenable), + .a0_in(cf0_in), .a1_in(cf1_in), .a2_in(cf2_in), .a3_in(cf3_in), + .a4_in(cf4_in), .a5_in(cf5_in), .a6_in(cf6_in), .a7_in(cf7_in), 
.a8_in(cf8_in), + .b0_in(cf0_in), .b1_in(cf1_in), .b2_in(cf2_in), .b3_in(cf3_in), + .b4_in(cf4_in), .b5_in(cf5_in), .b6_in(cf6_in), .b7_in(cf7_in), .b8_in(cf8_in), + .c0_out(cf0), .c1_out(cf1), .c2_out(cf2), .c3_out(cf3), + .c4_out(cf4), .c5_out(cf5), .c6_out(cf6), .c7_out(cf7), .c8_out(cf8) + ); + + multiplier #(.ENTRY_SIZE(ENTRY_SIZE)) DG ( + .clk(clk), .a_wrenable(d_wrenable), .b_wrenable(g_wrenable), + .a0_in(dg0_in), .a1_in(dg1_in), .a2_in(dg2_in), .a3_in(dg3_in), + .a4_in(dg4_in), .a5_in(dg5_in), .a6_in(dg6_in), .a7_in(dg7_in), .a8_in(dg8_in), + .b0_in(dg0_in), .b1_in(dg1_in), .b2_in(dg2_in), .b3_in(dg3_in), + .b4_in(dg4_in), .b5_in(dg5_in), .b6_in(dg6_in), .b7_in(dg7_in), .b8_in(dg8_in), + .c0_out(dg0), .c1_out(dg1), .c2_out(dg2), .c3_out(dg3), + .c4_out(dg4), .c5_out(dg5), .c6_out(dg6), .c7_out(dg7), .c8_out(dg8) + ); + + multiplier #(.ENTRY_SIZE(ENTRY_SIZE)) DH ( + .clk(clk), .a_wrenable(d_wrenable), .b_wrenable(h_wrenable), + .a0_in(dh0_in), .a1_in(dh1_in), .a2_in(dh2_in), .a3_in(dh3_in), + .a4_in(dh4_in), .a5_in(dh5_in), .a6_in(dh6_in), .a7_in(dh7_in), .a8_in(dh8_in), + .b0_in(dh0_in), .b1_in(dh1_in), .b2_in(dh2_in), .b3_in(dh3_in), + .b4_in(dh4_in), .b5_in(dh5_in), .b6_in(dh6_in), .b7_in(dh7_in), .b8_in(dh8_in), + .c0_out(dh0), .c1_out(dh1), .c2_out(dh2), .c3_out(dh3), + .c4_out(dh4), .c5_out(dh5), .c6_out(dh6), .c7_out(dh7), .c8_out(dh8) + ); + + wire [ENTRY_SIZE - 1:0] ae_bg_sum0, ae_bg_sum1, ae_bg_sum2; + wire [ENTRY_SIZE - 1:0] ae_bg_sum3, ae_bg_sum4, ae_bg_sum5; + wire [ENTRY_SIZE - 1:0] ae_bg_sum6, ae_bg_sum7, ae_bg_sum8; + wire [ENTRY_SIZE - 1:0] af_bh_sum0, af_bh_sum1, af_bh_sum2; + wire [ENTRY_SIZE - 1:0] af_bh_sum3, af_bh_sum4, af_bh_sum5; + wire [ENTRY_SIZE - 1:0] af_bh_sum6, af_bh_sum7, af_bh_sum8; + wire [ENTRY_SIZE - 1:0] ce_dg_sum0, ce_dg_sum1, ce_dg_sum2; + wire [ENTRY_SIZE - 1:0] ce_dg_sum3, ce_dg_sum4, ce_dg_sum5; + wire [ENTRY_SIZE - 1:0] ce_dg_sum6, ce_dg_sum7, ce_dg_sum8; + wire [ENTRY_SIZE - 1:0] cf_dh_sum0, cf_dh_sum1, cf_dh_sum2; 
+ wire [ENTRY_SIZE - 1:0] cf_dh_sum3, cf_dh_sum4, cf_dh_sum5; + wire [ENTRY_SIZE - 1:0] cf_dh_sum6, cf_dh_sum7, cf_dh_sum8; + + add3by3 #(.ENTRY_SIZE(ENTRY_SIZE)) AEplusBG ( + .a0(ae0), .a1(ae1), .a2(ae2), .a3(ae3), .a4(ae4), .a5(ae5), .a6(ae6), .a7(ae7), .a8(ae8), + .b0(bg0), .b1(bg1), .b2(bg2), .b3(bg3), .b4(bg4), .b5(bg5), .b6(bg6), .b7(bg7), .b8(bg8), + .c0(ae_bg_sum0), .c1(ae_bg_sum1), .c2(ae_bg_sum2), .c3(ae_bg_sum3), .c4(ae_bg_sum4), + .c5(ae_bg_sum5), .c6(ae_bg_sum6), .c7(ae_bg_sum7), .c8(ae_bg_sum8) + ); + + add3by3 #(.ENTRY_SIZE(ENTRY_SIZE)) AFplusBH ( + .a0(af0), .a1(af1), .a2(af2), .a3(af3), .a4(af4), .a5(af5), .a6(af6), .a7(af7), .a8(af8), + .b0(bh0), .b1(bh1), .b2(bh2), .b3(bh3), .b4(bh4), .b5(bh5), .b6(bh6), .b7(bh7), .b8(bh8), + .c0(af_bh_sum0), .c1(af_bh_sum1), .c2(af_bh_sum2), .c3(af_bh_sum3), .c4(af_bh_sum4), + .c5(af_bh_sum5), .c6(af_bh_sum6), .c7(af_bh_sum7), .c8(af_bh_sum8) + ); + + add3by3 #(.ENTRY_SIZE(ENTRY_SIZE)) CEplusDG ( + .a0(ce0), .a1(ce1), .a2(ce2), .a3(ce3), .a4(ce4), .a5(ce5), .a6(ce6), .a7(ce7), .a8(ce8), + .b0(dg0), .b1(dg1), .b2(dg2), .b3(dg3), .b4(dg4), .b5(dg5), .b6(dg6), .b7(dg7), .b8(dg8), + .c0(ce_dg_sum0), .c1(ce_dg_sum1), .c2(ce_dg_sum2), .c3(ce_dg_sum3), .c4(ce_dg_sum4), + .c5(ce_dg_sum5), .c6(ce_dg_sum6), .c7(ce_dg_sum7), .c8(ce_dg_sum8) + ); + + add3by3 #(.ENTRY_SIZE(ENTRY_SIZE)) CFplusDH ( + .a0(cf0), .a1(cf1), .a2(cf2), .a3(cf3), .a4(cf4), .a5(cf5), .a6(cf6), .a7(cf7), .a8(cf8), + .b0(dh0), .b1(dh1), .b2(dh2), .b3(dh3), .b4(dh4), .b5(dh5), .b6(dh6), .b7(dh7), .b8(dh8), + .c0(cf_dh_sum0), .c1(cf_dh_sum1), .c2(cf_dh_sum2), .c3(cf_dh_sum3), .c4(cf_dh_sum4), + .c5(cf_dh_sum5), .c6(cf_dh_sum6), .c7(cf_dh_sum7), .c8(cf_dh_sum8) + ); + + `define AEBG {{ae_bg_sum0}, {ae_bg_sum1}, {ae_bg_sum2}, {ae_bg_sum3}, {ae_bg_sum4}, {ae_bg_sum5}, {ae_bg_sum6}, {ae_bg_sum7}, {ae_bg_sum8}} + `define AFBH {{af_bh_sum0}, {af_bh_sum1}, {af_bh_sum2}, {af_bh_sum3}, {af_bh_sum4}, {af_bh_sum5}, {af_bh_sum6}, {af_bh_sum7}, {af_bh_sum8}} + `define 
CEDG {{ce_dg_sum0}, {ce_dg_sum1}, {ce_dg_sum2}, {ce_dg_sum3}, {ce_dg_sum4}, {ce_dg_sum5}, {ce_dg_sum6}, {ce_dg_sum7}, {ce_dg_sum8}} + `define CFDH {{cf_dh_sum0}, {cf_dh_sum1}, {cf_dh_sum2}, {cf_dh_sum3}, {cf_dh_sum4}, {cf_dh_sum5}, {cf_dh_sum6}, {cf_dh_sum7}, {cf_dh_sum8}} + + multiplexer4to1 #( + .ENTRY_SIZE(ENTRY_SIZE)) chooseres (.res_sel (res_sel), .AEplusBG(`AEBG), .AFplusBH(`AFBH), + .CEplusDG(`CEDG), .CFplusDH(`CFDH), .result (res) + ); +endmodule // multiplier_network \ No newline at end of file diff --git a/prog_mem.dat b/prog_mem.dat new file mode 100644 index 0000000..ad8f8f8 --- /dev/null +++ b/prog_mem.dat @@ -0,0 +1,64 @@ +00000 +00001 +00010 +00011 +00100 +00101 +00110 +00111 +01000 +01001 +01010 +01011 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 +00000 diff --git a/prog_memory.t.v b/prog_memory.t.v new file mode 100644 index 0000000..347a45d --- /dev/null +++ b/prog_memory.t.v @@ -0,0 +1,73 @@ +/* +Test bench for program memory module. 
+*/ + +`include "prog_memory.v" + +module memory_TEST(); + parameter ENTRY_SIZE = 5; + parameter ADDRESS_WIDTH = 32; + parameter NUM_TESTS = 3; + + reg clk; + reg [ADDRESS_WIDTH - 1:0] addr; + + wire [ENTRY_SIZE - 1:0] data; + + prog_memory #(.width(ENTRY_SIZE), .addresswidth(ADDRESS_WIDTH)) dut( + .clk(clk), .data(data), + .addr(addr) + ); + + reg[5:0] success_count = 0; + + initial clk = 0; + always #10 clk = !clk; + initial begin + $dumpfile("prog_memory.vcd"); + $dumpvars(0, memory_TEST, dut.memory[0]); + + + // Test Case 1: Read first memory element + addr = 32'd0; + #20 + if (data === 5'b0) begin + success_count = success_count + 1; + end + else begin + $display("Test case 1 failed: expected %b got %b", 5'b0, data); + end + + // Test Case 2: Read second memory element + addr = 32'd1; + #20 + if (data === 5'b1) begin + success_count = success_count + 1; + end + else begin + $display("Test case 2 failed: expected %b got %b", 5'b1, data); + end + + // Test Case 3: Read tenth memory element + addr = 32'd9; + #20 + if (data === 5'b01001) begin + success_count = success_count + 1; + end + else begin + $display("Test case 3 failed: expected %b got %b", 5'b01001, data); + end + + #10 + + if (success_count < NUM_TESTS) begin + $display("\nProgram Memory Failed %d Tests\n",(NUM_TESTS-success_count)); + end + else begin + $display("Program Memory Passed All %d tests", NUM_TESTS); + end + + + $finish; + end +endmodule diff --git a/prog_memory.v b/prog_memory.v new file mode 100644 index 0000000..5ef161c --- /dev/null +++ b/prog_memory.v @@ -0,0 +1,25 @@ +/* +The memory where the program is stored. +*/ + +// TODO (arianaolson419): Allow multiple data access in some fashion. 
+module prog_memory +#( + parameter addresswidth = 32, + parameter depth = addresswidth * 2, + parameter width = 5 +) +( + input clk, + output [width-1:0] data, + input [addresswidth-1:0] addr +); + + + reg [width-1:0] memory [depth-1:0]; + + + initial $readmemb("prog_mem.dat", memory); + assign data = memory[addr]; + +endmodule diff --git a/read_6by6result.py b/read_6by6result.py new file mode 100644 index 0000000..c6355ba --- /dev/null +++ b/read_6by6result.py @@ -0,0 +1,20 @@ +""" +pull out the part of memory holding the matrix +and format it as a numpy array to make it easy +to read +""" +import numpy as np + + +f = open("memory_out.txt") +fullstr = f.read() +f.close() + +full_mem = fullstr.split('\n') +res_mem = full_mem[72:108] + +matrix = [] +for i in range(6): + matrix.append([int(val) for val in res_mem[i*6:(i+1)*6]]) + +print np.array(matrix) diff --git a/registers.t.v b/registers.t.v new file mode 100644 index 0000000..ae0c1b4 --- /dev/null +++ b/registers.t.v @@ -0,0 +1,96 @@ +`include "registers.v" + +module test_registers(); + +reg clk, a_wrenable, b_wrenable, c_wrenable; +reg[4:0] a0_in, a1_in, a2_in, a3_in, a4_in, a5_in, a6_in, a7_in, a8_in; +reg[4:0] b0_in, b1_in, b2_in, b3_in, b4_in, b5_in, b6_in, b7_in, b8_in; +reg[4:0] c0_in, c1_in, c2_in, c3_in, c4_in, c5_in, c6_in, c7_in, c8_in; +wire[4:0] a0_out, a1_out, a2_out, a3_out, a4_out, a5_out, a6_out, a7_out, a8_out; +wire[4:0] b0_out, b1_out, b2_out, b3_out, b4_out, b5_out, b6_out, b7_out, b8_out; +wire[4:0] c0_out, c1_out, c2_out, c3_out, c4_out, c5_out, c6_out, c7_out, c8_out; + +regfile #(.width(5)) dut ( + .a_wrenable(a_wrenable), .b_wrenable(b_wrenable), .c_wrenable(c_wrenable), + .a0_in (a0_in), .a1_in (a1_in), .a2_in (a2_in), .a3_in (a3_in), .a4_in (a4_in), .a5_in (a5_in), + .a6_in (a6_in), .a7_in (a7_in), .a8_in (a8_in), .b0_in (b0_in), .b1_in (b1_in), .b2_in (b2_in), + .b3_in (b3_in), .b4_in (b4_in), .b5_in (b5_in), .b6_in (b6_in), .b7_in (b7_in), .b8_in (b8_in), + .c0_in (c0_in), .c1_in 
(c1_in), .c2_in (c2_in), .c3_in (c3_in), .c4_in (c4_in), .c5_in (c5_in), + .c6_in (c6_in), .c7_in (c7_in), .c8_in (c8_in), .a0_out (a0_out), .a1_out (a1_out), .a2_out (a2_out), + .a3_out (a3_out), .a4_out (a4_out), .a5_out (a5_out), .a6_out (a6_out), .a7_out (a7_out), .a8_out (a8_out), + .b0_out (b0_out), .b1_out (b1_out), .b2_out (b2_out), .b3_out (b3_out), .b4_out (b4_out), .b5_out (b5_out), + .b6_out (b6_out), .b7_out (b7_out), .b8_out (b8_out), .c0_out (c0_out), .c1_out (c1_out), .c2_out (c2_out), + .c3_out (c3_out), .c4_out (c4_out), .c5_out (c5_out), .c6_out (c6_out), .c7_out (c7_out), .c8_out (c8_out), + .clk (clk) +); + +genvar i; +initial begin + +a0_in = 5'd0; +a1_in = 5'd1; +a2_in = 5'd2; +a3_in = 5'd3; +a4_in = 5'd4; +a5_in = 5'd5; +a6_in = 5'd6; +a7_in = 5'd7; +a8_in = 5'd8; +b0_in = 5'd9; +b1_in = 5'd10; +b2_in = 5'd11; +b3_in = 5'd12; +b4_in = 5'd13; +b5_in = 5'd14; +b6_in = 5'd15; +b7_in = 5'd16; +b8_in = 5'd17; +c0_in = 5'd18; +c1_in = 5'd19; +c2_in = 5'd20; +c3_in = 5'd21; +c4_in = 5'd22; +c5_in = 5'd23; +c6_in = 5'd24; +c7_in = 5'd25; +c8_in = 5'd26; +a_wrenable = 0; +b_wrenable = 0; +c_wrenable = 0; + +clk = 0; #5 +clk = 1; #5 +clk = 0; #5 + +if (a0_out !== 5'd0 || a1_out !== 5'd0 || a2_out !== 5'd0 || +a3_out !== 5'd0 || a4_out !== 5'd0 || a5_out !== 5'd0 || a6_out !== 5'd0 || +a7_out !== 5'd0 || a8_out !== 5'd0 || b0_out !== 5'd0 || b1_out !== 5'd0 || +b2_out !== 5'd0 || b3_out !== 5'd0 || b4_out !== 5'd0 || b5_out !== 5'd0 || +b6_out !== 5'd0 || b7_out !== 5'd0 || b8_out !== 5'd0 || c0_out !== 5'd0 || +c1_out !== 5'd0 || c2_out !== 5'd0 || c3_out !== 5'd0 || c4_out !== 5'd0 || +c5_out !== 5'd0 || c6_out !== 5'd0 || c7_out !== 5'd0 || c8_out !== 5'd0) + + $display("wrote to registers when wrenable was 0"); + +a_wrenable = 1; +b_wrenable = 1; +c_wrenable = 1; + +clk = 0; #5 +clk = 1; #5 +clk = 0; #5 + +if (a0_out !== 5'd0 || a1_out !== 5'd1 || a2_out !== 5'd2 || +a3_out !== 5'd3 || a4_out !== 5'd4 || a5_out !== 5'd5 || a6_out !== 5'd6 || 
+a7_out !== 5'd7 || a8_out !== 5'd8 || b0_out !== 5'd9 || b1_out !== 5'd10 || +b2_out !== 5'd11 || b3_out !== 5'd12 || b4_out !== 5'd13 || b5_out !== 5'd14 || +b6_out !== 5'd15 || b7_out !== 5'd16 || b8_out !== 5'd17 || c0_out !== 5'd18 || +c1_out !== 5'd19 || c2_out !== 5'd20 || c3_out !== 5'd21 || c4_out !== 5'd22 || +c5_out !== 5'd23 || c6_out !== 5'd24 || c7_out !== 5'd25 || c8_out !== 5'd26) + + $display("wrote to registers incorrectly"); + + +end + +endmodule diff --git a/registers.v b/registers.v new file mode 100644 index 0000000..2f30f73 --- /dev/null +++ b/registers.v @@ -0,0 +1,171 @@ +module register #(parameter width = 5'd4, parameter init = {width{1'd0}}) /* width-bit register with a synchronous write enable */ +( + + output reg[width-1:0] q, + input[width-1:0] d, + input wrenable, + input clk +); + + initial begin + q={width{1'b0}}+init; /* simulation start value: q begins at init */ + end + + always @(posedge clk) begin + if(wrenable) begin + q <= d; /* nonblocking: q is updated after all posedge-clk blocks have sampled it, so clocked readers never race with this write */ + end + end + +endmodule + +module regfile +#(parameter width = 5) +( + input a_wrenable, + input b_wrenable, + input c_wrenable, + input[width - 1:0] a0_in, a1_in, a2_in, a3_in, a4_in, a5_in, a6_in, a7_in, a8_in, + input[width - 1:0] b0_in, b1_in, b2_in, b3_in, b4_in, b5_in, b6_in, b7_in, b8_in, + input[width - 1:0] c0_in, c1_in, c2_in, c3_in, c4_in, c5_in, c6_in, c7_in, c8_in, + output[width - 1:0] a0_out, a1_out, a2_out, a3_out, a4_out, a5_out, a6_out, a7_out, a8_out, + output[width - 1:0] b0_out, b1_out, b2_out, b3_out, b4_out, b5_out, b6_out, b7_out, b8_out, + output[width - 1:0] c0_out, c1_out, c2_out, c3_out, c4_out, c5_out, c6_out, c7_out, c8_out, + input clk +); +register #(.width(width)) + a0_reg(.d(a0_in), + .q(a0_out), + .wrenable(a_wrenable), + .clk(clk)); +register #(.width(width)) + a1_reg(.d(a1_in), + .q(a1_out), + .wrenable(a_wrenable), + .clk(clk)); +register #(.width(width)) + a2_reg(.d(a2_in), + .q(a2_out), + .wrenable(a_wrenable), + .clk(clk)); +register #(.width(width)) + a3_reg(.d(a3_in), + .q(a3_out), + .wrenable(a_wrenable), + .clk(clk)); +register #(.width(width)) + 
a4_reg(.d(a4_in), + .q(a4_out), + .wrenable(a_wrenable), + .clk(clk)); +register #(.width(width)) + a5_reg(.d(a5_in), + .q(a5_out), + .wrenable(a_wrenable), + .clk(clk)); +register #(.width(width)) + a6_reg(.d(a6_in), + .q(a6_out), + .wrenable(a_wrenable), + .clk(clk)); +register #(.width(width)) + a7_reg(.d(a7_in), + .q(a7_out), + .wrenable(a_wrenable), + .clk(clk)); +register #(.width(width)) + a8_reg(.d(a8_in), + .q(a8_out), + .wrenable(a_wrenable), + .clk(clk)); +register #(.width(width)) + b0_reg(.d(b0_in), + .q(b0_out), + .wrenable(b_wrenable), + .clk(clk)); +register #(.width(width)) + b1_reg(.d(b1_in), + .q(b1_out), + .wrenable(b_wrenable), + .clk(clk)); +register #(.width(width)) + b2_reg(.d(b2_in), + .q(b2_out), + .wrenable(b_wrenable), + .clk(clk)); +register #(.width(width)) + b3_reg(.d(b3_in), + .q(b3_out), + .wrenable(b_wrenable), + .clk(clk)); +register #(.width(width)) + b4_reg(.d(b4_in), + .q(b4_out), + .wrenable(b_wrenable), + .clk(clk)); +register #(.width(width)) + b5_reg(.d(b5_in), + .q(b5_out), + .wrenable(b_wrenable), + .clk(clk)); +register #(.width(width)) + b6_reg(.d(b6_in), + .q(b6_out), + .wrenable(b_wrenable), + .clk(clk)); +register #(.width(width)) + b7_reg(.d(b7_in), + .q(b7_out), + .wrenable(b_wrenable), + .clk(clk)); +register #(.width(width)) + b8_reg(.d(b8_in), + .q(b8_out), + .wrenable(b_wrenable), + .clk(clk)); +register #(.width(width)) + c0_reg(.d(c0_in), + .q(c0_out), + .wrenable(c_wrenable), + .clk(clk)); +register #(.width(width)) + c1_reg(.d(c1_in), + .q(c1_out), + .wrenable(c_wrenable), + .clk(clk)); +register #(.width(width)) + c2_reg(.d(c2_in), + .q(c2_out), + .wrenable(c_wrenable), + .clk(clk)); +register #(.width(width)) + c3_reg(.d(c3_in), + .q(c3_out), + .wrenable(c_wrenable), + .clk(clk)); +register #(.width(width)) + c4_reg(.d(c4_in), + .q(c4_out), + .wrenable(c_wrenable), + .clk(clk)); +register #(.width(width)) + c5_reg(.d(c5_in), + .q(c5_out), + .wrenable(c_wrenable), + .clk(clk)); +register #(.width(width)) + 
c6_reg(.d(c6_in), + .q(c6_out), + .wrenable(c_wrenable), + .clk(clk)); +register #(.width(width)) + c7_reg(.d(c7_in), + .q(c7_out), + .wrenable(c_wrenable), + .clk(clk)); +register #(.width(width)) + c8_reg(.d(c8_in), + .q(c8_out), + .wrenable(c_wrenable), + .clk(clk)); +endmodule diff --git a/run_tests.sh b/run_tests.sh new file mode 100755 index 0000000..22bbe4d --- /dev/null +++ b/run_tests.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +make + +echo "----------Running all module tests:----------" + +echo " Running arithmetic tests..." +./arithmetic + +echo " Running dot tests..." +./dot + +echo " Running matrixmultiplication tests..." +./matrixmultiplication + +cat mem_test_data.dat > matrix_mem.dat +echo " Running data_mem tests..." +./data_mem + +echo " Running load_block tests..." +./load_block + +echo " Running add_block tests..." +./add_block + +echo " Running multiplier tests..." +./multiplier + +echo " Running registers tests..." +./registers + +echo " Running multiplexer tests..." +./multiplexer + +echo " Running fsm tests..." +./fsm + +cat prog_mem_unit_test.dat > prog_mem.dat +echo " Running prog_mem tests..." +./prog_mem + +echo " Running controller tests..." +./controller + +cat matrix_manager_test.dat > matrix_mem.dat +echo " Running matrix_manager tests..." +./matrix_manager + +echo " Running multiplier_network tests..." 
+./multiplier_network + +make clean diff --git a/setup_memory.py b/setup_memory.py new file mode 100644 index 0000000..14c1a9e --- /dev/null +++ b/setup_memory.py @@ -0,0 +1,77 @@ +""" +Set up the memory to do a matrix multiplication with our verilog simulation + +- formats matrices correctly and stores them in memory +- Sets up program memory to multiply the matrices together with our hardware +""" + +import numpy as np + +WORD_LEN = 5 + +# currently we only support 6x6 matrices +matrixA = [[1, 2, 3, 3, 2, 1], + [2, 3, 3, 2, 1, 1], + [1, 3, 1, 1, 2, 1], + [2, 1, 2, 0, 0, 1], + [3, 1, 0, 3, 2, 2], + [1, 2, 3, 2, 3, 2]] + +matrixB = [[1, 2, 3, 2, 2, 1], + [2, 3, 0, 2, 1, 1], + [1, 3, 1, 1, 2, 1], + [2, 1, 1, 0, 0, 1], + [3, 1, 0, 2, 3, 2], + [1, 2, 3, 2, 1, 2]] + +# format matrices to store in data memory +Astr = '' +for row in matrixA: + for val in row: + bin_val = bin(val)[2:] + extended_val = '0'*(WORD_LEN-len(bin_val)) + bin_val + Astr += extended_val + '\n' + +Bstr = '' +for row in matrixB: + for val in row: + bin_val = bin(val)[2:] + extended_val = '0'*(WORD_LEN-len(bin_val)) + bin_val + Bstr += extended_val + '\n' + +# get expected output and print calculation +expected = np.dot(matrixA, matrixB) + +print "a:\n", np.array(matrixA) +print "\nb:\n", np.array(matrixB) +print "\nexpected result:\n", expected +print "(max: {})".format(np.amax(expected)) + + +# write data memory +used_mem = Astr + Bstr +unused_mem = ('0'*WORD_LEN+'\n')*(1024-used_mem.count('\n')) +mem = used_mem + unused_mem[:-1] +f = open("data_test.dat", 'w') +f.write(mem) +f.close() + +# write program memory +# this would need to be more complicated to support larger matrices + +load_8_chunks = "00000\n00001\n00010\n00011\n00100\n00101\n00110\n00111\n" +store_4_chunks = "01000\n01001\n01010\n01011\n" +multiply6by6 = load_8_chunks + store_4_chunks + +full_program = multiply6by6 + +prog_mem = full_program + "00000\n"*(64-full_program.count('\n')) +f2 = open("prog_test.dat", 'w') 
+f2.write(prog_mem) +f2.close() + + + + + +