This lesson starts at commit 0fcfa0cdaf00a57a0f2da39b78b9c2c6cdfab014.
5. Intermezzo: Running our design
Let's take a break from staring at simulation waveforms and try to run our design on the dev board.
It would be nice to somehow see the result of our computations, so let's add a custom instruction that displays the 8 least significant bits of a register on the LEDs of the Mimas A7.
Until now, we have only run our core in simulation. The "main" or "top level" module that will be used on the FPGA is top_level.vhd. It was already there in the template; we have just never used it. It already has outputs for the LEDs on the dev board. So, first, let's instantiate our core in the top level module.
|
@@ -14,14 +14,14 @@ end top_level;
|
|
| 14 |
architecture rtl of top_level is
|
| 15 |
signal count: unsigned(31 downto 0) := (others => '0');
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
begin
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
process (clk)
|
| 21 |
-
begin
|
| 22 |
-
if rising_edge(clk) then
|
| 23 |
-
count <= count + 1;
|
| 24 |
-
end if;
|
| 25 |
-
end process;
|
| 26 |
|
| 27 |
end rtl;
|
|
|
|
| 14 |
architecture rtl of top_level is
|
| 15 |
signal count: unsigned(31 downto 0) := (others => '0');
|
| 16 |
|
| 17 |
+
component core is
|
| 18 |
+
port (
|
| 19 |
+
clk: in std_logic
|
| 20 |
+
);
|
| 21 |
+
end component;
|
| 22 |
+
|
| 23 |
begin
|
| 24 |
+
|
| 25 |
+
core_inst: core port map(clk => clk);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
end rtl;
|
To be able to set the LEDs we need to make an output for them in our core.
|
@@ -8,7 +8,8 @@ use work.core_constants.all;
|
|
| 8 |
|
| 9 |
entity core is
|
| 10 |
port (
|
| 11 |
-
clk: in std_logic
|
|
|
|
| 12 |
);
|
| 13 |
end core;
|
| 14 |
|
|
|
|
| 8 |
|
| 9 |
entity core is
|
| 10 |
port (
|
| 11 |
+
clk: in std_logic;
|
| 12 |
+
led: out std_logic_vector(7 downto 0)
|
| 13 |
);
|
| 14 |
end core;
|
| 15 |
|
|
@@ -16,12 +16,13 @@ architecture rtl of top_level is
|
|
| 16 |
|
| 17 |
component core is
|
| 18 |
port (
|
| 19 |
-
clk: in std_logic
|
|
|
|
| 20 |
);
|
| 21 |
end component;
|
| 22 |
|
| 23 |
begin
|
| 24 |
|
| 25 |
-
core_inst: core port map(clk => clk);
|
| 26 |
|
| 27 |
end rtl;
|
|
|
|
| 16 |
|
| 17 |
component core is
|
| 18 |
port (
|
| 19 |
+
clk: in std_logic;
|
| 20 |
+
led: out std_logic_vector(7 downto 0)
|
| 21 |
);
|
| 22 |
end component;
|
| 23 |
|
| 24 |
begin
|
| 25 |
|
| 26 |
+
core_inst: core port map(clk => clk, led => led);
|
| 27 |
|
| 28 |
end rtl;
|
We want to set it from the execute stage.
|
@@ -43,7 +43,8 @@ architecture rtl of core is
|
|
| 43 |
port (
|
| 44 |
clk: in std_logic;
|
| 45 |
input: in decode_output_t;
|
| 46 |
-
output: out execute_output_t
|
|
|
|
| 47 |
);
|
| 48 |
end component;
|
| 49 |
|
|
@@ -60,7 +61,7 @@ begin
|
|
| 60 |
|
| 61 |
decode_write_inst: decode_write port map(clk => clk, decode_input => fetch_output, decode_output => decode_output, write_input => memory_output, pipeline_ready => pipeline_ready);
|
| 62 |
|
| 63 |
-
execute_inst: execute port map(clk => clk, input => decode_output, output => execute_output);
|
| 64 |
|
| 65 |
memory_inst: memory port map(clk => clk, input => execute_output, output => memory_output);
|
| 66 |
|
|
|
|
| 43 |
port (
|
| 44 |
clk: in std_logic;
|
| 45 |
input: in decode_output_t;
|
| 46 |
+
output: out execute_output_t;
|
| 47 |
+
led: out std_logic_vector(7 downto 0)
|
| 48 |
);
|
| 49 |
end component;
|
| 50 |
|
|
|
|
| 61 |
|
| 62 |
decode_write_inst: decode_write port map(clk => clk, decode_input => fetch_output, decode_output => decode_output, write_input => memory_output, pipeline_ready => pipeline_ready);
|
| 63 |
|
| 64 |
+
execute_inst: execute port map(clk => clk, input => decode_output, output => execute_output, led => led);
|
| 65 |
|
| 66 |
memory_inst: memory port map(clk => clk, input => execute_output, output => memory_output);
|
| 67 |
|
|
@@ -10,7 +10,8 @@ entity execute is
|
|
| 10 |
port (
|
| 11 |
clk: in std_logic;
|
| 12 |
input: in decode_output_t;
|
| 13 |
-
output: out execute_output_t := DEFAULT_EXECUTE_OUTPUT
|
|
|
|
| 14 |
);
|
| 15 |
end execute;
|
| 16 |
|
|
|
|
| 10 |
port (
|
| 11 |
clk: in std_logic;
|
| 12 |
input: in decode_output_t;
|
| 13 |
+
output: out execute_output_t := DEFAULT_EXECUTE_OUTPUT;
|
| 14 |
+
led: out std_logic_vector(7 downto 0) := (others => '0')
|
| 15 |
);
|
| 16 |
end execute;
|
| 17 |
|
Now, let's add an operation for setting the LED.
|
@@ -29,6 +29,8 @@ begin
|
|
| 29 |
if input.is_active = '1' and input.is_invalid = '0' then
|
| 30 |
if input.operation = OP_ADD then
|
| 31 |
v_output.result := std_logic_vector(unsigned(input.operand1) + unsigned(input.operand2));
|
|
|
|
|
|
|
| 32 |
else
|
| 33 |
assert false report "Unhandled operation value in execute stage" severity failure;
|
| 34 |
end if;
|
|
|
|
| 29 |
if input.is_active = '1' and input.is_invalid = '0' then
|
| 30 |
if input.operation = OP_ADD then
|
| 31 |
v_output.result := std_logic_vector(unsigned(input.operand1) + unsigned(input.operand2));
|
| 32 |
+
elsif input.operation = OP_LED then
|
| 33 |
+
led <= input.operand1(7 downto 0);
|
| 34 |
else
|
| 35 |
assert false report "Unhandled operation value in execute stage" severity failure;
|
| 36 |
end if;
|
|
@@ -3,7 +3,7 @@ use ieee.std_logic_1164.all;
|
|
| 3 |
|
| 4 |
|
| 5 |
package core_types is
|
| 6 |
-
type operation_t is (OP_ADD);
|
| 7 |
|
| 8 |
type fetch_output_t is record
|
| 9 |
is_active: std_logic;
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
package core_types is
|
| 6 |
+
type operation_t is (OP_ADD, OP_LED);
|
| 7 |
|
| 8 |
type fetch_output_t is record
|
| 9 |
is_active: std_logic;
|
Now, we can add our custom instruction. I'll use the opcode field and check if it's all ones.
|
@@ -72,6 +72,12 @@ begin
|
|
| 72 |
v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
|
| 73 |
v_decode_output.operand2 := reg(to_integer(unsigned(rs2)));
|
| 74 |
v_decode_output.destination_reg := rd;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
else
|
| 76 |
v_decode_output.is_invalid := '1';
|
| 77 |
end if;
|
|
|
|
| 72 |
v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
|
| 73 |
v_decode_output.operand2 := reg(to_integer(unsigned(rs2)));
|
| 74 |
v_decode_output.destination_reg := rd;
|
| 75 |
+
elsif opcode = "1111111" and funct3 = "000" then
|
| 76 |
+
-- LED rs1: set the LEDs to the 8 least significant bits of rs1
|
| 77 |
+
v_decode_output.operation := OP_LED;
|
| 78 |
+
v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
|
| 79 |
+
v_decode_output.operand2 := (others => '0');
|
| 80 |
+
v_decode_output.destination_reg := (others => '0');
|
| 81 |
else
|
| 82 |
v_decode_output.is_invalid := '1';
|
| 83 |
end if;
|
I also want to implement an instruction that makes our CPU hang. If we don't stop it, our CPU will keep executing the same 16 instructions over and over again, and it will be impossible to observe the LEDs when they keep changing thousands of times per second.
Currently, the fetch stage only fetches new instructions when an instruction is done, which is detected by observing the is_active flag. So, we can simply add an instruction that does not set is_active, and the fetch stage will stop, halting the entire pipeline.
|
@@ -78,6 +78,9 @@ begin
|
|
| 78 |
v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
|
| 79 |
v_decode_output.operand2 := (others => '0');
|
| 80 |
v_decode_output.destination_reg := (others => '0');
|
|
|
|
|
|
|
|
|
|
| 81 |
else
|
| 82 |
v_decode_output.is_invalid := '1';
|
| 83 |
end if;
|
|
|
|
| 78 |
v_decode_output.operand1 := reg(to_integer(unsigned(rs1)));
|
| 79 |
v_decode_output.operand2 := (others => '0');
|
| 80 |
v_decode_output.destination_reg := (others => '0');
|
| 81 |
+
elsif opcode = "1111111" and funct3 = "001" then
|
| 82 |
+
-- HANG
|
| 83 |
+
v_decode_output := DEFAULT_DECODE_OUTPUT;
|
| 84 |
else
|
| 85 |
v_decode_output.is_invalid := '1';
|
| 86 |
end if;
|
I've dubbed this custom instruction HANG.
Can we do anything interesting with 16 instructions? If we set x2 to 1 with ADDI x2, x2, 1, and then alternately do ADD x1, x1, x2 and ADD x2, x1, x2, we can calculate the Fibonacci numbers. The even-numbered Fibonacci numbers (F0, F2, F4, ...) end up in x1, while the odd-numbered Fibonacci numbers (F1, F3, F5, ...) end up in x2.
After doing n iterations of the ADD instruction, we have computed the (n+1)th Fibonacci number. So if we do a single ADDI instruction, 12 ADD instructions, the LED x2 instruction, and the HANG instruction, we compute the 13th Fibonacci number in 15 instructions.
Using the online assembler again, we find that ADDI x2, x2, 1 assembles to 00110113, ADD x1, x1, x2 assembles to 002080b3, and ADD x2, x1, x2 assembles to 00208133.
We have to assemble our custom instructions by hand. LED x2 assembles to 0001007f, and HANG assembles to 0000107f.
Now, we put this into our instruction memory.
|
@@ -18,8 +18,8 @@ end fetch;
|
|
| 18 |
architecture rtl of fetch is
|
| 19 |
type instruction_memory_t is array(0 to 15) of std_logic_vector(31 downto 0);
|
| 20 |
signal imem: instruction_memory_t := (
|
| 21 |
-
X"
|
| 22 |
-
X"
|
| 23 |
);
|
| 24 |
|
| 25 |
signal pc: unsigned(31 downto 0) := (others => '0');
|
|
|
|
| 18 |
architecture rtl of fetch is
|
| 19 |
type instruction_memory_t is array(0 to 15) of std_logic_vector(31 downto 0);
|
| 20 |
signal imem: instruction_memory_t := (
|
| 21 |
+
X"00110113", X"002080b3", X"00208133", X"002080b3", X"00208133", X"002080b3", X"00208133", X"002080b3",
|
| 22 |
+
X"00208133", X"002080b3", X"00208133", X"002080b3", X"00208133", X"0001007f", X"0000107f", X"00000000"
|
| 23 |
);
|
| 24 |
|
| 25 |
signal pc: unsigned(31 downto 0) := (others => '0');
|
If we run this with
make bitstream
make program
and check the LEDs on our dev board, we can see they display the binary pattern 11101001. This is the binary representation of the number 233, which is indeed the 13th Fibonacci number. Success!